@@ -28,7 +28,8 @@ def metadata():
2828 'categorical' : {'sdtype' : 'categorical' },
2929 'pii' : {'sdtype' : 'ssn' },
3030 'extra_column' : {'sdtype' : 'numerical' },
31- }
31+ },
32+ 'primary_key' : 'id' ,
3233 }
3334 }
3435 })
@@ -97,21 +98,31 @@ def test__validate_column_parameter():
9798 column_metadata = {'sdtype' : 'id' }
9899 bad_column_parameters = {'invalid_key' : None }
99100 bad_missing_value = {'missing_values_proportion' : 100 }
101+ bad_key_missing_value = {'missing_values_proportion' : 0.5 }
100102
101103 # Run and Assert
102104 expected_bad_column_msg = re .escape (
103105 "The parameters for column 'column' in table 'table' contains unexpected "
104106 "key(s) 'invalid_key'."
105107 )
106108 with pytest .raises (SynthesizerProcessingError , match = expected_bad_column_msg ):
107- _validate_column_parameters ('table' , 'column' , column_metadata , bad_column_parameters )
109+ _validate_column_parameters (
110+ 'table' , 'column' , column_metadata , bad_column_parameters , False
111+ )
108112
109113 expected_bad_missing_value_msg = re .escape (
110114 "The 'missing_values_proportion' parameter for column 'column' in table 'table' "
111115 'must be a float between 0.0 and 1.0.'
112116 )
113117 with pytest .raises (SynthesizerProcessingError , match = expected_bad_missing_value_msg ):
114- _validate_column_parameters ('table' , 'column' , column_metadata , bad_missing_value )
118+ _validate_column_parameters ('table' , 'column' , column_metadata , bad_missing_value , False )
119+
120+ expected_missing_values_with_key_msg = re .escape (
121+ "Invalid 'missing_values_proportion' parameter for column 'column' in table 'table'. "
122+ "Primary and alternate keys must have 'missing_values_proportion' parameter set to zero."
123+ )
124+ with pytest .raises (SynthesizerProcessingError , match = expected_missing_values_with_key_msg ):
125+ _validate_column_parameters ('table' , 'column' , column_metadata , bad_key_missing_value , True )
115126
116127
117128def test__validate_column_parameters_numerical ():
@@ -127,21 +138,25 @@ def test__validate_column_parameters_numerical():
127138 "The 'min_value' parameter for column 'column' in table 'table' must be a float."
128139 )
129140 with pytest .raises (SynthesizerProcessingError , match = expected_bad_parameter_value_msg ):
130- _validate_column_parameters ('table' , 'column' , column_metadata , bad_parameter_value )
141+ _validate_column_parameters ('table' , 'column' , column_metadata , bad_parameter_value , False )
131142
132143 expected_bad_min_max_msg = re .escape (
133144 "Invalid parameters for column 'column' in table 'table'. The 'min_value' "
134145 "must be less than or equal to the 'max_value'"
135146 )
136147 with pytest .raises (SynthesizerProcessingError , match = expected_bad_min_max_msg ):
137- _validate_column_parameters ('table' , 'column' , column_metadata , bad_min_max_combination )
148+ _validate_column_parameters (
149+ 'table' , 'column' , column_metadata , bad_min_max_combination , False
150+ )
138151
139152 expected_bad_num_decimal_digits_msg = re .escape (
140153 "The 'num_decimal_digits' parameter for column 'column' in table 'table' must be an "
141154 'integer greater than or equal to zero.'
142155 )
143156 with pytest .raises (SynthesizerProcessingError , match = expected_bad_num_decimal_digits_msg ):
144- _validate_column_parameters ('table' , 'column' , column_metadata , bad_num_decimal_digits )
157+ _validate_column_parameters (
158+ 'table' , 'column' , column_metadata , bad_num_decimal_digits , False
159+ )
145160
146161
147162def test__validate_column_parameters_datetime ():
@@ -157,28 +172,32 @@ def test__validate_column_parameters_datetime():
157172 "The 'start_timestamp' parameter for column 'column' in table 'table' must be a string."
158173 )
159174 with pytest .raises (SynthesizerProcessingError , match = expected_bad_parameter_value_msg ):
160- _validate_column_parameters ('table' , 'column' , column_metadata , bad_parameter_value )
175+ _validate_column_parameters ('table' , 'column' , column_metadata , bad_parameter_value , False )
161176
162177 expected_bad_datetime_value_msg = re .escape (
163178 "The 'start_timestamp' parameter for column 'column' in table 'table' is not a valid "
164179 'datetime string or does not match the date time format (%d %b %Y).'
165180 )
166181 with pytest .raises (SynthesizerProcessingError , match = expected_bad_datetime_value_msg ):
167- _validate_column_parameters ('table' , 'column' , column_metadata , bad_datetime_value )
182+ _validate_column_parameters ('table' , 'column' , column_metadata , bad_datetime_value , False )
168183
169184 expected_bad_value_no_format_msg = re .escape (
170185 "The 'start_timestamp' parameter for column 'column' in table 'table' is not a "
171186 'valid datetime string.'
172187 )
173188 with pytest .raises (SynthesizerProcessingError , match = expected_bad_value_no_format_msg ):
174- _validate_column_parameters ('table' , 'column' , {'sdtype' : 'datetime' }, bad_datetime_value )
189+ _validate_column_parameters (
190+ 'table' , 'column' , {'sdtype' : 'datetime' }, bad_datetime_value , False
191+ )
175192
176193 expected_bad_start_end_msg = re .escape (
177194 "Invalid parameters for column 'column' in table 'table'. The 'start_timestamp' "
178195 "must be less than the 'end_timestamp'"
179196 )
180197 with pytest .raises (SynthesizerProcessingError , match = expected_bad_start_end_msg ):
181- _validate_column_parameters ('table' , 'column' , column_metadata , bad_start_end_combination )
198+ _validate_column_parameters (
199+ 'table' , 'column' , column_metadata , bad_start_end_combination , False
200+ )
182201
183202
184203def test__validate_column_parameters_categorical ():
@@ -192,7 +211,7 @@ def test__validate_column_parameters_categorical():
192211 "The 'category_values' parameter for column 'column' in table 'table' must be a list."
193212 )
194213 with pytest .raises (SynthesizerProcessingError , match = expected_msg ):
195- _validate_column_parameters ('table' , 'column' , column_metadata , bad_category_values )
214+ _validate_column_parameters ('table' , 'column' , column_metadata , bad_category_values , False )
196215
197216
198217@patch ('sdv.single_table.dayz._validate_column_parameters' )
@@ -202,6 +221,7 @@ def test__validate_table_parameters(mock__validate_column_parameters, metadata,
202221 table_metadata = metadata .tables ['table' ]
203222 bad_table_columns = {'columns' : {'bad_column' : {}}}
204223 bad_num_rows = {'num_rows' : - 1 }
224+ keys = ['id' ]
205225
206226 # Run and Assert
207227 expected_bad_column_msg = re .escape (
@@ -222,7 +242,7 @@ def test__validate_table_parameters(mock__validate_column_parameters, metadata,
222242
223243 # Assert
224244 expected_calls = [
225- call ('table' , col , table_metadata .columns [col ], col_parameters )
245+ call ('table' , col , table_metadata .columns [col ], col_parameters , col in keys )
226246 for col , col_parameters in dayz_parameters ['tables' ]['table' ]['columns' ].items ()
227247 ]
228248 mock__validate_column_parameters .assert_has_calls (expected_calls )
0 commit comments