|
17 | 17 | is_aggr_not_equal, |
18 | 18 | foreign_key, |
19 | 19 | compare_datasets, |
| 20 | + DatasetComparisonConfig, |
20 | 21 | is_data_fresh_per_time_window, |
21 | 22 | has_valid_schema, |
22 | 23 | ) |
@@ -550,7 +551,7 @@ def test_dataset_compare(spark: SparkSession, set_utc_timezone): |
550 | 551 | columns=columns, |
551 | 552 | ref_columns=columns, |
552 | 553 | ref_df_name="df_ref", |
553 | | - check_missing_records=False, |
| 554 | + config=DatasetComparisonConfig(check_missing_records=False), |
554 | 555 | ) |
555 | 556 |
|
556 | 557 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -659,8 +660,10 @@ def test_compare_datasets_with_diff_col_names_and_check_missing(spark: SparkSess |
659 | 660 | columns=columns, |
660 | 661 | ref_columns=ref_columns, |
661 | 662 | ref_df_name="df_ref", |
662 | | - check_missing_records=True, |
663 | | - exclude_columns=[F.col("score")], |
| 663 | + config=DatasetComparisonConfig( |
| 664 | + check_missing_records=True, |
| 665 | + exclude_columns=[F.col("score")], |
| 666 | + ), |
664 | 667 | ) |
665 | 668 |
|
666 | 669 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -863,7 +866,7 @@ def test_dataset_compare_ref_as_table_and_skip_map_col(spark: SparkSession, set_ |
863 | 866 | columns=columns, |
864 | 867 | ref_columns=columns, |
865 | 868 | ref_table=ref_table, |
866 | | - check_missing_records=False, |
| 869 | + config=DatasetComparisonConfig(check_missing_records=False), |
867 | 870 | ) |
868 | 871 |
|
869 | 872 | actual: DataFrame = apply(df, spark, {}) |
@@ -955,7 +958,7 @@ def test_dataset_compare_with_no_columns_to_compare_and_check_missing(spark: Spa |
955 | 958 | columns=columns, |
956 | 959 | ref_columns=columns, |
957 | 960 | ref_df_name="df_ref", |
958 | | - check_missing_records=True, |
| 961 | + config=DatasetComparisonConfig(check_missing_records=True), |
959 | 962 | ) |
960 | 963 |
|
961 | 964 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -988,7 +991,7 @@ def test_dataset_compare_with_empty_ref_and_check_missing(spark: SparkSession): |
988 | 991 | columns=columns, |
989 | 992 | ref_columns=columns, |
990 | 993 | ref_df_name="df_ref", |
991 | | - check_missing_records=True, |
| 994 | + config=DatasetComparisonConfig(check_missing_records=True), |
992 | 995 | ) |
993 | 996 |
|
994 | 997 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -1042,7 +1045,7 @@ def test_dataset_compare_with_empty_df_and_check_missing(spark: SparkSession): |
1042 | 1045 | columns=columns, |
1043 | 1046 | ref_columns=columns, |
1044 | 1047 | ref_df_name="df_ref", |
1045 | | - check_missing_records=True, |
| 1048 | + config=DatasetComparisonConfig(check_missing_records=True), |
1046 | 1049 | ) |
1047 | 1050 |
|
1048 | 1051 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -1097,7 +1100,7 @@ def test_dataset_compare_with_empty_df_and_ref(spark: SparkSession): |
1097 | 1100 | columns=columns, |
1098 | 1101 | ref_columns=columns, |
1099 | 1102 | ref_df_name="df_ref", |
1100 | | - check_missing_records=True, |
| 1103 | + config=DatasetComparisonConfig(check_missing_records=True), |
1101 | 1104 | ) |
1102 | 1105 |
|
1103 | 1106 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -1155,7 +1158,7 @@ def test_dataset_compare_unsorted_df_columns(spark: SparkSession): |
1155 | 1158 | columns=columns, |
1156 | 1159 | ref_columns=columns, # columns are matched by position, so the order of columns must align exactly |
1157 | 1160 | ref_df_name="df_ref", |
1158 | | - check_missing_records=True, |
| 1161 | + config=DatasetComparisonConfig(check_missing_records=True), |
1159 | 1162 | ) |
1160 | 1163 |
|
1161 | 1164 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -1204,9 +1207,11 @@ def test_compare_dataset_disabled_null_safe_row_matching(spark: SparkSession): |
1204 | 1207 | columns=columns, |
1205 | 1208 | ref_columns=columns, # columns are matched by position, so the order of columns must align exactly |
1206 | 1209 | ref_df_name="df_ref", |
1207 | | - check_missing_records=True, |
1208 | | - null_safe_row_matching=False, |
1209 | | - null_safe_column_value_matching=True, |
| 1210 | + config=DatasetComparisonConfig( |
| 1211 | + check_missing_records=True, |
| 1212 | + null_safe_row_matching=False, |
| 1213 | + null_safe_column_value_matching=True, |
| 1214 | + ), |
1210 | 1215 | ) |
1211 | 1216 |
|
1212 | 1217 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
@@ -1304,8 +1309,10 @@ def test_compare_dataset_disabled_null_safe_column_value_matching(spark: SparkSe |
1304 | 1309 | columns=columns, |
1305 | 1310 | ref_columns=columns, |
1306 | 1311 | ref_df_name="df_ref", |
1307 | | - check_missing_records=True, |
1308 | | - null_safe_column_value_matching=False, |
| 1312 | + config=DatasetComparisonConfig( |
| 1313 | + check_missing_records=True, |
| 1314 | + null_safe_column_value_matching=False, |
| 1315 | + ), |
1309 | 1316 | ) |
1310 | 1317 |
|
1311 | 1318 | actual: DataFrame = apply(df, spark, {"df_ref": df_ref}) |
|
0 commit comments