55
66from databricks .labs .lakebridge .reconcile .connectors .dialect_utils import DialectUtils
77from databricks .labs .lakebridge .reconcile .exception import ColumnMismatchException
8- from databricks .labs .lakebridge .reconcile .recon_capture import AbstractReconIntermediatePersist
8+ from databricks .labs .lakebridge .reconcile .recon_capture import (
9+ AbstractReconIntermediatePersist ,
10+ )
911from databricks .labs .lakebridge .reconcile .recon_output_config import (
1012 DataReconcileOutput ,
1113 MismatchOutput ,
@@ -56,7 +58,7 @@ def reconcile_data(
5658 target : DataFrame ,
5759 key_columns : list [str ],
5860 report_type : str ,
59- inter_persist : AbstractReconIntermediatePersist ,
61+ persistence : AbstractReconIntermediatePersist ,
6062) -> DataReconcileOutput :
6163 source_alias = "src"
6264 target_alias = "tgt"
@@ -75,7 +77,7 @@ def reconcile_data(
7577 )
7678 )
7779
78- df = inter_persist .write_and_read_df_with_volumes (df )
80+ df = persistence .write_and_read_df_with_volumes (df )
7981 # Checkpoint after joining source and target to backpressure
8082
8183 mismatch = _get_mismatch_data (df , source_alias , target_alias ) if report_type in {"all" , "data" } else None
@@ -414,7 +416,12 @@ def reconcile_agg_data_per_rule(
414416 return rule_reconcile_output
415417
416418
417- def join_aggregate_data (source : DataFrame , target : DataFrame , key_columns : list [str ] | None ) -> DataFrame :
419+ def join_aggregate_data (
420+ source : DataFrame ,
421+ target : DataFrame ,
422+ key_columns : list [str ] | None ,
423+ persistence : AbstractReconIntermediatePersist ,
424+ ) -> DataFrame :
418425 # TODO: Integrate with reconcile_data function
419426
420427 source_alias = "src"
@@ -439,5 +446,5 @@ def join_aggregate_data(source: DataFrame, target: DataFrame, key_columns: list[
439446
440447 joined_cols = source .columns + target .columns
441448 normalized_joined_cols = [DialectUtils .ansi_normalize_identifier (col ) for col in joined_cols ]
442- joined_df = df .select (* normalized_joined_cols )
449+ joined_df = persistence . write_and_read_df_with_volumes ( df .select (* normalized_joined_cols ) )
443450 return joined_df
0 commit comments