@@ -374,10 +374,6 @@ def create_summary_stmt(self, *, estimate_optimization_results: bool = False) ->
374374 is_data_file_from_widening_src_partition,
375375 -- Aggregations for content = 0 (data files)
376376 count_if(content = 0) over (partition by { grouping_stmt } ) as n_files,
377- sum(case when content = 0 then record_count end) over (partition by { grouping_stmt } ) as n_records,
378- avg(case when content = 0 then file_size_in_bytes end) over (partition by { grouping_stmt } ) as avg_file_size,
379- min(case when content = 0 then file_size_in_bytes end) over (partition by { grouping_stmt } ) as min_file_size,
380- max(case when content = 0 then file_size_in_bytes end) over (partition by { grouping_stmt } ) as max_file_size,
381377 sum(case when content = 0 then file_size_in_bytes end) over (partition by { grouping_stmt } ) as sum_file_size
382378 from
383379 ranked_data_files
@@ -392,12 +388,9 @@ def create_summary_stmt(self, *, estimate_optimization_results: bool = False) ->
392388 rn2,
393389 is_data_file_from_widening_src_partition,
394390 n_files,
395- n_records,
396- avg_file_size,
397- min_file_size,
398- max_file_size,
399391 sum_file_size,
400- { target_file_size_stmt } as target_file_size
392+ { target_file_size_stmt } as target_file_size,
393+ { corr_threshold_expr } as corr_threshold
401394 from
402395 file_stats_per_partition
403396 ),
@@ -412,15 +405,14 @@ def create_summary_stmt(self, *, estimate_optimization_results: bool = False) ->
412405
413406 -- Aggregations for content = 0 (data files)
414407 first(n_files) as n_files,
415-
416- first(n_records) as n_records,
417- first(avg_file_size) as avg_file_size,
418- first(min_file_size) as min_file_size,
419- first(max_file_size) as max_file_size,
408+ sum(case when content = 0 then record_count end) as n_records,
409+ avg(case when content = 0 then file_size_in_bytes end) as avg_file_size,
410+ min(case when content = 0 then file_size_in_bytes end) as min_file_size,
411+ max(case when content = 0 then file_size_in_bytes end) as max_file_size,
420412 first(sum_file_size) as sum_file_size,
421413
422414 first(target_file_size) as target_file_size,
423-
415+ first(corr_threshold) as corr_threshold,
424416 count_if(
425417 content = 0 and
426418 (file_size_in_bytes < int(target_file_size * 0.75)
@@ -439,8 +431,6 @@ def create_summary_stmt(self, *, estimate_optimization_results: bool = False) ->
439431 else coalesce(corr(rn1, rn2), 1)
440432 end as float) as corr,
441433
442- { corr_threshold_expr } as corr_threshold,
443-
444434 -- Aggregations for content > 0 (delete files)
445435 count_if(content > 0) as n_delete_files,
446436
0 commit comments