@@ -27,14 +27,17 @@ use std::fmt::{self, Debug};
 use std::sync::Arc;

 use arrow_array::types::UInt16Type;
-use arrow_array::{Array, DictionaryArray, RecordBatch, StringArray, TypedDictionaryArray};
+use arrow_array::{
+    Array, BooleanArray, DictionaryArray, RecordBatch, StringArray, TypedDictionaryArray,
+};
 use arrow_cast::display::array_value_to_string;
 use arrow_cast::{cast_with_options, CastOptions};
 use arrow_schema::{
     ArrowError, DataType as ArrowDataType, Field, Schema as ArrowSchema, SchemaRef,
     SchemaRef as ArrowSchemaRef, TimeUnit,
 };
 use arrow_select::concat::concat_batches;
+use arrow_select::filter::filter_record_batch;
 use async_trait::async_trait;
 use chrono::{DateTime, TimeZone, Utc};
 use datafusion::catalog::memory::DataSourceExec;
@@ -59,8 +62,11 @@ use datafusion_common::{
 use datafusion_expr::execution_props::ExecutionProps;
 use datafusion_expr::logical_plan::CreateExternalTable;
 use datafusion_expr::simplify::SimplifyContext;
-use datafusion_expr::utils::conjunction;
-use datafusion_expr::{col, Expr, Extension, LogicalPlan, TableProviderFilterPushDown, Volatility};
+use datafusion_expr::utils::{conjunction, split_conjunction};
+use datafusion_expr::{
+    col, BinaryExpr, Expr, Extension, LogicalPlan, Operator, TableProviderFilterPushDown,
+    Volatility,
+};
 use datafusion_physical_expr::{create_physical_expr, PhysicalExpr};
 use datafusion_physical_plan::filter::FilterExec;
 use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
@@ -86,7 +92,9 @@ use url::Url;
 use crate::delta_datafusion::expr::parse_predicate_expression;
 use crate::delta_datafusion::schema_adapter::DeltaSchemaAdapterFactory;
 use crate::errors::{DeltaResult, DeltaTableError};
-use crate::kernel::{Add, DataCheck, EagerSnapshot, Invariant, Snapshot, StructTypeExt};
+use crate::kernel::{
+    Add, DataCheck, EagerSnapshot, Invariant, LogDataHandler, Snapshot, StructTypeExt,
+};
 use crate::logstore::LogStoreRef;
 use crate::table::builder::ensure_table_uri;
 use crate::table::state::DeltaTableState;
@@ -541,6 +549,16 @@ impl<'a> DeltaScanBuilder<'a> {
             for idx in used_columns {
                 fields.push(logical_schema.field(*idx).to_owned());
             }
+            // Partition filters with Exact pushdown were removed from the projection by the
+            // DataFusion optimizer; add them back so the pruning predicate can still be evaluated.
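+            // For instance (hypothetical table): with `t` partitioned by `date`, the query
+            // `SELECT value FROM t WHERE date = '2021-01-01'` is planned with a projection
+            // of only [value], yet `date` must appear in the scan schema for file pruning.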
+            if let Some(expr) = &self.filter {
+                for c in expr.column_refs() {
+                    let idx = logical_schema.index_of(c.name.as_str())?;
+                    if !used_columns.contains(&idx) {
+                        fields.push(logical_schema.field(idx).to_owned());
+                    }
+                }
+            }
             Arc::new(ArrowSchema::new(fields))
         } else {
             logical_schema
@@ -549,32 +567,48 @@ impl<'a> DeltaScanBuilder<'a> {
         let context = SessionContext::new();
         let df_schema = logical_schema.clone().to_dfschema()?;

-        let logical_filter = self.filter.map(|expr| {
-            // Simplify the expression first
-            let props = ExecutionProps::new();
-            let simplify_context =
-                SimplifyContext::new(&props).with_schema(df_schema.clone().into());
-            let simplifier = ExprSimplifier::new(simplify_context).with_max_cycles(10);
-            let simplified = simplifier.simplify(expr).unwrap();
+        let logical_filter = self
+            .filter
+            .clone()
+            .map(|expr| simplify_expr(&context, &df_schema, expr));
+        // Only Inexact filters should be pushed down to the data source; pushing Exact
+        // filters too would make the scan statistics inexact and disable DataFusion
+        // optimizations such as AggregateStatistics.
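+        // E.g. for a table partitioned by `date`, the predicate
+        // `date = '2021-01-01' AND value > 10` splits so that only `value > 10`
+        // reaches the Parquet reader; the `date` conjunct is fully resolved by pruning.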
+        let pushdown_filter = self
+            .filter
+            .and_then(|expr| {
+                let predicates = split_conjunction(&expr);
+                let pushdown_filters = get_pushdown_filters(
+                    &predicates,
+                    self.snapshot.metadata().partition_columns.as_slice(),
+                );

-            context
-                .create_physical_expr(simplified, &df_schema)
-                .unwrap()
-        });
+                let filtered_predicates = predicates
+                    .into_iter()
+                    .zip(pushdown_filters.into_iter())
+                    .filter_map(|(filter, pushdown)| {
+                        if pushdown == TableProviderFilterPushDown::Inexact {
+                            Some(filter.clone())
+                        } else {
+                            None
+                        }
+                    });
+                conjunction(filtered_predicates)
+            })
+            .map(|expr| simplify_expr(&context, &df_schema, expr));

         // Perform Pruning of files to scan
-        let (files, files_scanned, files_pruned) = match self.files {
+        let (files, files_scanned, files_pruned, pruning_mask) = match self.files {
             Some(files) => {
                 let files = files.to_owned();
                 let files_scanned = files.len();
-                (files, files_scanned, 0)
+                (files, files_scanned, 0, None)
             }
             None => {
                 // early return in case we have no push down filters or limit
                 if logical_filter.is_none() && self.limit.is_none() {
                     let files = self.snapshot.file_actions()?;
                     let files_scanned = files.len();
-                    (files, files_scanned, 0)
+                    (files, files_scanned, 0, None)
                 } else {
                     let num_containers = self.snapshot.num_containers();

@@ -595,7 +629,7 @@ impl<'a> DeltaScanBuilder<'a> {
                     for (action, keep) in self
                         .snapshot
                         .file_actions_iter()?
-                        .zip(files_to_prune.into_iter())
+                        // iterate by reference so `files_to_prune` can still be moved
+                        // into the pruning mask below
+                        .zip(files_to_prune.iter().cloned())
                     {
                         // prune file based on predicate pushdown
                         if keep {
@@ -627,7 +661,7 @@ impl<'a> DeltaScanBuilder<'a> {

                     let files_scanned = files.len();
                     let files_pruned = num_containers - files_scanned;
-                    (files, files_scanned, files_pruned)
+                    (files, files_scanned, files_pruned, Some(files_to_prune))
                 }
             }
         };
@@ -684,10 +718,18 @@ impl<'a> DeltaScanBuilder<'a> {
             ));
         }

-        let stats = self
-            .snapshot
-            .datafusion_table_statistics()
-            .unwrap_or(Statistics::new_unknown(&schema));
+        // FIXME: where is the correct place to marry file pruning with statistics pruning?
+        // Temporarily re-create the log handler just so that we can compute the stats.
+        // Should we update datafusion_table_statistics to optionally take the mask?
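+        // `pruning_mask` mirrors the order of `file_actions_iter` above: `true` keeps a
+        // file's stats row, `false` drops it, so the reported statistics cover only the
+        // files that will actually be scanned.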
+        let stats = if let Some(mask) = pruning_mask {
+            let es = self.snapshot.snapshot();
+            let pruned_stats = prune_file_statistics(&es.files, mask);
+            LogDataHandler::new(&pruned_stats, es.metadata(), es.schema()).statistics()
+        } else {
+            self.snapshot.datafusion_table_statistics()
+        };
+
+        let stats = stats.unwrap_or(Statistics::new_unknown(&schema));

         let parquet_options = TableParquetOptions {
             global: self.session.config().options().execution.parquet.clone(),
@@ -700,7 +742,7 @@ impl<'a> DeltaScanBuilder<'a> {
         // Sometimes (i.e Merge) we want to prune files that don't make the
         // filter and read the entire contents for files that do match the
         // filter
-        if let Some(predicate) = logical_filter {
+        if let Some(predicate) = pushdown_filter {
             if config.enable_parquet_pushdown {
                 file_source = file_source.with_predicate(Arc::clone(&file_schema), predicate);
             }
@@ -746,6 +788,43 @@ impl<'a> DeltaScanBuilder<'a> {
     }
 }

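+/// Simplify `expr` against `df_schema`, then lower it to a `PhysicalExpr`.
+/// Extracted from the former inline closure in `build`; the `unwrap`s assume the
+/// filter was already validated against the logical schema by the caller.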
+fn simplify_expr(
+    context: &SessionContext,
+    df_schema: &DFSchema,
+    expr: Expr,
+) -> Arc<dyn PhysicalExpr> {
+    // Simplify the expression first
+    let props = ExecutionProps::new();
+    let simplify_context = SimplifyContext::new(&props).with_schema(df_schema.clone().into());
+    let simplifier = ExprSimplifier::new(simplify_context).with_max_cycles(10);
+    let simplified = simplifier.simplify(expr).unwrap();
+
+    context
+        .create_physical_expr(simplified, df_schema)
+        .unwrap()
+}
+
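+/// Filter per-file stats batches down to the files kept by pruning. Assumes
+/// `pruning_mask` holds exactly one entry per file across all batches, in the same
+/// order the files were pruned; the slice below panics if the counts diverge.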
+fn prune_file_statistics(
+    record_batches: &Vec<RecordBatch>,
+    pruning_mask: Vec<bool>,
+) -> Vec<RecordBatch> {
+    let mut filtered_batches = Vec::new();
+    let mut mask_offset = 0;
+
+    for batch in record_batches {
+        let num_rows = batch.num_rows();
+        let batch_mask = &pruning_mask[mask_offset..mask_offset + num_rows];
+        mask_offset += num_rows;
+
+        let boolean_mask = BooleanArray::from(batch_mask.to_vec());
+        let filtered_batch =
+            filter_record_batch(batch, &boolean_mask).expect("Failed to filter RecordBatch");
+        filtered_batches.push(filtered_batch);
+    }
+
+    filtered_batches
+}
+
 // TODO: implement this for Snapshot, not for DeltaTable
 #[async_trait]
 impl TableProvider for DeltaTable {
@@ -793,17 +872,81 @@ impl TableProvider for DeltaTable {
         &self,
         filter: &[&Expr],
     ) -> DataFusionResult<Vec<TableProviderFilterPushDown>> {
-        Ok(filter
-            .iter()
-            .map(|_| TableProviderFilterPushDown::Inexact)
-            .collect())
+        let partition_cols = self.snapshot()?.metadata().partition_columns.as_slice();
+        Ok(get_pushdown_filters(filter, partition_cols))
     }

     fn statistics(&self) -> Option<Statistics> {
         self.snapshot().ok()?.datafusion_table_statistics()
     }
 }

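+/// Classify each filter as Exact (fully answered by partition pruning) or Inexact
+/// (must be re-evaluated against the data). A sketch of the contract: with
+/// `partition_cols = ["date"]`, `col("date").eq(lit("2021-01-01"))` maps to Exact
+/// while `col("value").gt(lit(10))` maps to Inexact.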
+fn get_pushdown_filters(
+    filter: &[&Expr],
+    partition_cols: &[String],
+) -> Vec<TableProviderFilterPushDown> {
+    filter
+        .iter()
+        .cloned()
+        .map(|expr| {
+            let applicable = expr_is_exact_predicate_for_cols(partition_cols, expr);
+            if !expr.column_refs().is_empty() && applicable {
+                TableProviderFilterPushDown::Exact
+            } else {
+                TableProviderFilterPushDown::Inexact
+            }
+        })
+        .collect()
+}
+
+// Inspired by datafusion::listing::helpers, but adapted to stats-based pruning only.
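+// A predicate counts as exact only when every column it references is a partition
+// column and it is built from plain comparisons and boolean connectives; anything
+// else (e.g. a scalar function over a partition column) falls through to Inexact.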
+fn expr_is_exact_predicate_for_cols(partition_cols: &[String], expr: &Expr) -> bool {
+    let mut is_applicable = true;
+    expr.apply(|expr| match expr {
+        Expr::Column(Column { ref name, .. }) => {
+            is_applicable &= partition_cols.contains(name);
+
+            // TODO: decide if we should constrain this to Utf8 columns (including views, dicts etc)
+
+            if is_applicable {
+                Ok(TreeNodeRecursion::Jump)
+            } else {
+                Ok(TreeNodeRecursion::Stop)
+            }
+        }
+        Expr::BinaryExpr(BinaryExpr { ref op, .. }) => {
+            is_applicable &= matches!(
+                op,
+                Operator::And
+                    | Operator::Or
+                    | Operator::NotEq
+                    | Operator::Eq
+                    | Operator::Gt
+                    | Operator::GtEq
+                    | Operator::Lt
+                    | Operator::LtEq
+            );
+            if is_applicable {
+                Ok(TreeNodeRecursion::Continue)
+            } else {
+                Ok(TreeNodeRecursion::Stop)
+            }
+        }
+        Expr::Literal(_)
+        | Expr::Not(_)
+        | Expr::IsNotNull(_)
+        | Expr::IsNull(_)
+        | Expr::Between(_)
+        | Expr::InList(_) => Ok(TreeNodeRecursion::Continue),
+        _ => {
+            is_applicable = false;
+            Ok(TreeNodeRecursion::Stop)
+        }
+    })
+    .unwrap();
+    is_applicable
+}
+
 /// A Delta table provider that enables additional metadata columns to be included during the scan
 #[derive(Debug)]
 pub struct DeltaTableProvider {
@@ -885,10 +1028,8 @@ impl TableProvider for DeltaTableProvider {
         &self,
         filter: &[&Expr],
     ) -> DataFusionResult<Vec<TableProviderFilterPushDown>> {
-        Ok(filter
-            .iter()
-            .map(|_| TableProviderFilterPushDown::Inexact)
-            .collect())
+        let partition_cols = self.snapshot.metadata().partition_columns.as_slice();
+        Ok(get_pushdown_filters(filter, partition_cols))
     }

     fn statistics(&self) -> Option<Statistics> {