@@ -58,7 +58,9 @@ use datafusion_expr::execution_props::ExecutionProps;
58
58
use datafusion_expr:: logical_plan:: CreateExternalTable ;
59
59
use datafusion_expr:: simplify:: SimplifyContext ;
60
60
use datafusion_expr:: utils:: conjunction;
61
- use datafusion_expr:: { col, Expr , Extension , LogicalPlan , TableProviderFilterPushDown , Volatility } ;
61
+ use datafusion_expr:: {
62
+ col, BinaryExpr , Expr , Extension , LogicalPlan , TableProviderFilterPushDown , Volatility ,
63
+ } ;
62
64
use datafusion_physical_expr:: { create_physical_expr, PhysicalExpr } ;
63
65
use datafusion_physical_plan:: filter:: FilterExec ;
64
66
use datafusion_physical_plan:: limit:: { GlobalLimitExec , LocalLimitExec } ;
@@ -534,6 +536,10 @@ impl<'a> DeltaScanBuilder<'a> {
534
536
Some ( schema. clone ( ) ) ,
535
537
) ?;
536
538
539
+ // TODO temporarily using full schema to generate pruning predicates
540
+ // should we optimize this by only including fields referenced from predicates?
541
+ let filter_df_schema = logical_schema. clone ( ) . to_dfschema ( ) ?;
542
+
537
543
let logical_schema = if let Some ( used_columns) = self . projection {
538
544
let mut fields = vec ! [ ] ;
539
545
for idx in used_columns {
@@ -545,18 +551,17 @@ impl<'a> DeltaScanBuilder<'a> {
545
551
} ;
546
552
547
553
let context = SessionContext :: new ( ) ;
548
- let df_schema = logical_schema. clone ( ) . to_dfschema ( ) ?;
549
554
550
555
let logical_filter = self . filter . map ( |expr| {
551
556
// Simplify the expression first
552
557
let props = ExecutionProps :: new ( ) ;
553
558
let simplify_context =
554
- SimplifyContext :: new ( & props) . with_schema ( df_schema . clone ( ) . into ( ) ) ;
559
+ SimplifyContext :: new ( & props) . with_schema ( filter_df_schema . clone ( ) . into ( ) ) ;
555
560
let simplifier = ExprSimplifier :: new ( simplify_context) . with_max_cycles ( 10 ) ;
556
561
let simplified = simplifier. simplify ( expr) . unwrap ( ) ;
557
562
558
563
context
559
- . create_physical_expr ( simplified, & df_schema )
564
+ . create_physical_expr ( simplified, & filter_df_schema )
560
565
. unwrap ( )
561
566
} ) ;
562
567
@@ -757,17 +762,47 @@ impl TableProvider for DeltaTable {
757
762
& self ,
758
763
filter : & [ & Expr ] ,
759
764
) -> DataFusionResult < Vec < TableProviderFilterPushDown > > {
760
- Ok ( filter
761
- . iter ( )
762
- . map ( |_| TableProviderFilterPushDown :: Inexact )
763
- . collect ( ) )
765
+ let partition_cols = self . snapshot ( ) ?. metadata ( ) . partition_columns . clone ( ) ;
766
+ Ok ( get_pushdown_filters ( filter, partition_cols) )
764
767
}
765
768
766
769
/// Returns the statistics reported by the table's current snapshot.
///
/// Yields `None` when the snapshot cannot be obtained, or when the snapshot
/// itself has no statistics to report.
fn statistics(&self) -> Option<Statistics> {
    // A failure to load the snapshot is mapped to "no statistics" rather than an error.
    let snapshot = self.snapshot().ok()?;
    snapshot.datafusion_table_statistics()
}
769
772
}
770
773
774
+ fn get_pushdown_filters (
775
+ filter : & [ & Expr ] ,
776
+ partition_cols : Vec < String > ,
777
+ ) -> Vec < TableProviderFilterPushDown > {
778
+ filter
779
+ . iter ( )
780
+ . map ( |filter| {
781
+ let columns = extract_columns ( filter) ;
782
+ if !columns. is_empty ( ) && columns. iter ( ) . all ( |col| partition_cols. contains ( col) ) {
783
+ TableProviderFilterPushDown :: Exact
784
+ } else {
785
+ TableProviderFilterPushDown :: Inexact
786
+ }
787
+ } )
788
+ . collect ( )
789
+ }
790
+
791
+ fn extract_columns ( expr : & Expr ) -> Vec < String > {
792
+ let mut columns = Vec :: new ( ) ;
793
+ match expr {
794
+ Expr :: Column ( col) => columns. push ( col. name . clone ( ) ) ,
795
+ Expr :: BinaryExpr ( BinaryExpr { left, right, .. } ) => {
796
+ let left_columns = extract_columns ( left) ;
797
+ let right_columns = extract_columns ( right) ;
798
+ columns. extend ( left_columns) ;
799
+ columns. extend ( right_columns) ;
800
+ }
801
+ _ => { }
802
+ }
803
+ columns
804
+ }
805
+
771
806
/// A Delta table provider that enables additional metadata columns to be included during the scan
772
807
#[ derive( Debug ) ]
773
808
pub struct DeltaTableProvider {
@@ -849,10 +884,8 @@ impl TableProvider for DeltaTableProvider {
849
884
& self ,
850
885
filter : & [ & Expr ] ,
851
886
) -> DataFusionResult < Vec < TableProviderFilterPushDown > > {
852
- Ok ( filter
853
- . iter ( )
854
- . map ( |_| TableProviderFilterPushDown :: Inexact )
855
- . collect ( ) )
887
+ let partition_cols = self . snapshot . metadata ( ) . partition_columns . clone ( ) ;
888
+ Ok ( get_pushdown_filters ( filter, partition_cols) )
856
889
}
857
890
858
891
fn statistics ( & self ) -> Option < Statistics > {
0 commit comments