@@ -1358,7 +1358,7 @@ impl PhysicalPlanner {
13581358 let common = scan
13591359 . common
13601360 . as_ref ( )
1361- . ok_or_else ( || GeneralError ( "DeltaScan missing common data " . into ( ) ) ) ?;
1361+ . ok_or_else ( || GeneralError ( "DeltaScan proto missing ' common' field (Scala serialization error) " . into ( ) ) ) ?;
13621362
13631363 let required_schema: SchemaRef =
13641364 convert_spark_types_to_arrow_schema ( common. required_schema . as_slice ( ) ) ;
@@ -1415,7 +1415,19 @@ impl PhysicalPlanner {
14151415 let data_filters: Result < Vec < Arc < dyn PhysicalExpr > > , ExecutionError > = common
14161416 . data_filters
14171417 . iter ( )
1418- . map ( |expr| self . create_expr ( expr, Arc :: clone ( & required_schema) ) )
1418+ . map ( |expr| {
1419+ let filter =
1420+ self . create_expr ( expr, Arc :: clone ( & required_schema) ) ?;
1421+ if has_column_mapping {
1422+ let mut rewriter = ColumnMappingFilterRewriter {
1423+ logical_to_physical : & logical_to_physical,
1424+ data_schema : & data_schema,
1425+ } ;
1426+ Ok ( filter. rewrite ( & mut rewriter) . data ( ) ?)
1427+ } else {
1428+ Ok ( filter)
1429+ }
1430+ } )
14191431 . collect ( ) ;
14201432
14211433 let object_store_options: HashMap < String , String > = common
@@ -1459,39 +1471,21 @@ impl PhysicalPlanner {
14591471 . tasks
14601472 . first ( )
14611473 . map ( |t| t. file_path . clone ( ) )
1462- . ok_or_else ( || GeneralError ( "DeltaScan has no tasks" . into ( ) ) ) ?;
1474+ . ok_or_else ( || GeneralError ( "DeltaScan has no tasks after split-mode injection (check DeltaPlanDataInjector) " . into ( ) ) ) ?;
14631475 let ( object_store_url, _) = prepare_object_store_with_configs (
14641476 self . session_ctx . runtime_env ( ) ,
14651477 one_file,
14661478 & object_store_options,
14671479 ) ?;
14681480
1469- // When column mapping is active, required_schema also needs physical
1470- // names so init_datasource_exec's name-matching logic works against the
1471- // physical data_schema.
1472- let read_required_schema = if has_column_mapping {
1473- let new_fields: Vec < _ > = required_schema
1474- . fields ( )
1475- . iter ( )
1476- . map ( |f| {
1477- if let Some ( physical) = logical_to_physical. get ( f. name ( ) ) {
1478- Arc :: new ( Field :: new (
1479- physical,
1480- f. data_type ( ) . clone ( ) ,
1481- f. is_nullable ( ) ,
1482- ) )
1483- } else {
1484- Arc :: clone ( f)
1485- }
1486- } )
1487- . collect ( ) ;
1488- Arc :: new ( Schema :: new ( new_fields) )
1489- } else {
1490- Arc :: clone ( & required_schema)
1491- } ;
1492-
1481+ // Keep required_schema in LOGICAL names (Spark's convention).
1482+ // data_schema uses physical names (when column mapping is active).
1483+ // DataFusion's schema adapter bridges the gap: it matches file
1484+ // columns against data_schema by name and produces the
1485+ // required_schema output shape, injecting nulls for missing
1486+ // columns (schema evolution).
14931487 let delta_exec = init_datasource_exec (
1494- read_required_schema ,
1488+ Arc :: clone ( & required_schema ) ,
14951489 Some ( data_schema) ,
14961490 Some ( partition_schema) ,
14971491 object_store_url,
@@ -1518,33 +1512,9 @@ impl PhysicalPlanner {
15181512 delta_exec
15191513 } ;
15201514
1521- // Phase 4: when column mapping is active, the output has PHYSICAL
1522- // column names. Add a ProjectionExec to rename back to logical.
1523- let final_exec = if has_column_mapping {
1524- let physical_to_logical: HashMap < String , String > = logical_to_physical
1525- . iter ( )
1526- . map ( |( l, p) | ( p. clone ( ) , l. clone ( ) ) )
1527- . collect ( ) ;
1528- let input_schema = final_exec. schema ( ) ;
1529- let rename_exprs: Result < Vec < ( Arc < dyn PhysicalExpr > , String ) > , ExecutionError > = input_schema
1530- . fields ( )
1531- . iter ( )
1532- . enumerate ( )
1533- . map ( |( idx, f) | {
1534- let col: Arc < dyn PhysicalExpr > =
1535- Arc :: new ( Column :: new ( f. name ( ) , idx) ) ;
1536- let logical = physical_to_logical
1537- . get ( f. name ( ) )
1538- . cloned ( )
1539- . unwrap_or_else ( || f. name ( ) . clone ( ) ) ;
1540- Ok ( ( col, logical) )
1541- } )
1542- . collect ( ) ;
1543- let rename_exprs = rename_exprs?;
1544- Arc :: new ( ProjectionExec :: try_new ( rename_exprs, final_exec) ?) as Arc < dyn ExecutionPlan >
1545- } else {
1546- final_exec
1547- } ;
1515+ // No rename projection needed: required_schema already uses
1516+ // logical names, and DataFusion's schema adapter handles the
1517+ // physical→logical mapping internally via data_schema.
15481518
15491519 Ok ( (
15501520 vec ! [ ] ,
@@ -2999,6 +2969,45 @@ fn expr_to_columns(
29992969 Ok ( ( left_field_indices, right_field_indices) )
30002970}
30012971
2972+ /// Rewrites Column references in a PhysicalExpr from logical names/indices
2973+ /// (as in required_schema) to physical names/indices (as in data_schema).
2974+ /// Used by the Delta scan path when column mapping is active so that pushed-down
2975+ /// data filters match the DataSourceExec's base schema (physical column names).
2976+ struct ColumnMappingFilterRewriter < ' a > {
2977+ logical_to_physical : & ' a HashMap < String , String > ,
2978+ data_schema : & ' a SchemaRef ,
2979+ }
2980+
2981+ impl TreeNodeRewriter for ColumnMappingFilterRewriter < ' _ > {
2982+ type Node = Arc < dyn PhysicalExpr > ;
2983+
2984+ fn f_down (
2985+ & mut self ,
2986+ node : Self :: Node ,
2987+ ) -> datafusion:: common:: Result < Transformed < Self :: Node > > {
2988+ if let Some ( column) = node. as_any ( ) . downcast_ref :: < Column > ( ) {
2989+ if let Some ( physical_name) = self . logical_to_physical . get ( column. name ( ) ) {
2990+ if let Some ( idx) = self
2991+ . data_schema
2992+ . fields ( )
2993+ . iter ( )
2994+ . position ( |f| f. name ( ) == physical_name)
2995+ {
2996+ return Ok ( Transformed :: yes ( Arc :: new ( Column :: new ( physical_name, idx) ) ) ) ;
2997+ }
2998+ log:: warn!(
2999+ "Column mapping: physical name '{}' for logical '{}' not found in data_schema; \
3000+ filter may fail at execution time",
3001+ physical_name, column. name( )
3002+ ) ;
3003+ }
3004+ Ok ( Transformed :: no ( node) )
3005+ } else {
3006+ Ok ( Transformed :: no ( node) )
3007+ }
3008+ }
3009+ }
3010+
30023011/// A physical join filter rewriter which rewrites the column indices in the expression
30033012/// to use the new column indices. See `rewrite_physical_expr`.
30043013struct JoinFilterRewriter < ' a > {
0 commit comments