@@ -24,6 +24,7 @@ pub mod operator_registry;
2424use crate :: execution:: operators:: init_csv_datasource_exec;
2525use crate :: execution:: operators:: IcebergScanExec ;
2626use crate :: execution:: {
27+ expressions:: list_positions:: ListPositionsExpr ,
2728 expressions:: subquery:: Subquery ,
2829 operators:: { ExecutionError , ExpandExec , ParquetWriterExec , ScanExec , ShuffleScanExec } ,
2930 planner:: expression_registry:: ExpressionRegistry ,
@@ -1656,12 +1657,8 @@ impl PhysicalPlanner {
16561657 . map ( |expr| self . create_expr ( expr, child. schema ( ) ) )
16571658 . collect :: < Result < Vec < _ > , _ > > ( ) ?;
16581659
1659- // For UnnestExec, we need to add a projection to put the columns in the right order:
1660- // 1. First add all projection columns
1661- // 2. Then add the array column to be exploded
1662- // Then UnnestExec will unnest the last column
1663-
1664- // Use return_field() to get the proper column names from the expressions
1660+ // For posexplode, a parallel List<Int32> positions column is added before the
1661+ // array column so UnnestExec can unnest both in parallel.
16651662 let child_schema = child. schema ( ) ;
16661663 let mut project_exprs: Vec < ( Arc < dyn PhysicalExpr > , String ) > = projections
16671664 . iter ( )
@@ -1674,34 +1671,44 @@ impl PhysicalPlanner {
16741671 } )
16751672 . collect ( ) ;
16761673
1677- // Add the array column as the last column
16781674 let array_field = child_expr
16791675 . return_field ( & child_schema)
16801676 . expect ( "Failed to get field from array expression" ) ;
16811677 let array_col_name = array_field. name ( ) . to_string ( ) ;
1678+
1679+ if explode. position {
1680+ let positions_expr: Arc < dyn PhysicalExpr > =
1681+ Arc :: new ( ListPositionsExpr :: new ( Arc :: clone ( & child_expr) ) ) ;
1682+ project_exprs. push ( ( positions_expr, "pos" . to_string ( ) ) ) ;
1683+ }
16821684 project_exprs. push ( ( Arc :: clone ( & child_expr) , array_col_name. clone ( ) ) ) ;
16831685
1684- // Create a projection to arrange columns as needed
16851686 let project_exec = Arc :: new ( ProjectionExec :: try_new (
16861687 project_exprs,
16871688 Arc :: clone ( & child. native_plan ) ,
16881689 ) ?) ;
16891690
1690- // Get the input schema from the projection
16911691 let project_schema = project_exec. schema ( ) ;
16921692
16931693 // Build the output schema for UnnestExec
1694- // The output schema replaces the list column with its element type
16951694 let mut output_fields: Vec < Field > = Vec :: new ( ) ;
16961695
16971696 // Add all projection columns (non-array columns)
16981697 for i in 0 ..projections. len ( ) {
16991698 output_fields. push ( project_schema. field ( i) . clone ( ) ) ;
17001699 }
17011700
1702- // Add the unnested array element field
1701+ let array_input_index = if explode. position {
1702+ // With outer=true, UnnestExec preserves rows whose array is empty or NULL
1703+ // and emits a NULL position for them, so pos must be nullable in that case.
1704+ output_fields. push ( Field :: new ( "pos" , DataType :: Int32 , explode. outer ) ) ;
1705+ projections. len ( ) + 1
1706+ } else {
1707+ projections. len ( )
1708+ } ;
1709+
17031710 // Extract the element type from the list/array type
1704- let array_field = project_schema. field ( projections . len ( ) ) ;
1711+ let array_field = project_schema. field ( array_input_index ) ;
17051712 let element_type = match array_field. data_type ( ) {
17061713 DataType :: List ( field) => field. data_type ( ) . clone ( ) ,
17071714 dt => {
@@ -1712,8 +1719,6 @@ impl PhysicalPlanner {
17121719 }
17131720 } ;
17141721
1715- // The output column has the same name as the input array column
1716- // but with the element type instead of the list type
17171722 output_fields. push ( Field :: new (
17181723 array_field. name ( ) ,
17191724 element_type,
@@ -1722,12 +1727,17 @@ impl PhysicalPlanner {
17221727
17231728 let output_schema = Arc :: new ( Schema :: new ( output_fields) ) ;
17241729
1725- // Use UnnestExec to explode the last column (the array column)
1726- // ListUnnest specifies which column to unnest and the depth (1 for single level)
1727- let list_unnest = ListUnnest {
1728- index_in_input_schema : projections. len ( ) , // Index of the array column to unnest
1729- depth : 1 , // Unnest one level (explode single array)
1730- } ;
1730+ let mut list_unnests = Vec :: with_capacity ( 2 ) ;
1731+ if explode. position {
1732+ list_unnests. push ( ListUnnest {
1733+ index_in_input_schema : projections. len ( ) ,
1734+ depth : 1 ,
1735+ } ) ;
1736+ }
1737+ list_unnests. push ( ListUnnest {
1738+ index_in_input_schema : array_input_index,
1739+ depth : 1 ,
1740+ } ) ;
17311741
17321742 let unnest_options = UnnestOptions {
17331743 preserve_nulls : explode. outer ,
@@ -1736,7 +1746,7 @@ impl PhysicalPlanner {
17361746
17371747 let unnest_exec = Arc :: new ( UnnestExec :: new (
17381748 project_exec,
1739- vec ! [ list_unnest ] ,
1749+ list_unnests ,
17401750 vec ! [ ] , // No struct columns to unnest
17411751 output_schema,
17421752 unnest_options,
0 commit comments