File tree Expand file tree Collapse file tree 2 files changed +23
-0
lines changed
crates/polars-io/src/parquet/read Expand file tree Collapse file tree 2 files changed +23
-0
lines changed Original file line number Diff line number Diff line change @@ -266,11 +266,21 @@ fn rg_to_dfs_prefiltered(
266266 let num_live_columns = live_variables. len ( ) ;
267267 let num_dead_columns = projection. len ( ) - num_live_columns;
268268
269+ // @NOTE: This is probably already sorted, but just to be sure.
270+ let mut projection_sorted = projection. to_vec ( ) ;
271+ projection_sorted. sort ( ) ;
272+
269273 // We create two look-up tables that map indexes offsets into the live- and dead-set onto
270274 // column indexes of the schema.
271275 let mut live_idx_to_col_idx = Vec :: with_capacity ( num_live_columns) ;
272276 let mut dead_idx_to_col_idx = Vec :: with_capacity ( num_dead_columns) ;
277+ let mut offset = 0 ;
273278 for ( i, field) in schema. iter_values ( ) . enumerate ( ) {
279+ if projection_sorted. get ( offset) . copied ( ) != Some ( i) {
280+ continue ;
281+ }
282+
283+ offset += 1 ;
274284 if live_variables. contains ( & field. name [ ..] ) {
275285 live_idx_to_col_idx. push ( i) ;
276286 } else {
Original file line number Diff line number Diff line change @@ -1904,3 +1904,16 @@ def test_write_binary_open_file(tmp_path: Path) -> None:
19041904
19051905 out = pl .read_parquet (path )
19061906 assert_frame_equal (out , df )
1907+
1908+
1909+ def test_prefilter_with_projection () -> None :
1910+ f = io .BytesIO ()
1911+ pl .DataFrame ({"a" : [1 ], "b" : [2 ]}).write_parquet (f )
1912+
1913+ f .seek (0 )
1914+ (
1915+ pl .scan_parquet (f , parallel = "prefiltered" )
1916+ .filter (pl .col .a == 1 )
1917+ .select (pl .col .a )
1918+ .collect ()
1919+ )
You can’t perform that action at this time.
0 commit comments