@@ -33,7 +33,7 @@ pub struct FileMetadata {
3333 pub blocks : Vec < arrow_format:: ipc:: Block > ,
3434
3535 /// Dictionaries associated to each dict_id
36- pub ( crate ) dictionaries : Option < Vec < arrow_format:: ipc:: Block > > ,
36+ pub dictionaries : Option < Vec < arrow_format:: ipc:: Block > > ,
3737
3838 /// The total size of the file in bytes
3939 pub size : u64 ,
@@ -78,27 +78,34 @@ pub(crate) fn get_dictionary_batch<'a>(
7878 }
7979}
8080
81- fn read_dictionary_block < R : Read + Seek > (
81+ pub fn read_dictionary_block < R : Read + Seek > (
8282 reader : & mut R ,
8383 metadata : & FileMetadata ,
8484 block : & arrow_format:: ipc:: Block ,
85+ // When true, `reader` holds a standalone IPC Block rather than a complete IPC File,
86+ // so the message is read from offset 0 instead of `block.offset`.
87+ force_zero_offset : bool ,
8588 dictionaries : & mut Dictionaries ,
8689 message_scratch : & mut Vec < u8 > ,
8790 dictionary_scratch : & mut Vec < u8 > ,
8891) -> PolarsResult < ( ) > {
89- let message = get_message_from_block ( reader, block, message_scratch) ?;
90- let batch = get_dictionary_batch ( & message) ?;
91-
92- let offset: u64 = block
93- . offset
94- . try_into ( )
95- . map_err ( |_| polars_err ! ( oos = OutOfSpecKind :: UnexpectedNegativeInteger ) ) ?;
92+ let offset: u64 = if force_zero_offset {
93+ 0
94+ } else {
95+ block
96+ . offset
97+ . try_into ( )
98+ . map_err ( |_| polars_err ! ( oos = OutOfSpecKind :: UnexpectedNegativeInteger ) ) ?
99+ } ;
96100
97101 let length: u64 = block
98102 . meta_data_length
99103 . try_into ( )
100104 . map_err ( |_| polars_err ! ( oos = OutOfSpecKind :: UnexpectedNegativeInteger ) ) ?;
101105
106+ let message = get_message_from_block_offset ( reader, offset, message_scratch) ?;
107+ let batch = get_dictionary_batch ( & message) ?;
108+
102109 read_dictionary (
103110 batch,
104111 & metadata. schema ,
@@ -132,6 +139,7 @@ pub fn read_file_dictionaries<R: Read + Seek>(
132139 reader,
133140 metadata,
134141 block,
142+ false ,
135143 & mut dictionaries,
136144 & mut message_scratch,
137145 scratch,
@@ -281,19 +289,19 @@ pub(crate) fn get_record_batch(
281289 }
282290}
283291
284- fn get_message_from_block_offset < ' a , R : Read + Seek > (
292+ pub fn get_message_from_block_offset < ' a , R : Read + Seek > (
285293 reader : & mut R ,
286294 offset : u64 ,
287295 message_scratch : & ' a mut Vec < u8 > ,
288296) -> PolarsResult < arrow_format:: ipc:: MessageRef < ' a > > {
289- // read length
290297 reader. seek ( SeekFrom :: Start ( offset) ) ?;
291298 let mut meta_buf = [ 0 ; 4 ] ;
292299 reader. read_exact ( & mut meta_buf) ?;
293300 if meta_buf == CONTINUATION_MARKER {
294301 // continuation marker encountered, read message next
295302 reader. read_exact ( & mut meta_buf) ?;
296303 }
304+
297305 let meta_len = i32:: from_le_bytes ( meta_buf)
298306 . try_into ( )
299307 . map_err ( |_| polars_err ! ( oos = OutOfSpecKind :: UnexpectedNegativeInteger ) ) ?;
@@ -337,15 +345,21 @@ pub fn read_batch<R: Read + Seek>(
337345 projection : Option < & [ usize ] > ,
338346 limit : Option < usize > ,
339347 index : usize ,
348+ // When true, `reader` is treated as a standalone IPC Block: offset 0 is used instead of `block.offset`.
349+ block_mode : bool ,
340350 message_scratch : & mut Vec < u8 > ,
341351 data_scratch : & mut Vec < u8 > ,
342352) -> PolarsResult < RecordBatchT < Box < dyn Array > > > {
343353 let block = metadata. blocks [ index] ;
344354
345- let offset: u64 = block
346- . offset
347- . try_into ( )
348- . map_err ( |_| polars_err ! ( oos = OutOfSpecKind :: NegativeFooterLength ) ) ?;
355+ let offset: u64 = if block_mode {
356+ 0
357+ } else {
358+ block
359+ . offset
360+ . try_into ( )
361+ . map_err ( |_| polars_err ! ( oos = OutOfSpecKind :: NegativeFooterLength ) ) ?
362+ } ;
349363
350364 let length: u64 = block
351365 . meta_data_length
0 commit comments