diff --git a/src/bin/orc/export.rs b/src/bin/orc/export.rs index edc1e56..ef5ddc5 100644 --- a/src/bin/orc/export.rs +++ b/src/bin/orc/export.rs @@ -90,11 +90,10 @@ fn run_export( output: Box, ) -> Result<()> { // Build projection mask if columns are specified - let projection = if args.columns.is_some() { + let projection = if let Some(selected) = &args.columns { // Need to read metadata to build projection let metadata = read_metadata(&mut source)?; - let selected = args.columns.as_ref().unwrap(); let root_children = metadata.root_data_type().children(); let mut missing: Vec<&str> = selected .iter() diff --git a/src/encoding/integer/rle_v2/patched_base.rs b/src/encoding/integer/rle_v2/patched_base.rs index 2d4980f..c252fe2 100644 --- a/src/encoding/integer/rle_v2/patched_base.rs +++ b/src/encoding/integer/rle_v2/patched_base.rs @@ -69,8 +69,11 @@ pub fn read_patched_base( let patch_list_length = (fourth_byte & 0x1f) as usize; - let base = N::read_big_endian(reader, base_byte_width)?; + // Read base value as i64 directly + // This matches Java ORC's implementation which always uses long for base values + let base = i64::read_big_endian(reader, base_byte_width)?; let base = S::decode_signed_msb(base, base_byte_width); + let base = N::from_i64(base); // Get data values // TODO: this should read into Vec diff --git a/tests/basic/data/patched_int.orc b/tests/basic/data/patched_int.orc new file mode 100644 index 0000000..4f6355e Binary files /dev/null and b/tests/basic/data/patched_int.orc differ diff --git a/tests/basic/main.rs b/tests/basic/main.rs index 0ba3e31..7bca3a6 100644 --- a/tests/basic/main.rs +++ b/tests/basic/main.rs @@ -275,6 +275,26 @@ pub fn basic_test_bigint() { assert_batches_eq(&[last_3_rows], &expected); } +#[test] +pub fn basic_test_patched_int() { + let path = basic_path("patched_int.orc"); + let reader = new_arrow_reader(&path, &["c1"]); + let batch = reader.collect::<Result<Vec<_>, _>>().unwrap(); + + let total_rows: usize = batch.iter().map(|b| 
b.num_rows()).sum(); + assert_eq!(999596, total_rows); + + let last_batch_idx = batch.len() - 1; + let total_rows = batch[last_batch_idx].num_rows(); + let last_3_rows = batch[last_batch_idx].slice(total_rows - 3, 3); + + let expected = [ + "+----+", "| c1 |", "+----+", "| 1 |", "| 1 |", "| 1 |", "+----+", + ]; + + assert_batches_eq(&[last_3_rows], &expected); +} + #[test] pub fn basic_test_nested_struct() { let path = basic_path("nested_struct.orc");