Skip to content

Commit 1b7a553

Browse files
authored
fix: patched-base Int decoding panic "range start index out of range for slice of length" (#77)
fix
1 parent d00ecdc commit 1b7a553

4 files changed

Lines changed: 25 additions & 3 deletions

File tree

src/bin/orc/export.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,10 @@ fn run_export<R: ChunkReader>(
9090
output: Box<dyn io::Write>,
9191
) -> Result<()> {
9292
// Build projection mask if columns are specified
93-
let projection = if args.columns.is_some() {
93+
let projection = if let Some(selected) = &args.columns {
9494
// Need to read metadata to build projection
9595
let metadata = read_metadata(&mut source)?;
9696

97-
let selected = args.columns.as_ref().unwrap();
9897
let root_children = metadata.root_data_type().children();
9998
let mut missing: Vec<&str> = selected
10099
.iter()

src/encoding/integer/rle_v2/patched_base.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,11 @@ pub fn read_patched_base<N: NInt, R: Read, S: EncodingSign>(
6969

7070
let patch_list_length = (fourth_byte & 0x1f) as usize;
7171

72-
let base = N::read_big_endian(reader, base_byte_width)?;
72+
// Read base value as i64 directly
73+
// This matches Java ORC's implementation, which always uses long for base values
74+
let base = i64::read_big_endian(reader, base_byte_width)?;
7375
let base = S::decode_signed_msb(base, base_byte_width);
76+
let base = N::from_i64(base);
7477

7578
// Get data values
7679
// TODO: this should read into Vec<i64>

tests/basic/data/patched_int.orc

154 KB
Binary file not shown.

tests/basic/main.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,26 @@ pub fn basic_test_bigint() {
275275
assert_batches_eq(&[last_3_rows], &expected);
276276
}
277277

278+
#[test]
279+
pub fn basic_test_patched_int() {
280+
let path = basic_path("patched_int.orc");
281+
let reader = new_arrow_reader(&path, &["c1"]);
282+
let batch = reader.collect::<Result<Vec<_>, _>>().unwrap();
283+
284+
let total_rows: usize = batch.iter().map(|b| b.num_rows()).sum();
285+
assert_eq!(999596, total_rows);
286+
287+
let last_batch_idx = batch.len() - 1;
288+
let total_rows = batch[last_batch_idx].num_rows();
289+
let last_3_rows = batch[last_batch_idx].slice(total_rows - 3, 3);
290+
291+
let expected = [
292+
"+----+", "| c1 |", "+----+", "| 1 |", "| 1 |", "| 1 |", "+----+",
293+
];
294+
295+
assert_batches_eq(&[last_3_rows], &expected);
296+
}
297+
278298
#[test]
279299
pub fn basic_test_nested_struct() {
280300
let path = basic_path("nested_struct.orc");

0 commit comments

Comments
 (0)