Skip to content

Commit 03429d6

Browse files
authored
fix: Fix bug with decoding timestamp as decimal (#36)
1 parent 7618add commit 03429d6

File tree

3 files changed

+43
-1
lines changed

3 files changed

+43
-1
lines changed

src/array_decoder/timestamp.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ fn decimal128_decoder(
103103
let data = get_rle_reader(column, data)?;
104104

105105
let secondary = stripe.stream_map().get(column, Kind::Secondary);
106-
let secondary = get_rle_reader(column, secondary)?;
106+
let secondary = get_unsigned_rle_reader(column, secondary);
107107

108108
let present = PresentDecoder::from_stripe(stripe, column);
109109

src/encoding/timestamp.rs

+2
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ fn decode(base: i64, seconds_since_orc_base: i64, nanoseconds: i64) -> (i128, i6
133133
// while we encode them as a single i64 of nanoseconds in Arrow.
134134
let nanoseconds_since_epoch =
135135
(seconds as i128 * NANOSECONDS_IN_SECOND as i128) + (nanoseconds as i128);
136+
// Returning seconds & nanoseconds only for error message
137+
// TODO: does the error message really need those details? Can simplify by removing.
136138
(nanoseconds_since_epoch, seconds, nanoseconds)
137139
}
138140

tests/basic/main.rs

+40
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,46 @@ pub fn decimal128_timestamps_test() {
615615
assert_batches_eq(&batch, &expected);
616616
}
617617

618+
fn integration_path(path: &str) -> String {
619+
let dir = env!("CARGO_MANIFEST_DIR");
620+
format!("{}/tests/integration/data/{}", dir, path)
621+
}
622+
623+
// TODO: Move this to integration test file. Placed here because it has access to assert_batches_eq.
624+
// Should make that function available to both basics & integration.
625+
#[test]
626+
pub fn decimal128_timestamps_1900_test() {
627+
let path = integration_path("TestOrcFile.testDate1900.orc");
628+
let f = File::open(path).expect("no file found");
629+
let mut reader = ArrowReaderBuilder::try_new(f)
630+
.unwrap()
631+
.with_batch_size(11) // it's a big file, we don't want to test more than that
632+
.with_schema(Arc::new(Schema::new(vec![
633+
Field::new("time", DataType::Decimal128(38, 9), true),
634+
Field::new("date", DataType::Date32, true),
635+
])))
636+
.build();
637+
let batch = reader.next().unwrap().unwrap();
638+
let expected = [
639+
"+-----------------------+------------+",
640+
"| time | date |",
641+
"+-----------------------+------------+",
642+
"| -2198229903.900000000 | 1900-12-25 |",
643+
"| -2198229903.899900000 | 1900-12-25 |",
644+
"| -2198229903.899800000 | 1900-12-25 |",
645+
"| -2198229903.899700000 | 1900-12-25 |",
646+
"| -2198229903.899600000 | 1900-12-25 |",
647+
"| -2198229903.899500000 | 1900-12-25 |",
648+
"| -2198229903.899400000 | 1900-12-25 |",
649+
"| -2198229903.899300000 | 1900-12-25 |",
650+
"| -2198229903.899200000 | 1900-12-25 |",
651+
"| -2198229903.899100000 | 1900-12-25 |",
652+
"| -2198229903.899000000 | 1900-12-25 |",
653+
"+-----------------------+------------+",
654+
];
655+
assert_batches_eq(&[batch], &expected);
656+
}
657+
618658
// From https://github.com/apache/arrow-rs/blob/7705acad845e8b2a366a08640f7acb4033ed7049/arrow-flight/src/sql/metadata/mod.rs#L67-L75
619659
pub fn assert_batches_eq(batches: &[RecordBatch], expected_lines: &[&str]) {
620660
let formatted = pretty::pretty_format_batches(batches).unwrap().to_string();

0 commit comments

Comments
 (0)