Skip to content

Commit bd34142

Browse files
authored
fix: use checked arithmetic for Turso integer-millis timestamp read path (spiceai#10786)
The Turso connector's read path converts stored integer milliseconds to the target timestamp unit using plain `*` multiplication. For Nanosecond and Microsecond units, this silently wraps on overflow in release builds for dates outside ~1677-2262, producing corrupt timestamp values. The write path already uses `checked_mul` (line 1902-1917), but the read path at line 691-692 was missed. The RFC3339 text read path also handles this correctly via `timestamp_nanos_opt()`. Replace unchecked `*` with `checked_mul`, returning NULL on overflow (consistent with how `timestamp_nanos_opt()` returns `None`).
1 parent aa05406 commit bd34142

1 file changed

Lines changed: 76 additions & 7 deletions

File tree

crates/data_components/src/turso.rs

Lines changed: 76 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -684,13 +684,15 @@ impl TursoTableProvider {
684684
.iter()
685685
.map(|row| match &row[col_idx] {
686686
TursoValue::Integer(millis) => {
687-
// Integer format: Convert from stored milliseconds to the target unit
688-
Some(match unit {
689-
TimeUnit::Second => millis / MILLIS_PER_SECOND,
690-
TimeUnit::Millisecond => *millis,
691-
TimeUnit::Microsecond => millis * MICROS_PER_MILLI,
692-
TimeUnit::Nanosecond => millis * NANOS_PER_MILLI,
693-
})
687+
// Integer format: Convert from stored milliseconds to the target unit.
688+
// Use checked arithmetic for Nanosecond to avoid silent overflow
689+
// for dates outside ~1677-2262 (same range as timestamp_nanos_opt).
690+
match unit {
691+
TimeUnit::Second => Some(millis / MILLIS_PER_SECOND),
692+
TimeUnit::Millisecond => Some(*millis),
693+
TimeUnit::Microsecond => millis.checked_mul(MICROS_PER_MILLI),
694+
TimeUnit::Nanosecond => millis.checked_mul(NANOS_PER_MILLI),
695+
}
694696
}
695697
TursoValue::Text(rfc3339_str) => {
696698
// RFC3339 TEXT format: Parse and convert to target unit.
@@ -2628,4 +2630,71 @@ mod tests {
26282630
"TimestampMillisecond i64::MAX should overflow when converting to nanoseconds"
26292631
);
26302632
}
2633+
2634+
#[test]
2635+
fn test_integer_millis_timestamp_outside_nanos_range() {
2636+
use arrow::datatypes::{DataType, Field, TimeUnit};
2637+
2638+
// Year 2300 as milliseconds since epoch — outside the i64 nanosecond range (~1677-2262).
2639+
// millis * 1_000_000 would overflow i64 and silently wrap in release builds.
2640+
let millis_2300: i64 = 10_413_792_000_000;
2641+
let rows = vec![vec![TursoValue::Integer(millis_2300)]];
2642+
2643+
// Nanosecond unit: millis * 1_000_000 overflows — should produce NULL, not a wrapped value
2644+
let schema_nanos = Arc::new(arrow::datatypes::Schema::new(vec![Field::new(
2645+
"ts",
2646+
DataType::Timestamp(TimeUnit::Nanosecond, None),
2647+
true,
2648+
)]));
2649+
let batch = TursoTableProvider::values_to_record_batch(&rows, &schema_nanos)
2650+
.expect("should succeed (overflow produces NULL, not error)");
2651+
let arr = batch
2652+
.column(0)
2653+
.as_any()
2654+
.downcast_ref::<arrow::array::TimestampNanosecondArray>()
2655+
.expect("should be TimestampNanosecondArray");
2656+
assert!(
2657+
arr.is_null(0),
2658+
"Nanosecond-precision integer millis timestamp should be NULL for year 2300 (overflow)"
2659+
);
2660+
2661+
// Microsecond unit: millis * 1_000 is well within range — should produce a valid value
2662+
let schema_micros = Arc::new(arrow::datatypes::Schema::new(vec![Field::new(
2663+
"ts",
2664+
DataType::Timestamp(TimeUnit::Microsecond, None),
2665+
true,
2666+
)]));
2667+
let batch = TursoTableProvider::values_to_record_batch(&rows, &schema_micros)
2668+
.expect("should succeed for Microsecond unit");
2669+
let arr = batch
2670+
.column(0)
2671+
.as_any()
2672+
.downcast_ref::<arrow::array::TimestampMicrosecondArray>()
2673+
.expect("should be TimestampMicrosecondArray");
2674+
assert!(
2675+
!arr.is_null(0),
2676+
"Microsecond-precision integer millis should not be NULL for year 2300"
2677+
);
2678+
assert_eq!(
2679+
arr.value(0),
2680+
millis_2300 * 1_000,
2681+
"Microsecond value should be millis * 1000"
2682+
);
2683+
2684+
// Millisecond unit: identity — should always work
2685+
let schema_millis = Arc::new(arrow::datatypes::Schema::new(vec![Field::new(
2686+
"ts",
2687+
DataType::Timestamp(TimeUnit::Millisecond, None),
2688+
true,
2689+
)]));
2690+
let batch = TursoTableProvider::values_to_record_batch(&rows, &schema_millis)
2691+
.expect("should succeed for Millisecond unit");
2692+
let arr = batch
2693+
.column(0)
2694+
.as_any()
2695+
.downcast_ref::<arrow::array::TimestampMillisecondArray>()
2696+
.expect("should be TimestampMillisecondArray");
2697+
assert!(!arr.is_null(0), "Millisecond identity should not be NULL");
2698+
assert_eq!(arr.value(0), millis_2300);
2699+
}
26312700
}

0 commit comments

Comments
 (0)