Skip to content

Commit 7e3e414

Browse files
authored
Merge pull request #67 from hotdata-dev/fix/column-id-overflow
fix: reject column_id values exceeding i32 range
2 parents 424ac81 + 5e4e7ba commit 7e3e414

2 files changed

Lines changed: 91 additions & 9 deletions

File tree

src/types.rs

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,12 @@ pub fn build_read_schema_with_field_id_mapping(
293293
.iter()
294294
.map(|col| {
295295
let data_type = ducklake_to_arrow_type(&col.column_type)?;
296-
let field_id = col.column_id as i32;
296+
let field_id = i32::try_from(col.column_id).map_err(|_| {
297+
DuckLakeError::Internal(format!(
298+
"column_id {} for column '{}' exceeds i32 range for Parquet field_id",
299+
col.column_id, col.column_name
300+
))
301+
})?;
297302

298303
let (read_name, needs_rename) =
299304
if let Some(parquet_name) = parquet_field_ids.get(&field_id) {
@@ -746,4 +751,69 @@ mod tests {
746751
_ => panic!("Expected UnsupportedType error when building schema with complex type"),
747752
}
748753
}
754+
755+
#[test]
756+
fn test_column_id_i32_max_succeeds() {
757+
let columns = vec![DuckLakeTableColumn {
758+
column_id: i32::MAX as i64,
759+
column_name: "id".to_string(),
760+
column_type: "int32".to_string(),
761+
is_nullable: true,
762+
}];
763+
764+
let mut parquet_field_ids = HashMap::new();
765+
parquet_field_ids.insert(i32::MAX, "id".to_string());
766+
767+
let result = build_read_schema_with_field_id_mapping(&columns, &parquet_field_ids);
768+
assert!(result.is_ok(), "column_id = i32::MAX should succeed");
769+
}
770+
771+
#[test]
772+
fn test_column_id_overflow_returns_error() {
773+
let columns = vec![DuckLakeTableColumn {
774+
column_id: i32::MAX as i64 + 1, // 2147483648, exceeds i32 range
775+
column_name: "id".to_string(),
776+
column_type: "int32".to_string(),
777+
is_nullable: true,
778+
}];
779+
780+
let parquet_field_ids = HashMap::new();
781+
782+
let result = build_read_schema_with_field_id_mapping(&columns, &parquet_field_ids);
783+
assert!(result.is_err(), "column_id > i32::MAX should fail");
784+
match result {
785+
Err(DuckLakeError::Internal(msg)) => {
786+
assert!(
787+
msg.contains("2147483648"),
788+
"Error should contain the overflowing value: {}",
789+
msg
790+
);
791+
assert!(
792+
msg.contains("exceeds i32 range"),
793+
"Error should explain the issue: {}",
794+
msg
795+
);
796+
},
797+
_ => panic!("Expected Internal error for column_id overflow"),
798+
}
799+
}
800+
801+
#[test]
802+
fn test_column_id_negative_within_i32_range_succeeds() {
803+
let columns = vec![DuckLakeTableColumn {
804+
column_id: -1,
805+
column_name: "id".to_string(),
806+
column_type: "int32".to_string(),
807+
is_nullable: true,
808+
}];
809+
810+
let mut parquet_field_ids = HashMap::new();
811+
parquet_field_ids.insert(-1_i32, "id".to_string());
812+
813+
let result = build_read_schema_with_field_id_mapping(&columns, &parquet_field_ids);
814+
assert!(
815+
result.is_ok(),
816+
"Negative column_id within i32 range should succeed"
817+
);
818+
}
749819
}

tests/common/mod.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@
1010

1111
use anyhow::Result;
1212
use std::path::Path;
13+
use std::sync::Once;
14+
15+
static INSTALL_DUCKLAKE: Once = Once::new();
16+
17+
/// Install the ducklake extension exactly once (thread-safe across parallel tests).
18+
fn ensure_ducklake_installed() {
19+
INSTALL_DUCKLAKE.call_once(|| {
20+
let conn = duckdb::Connection::open_in_memory().expect("open in-memory duckdb");
21+
conn.execute("INSTALL ducklake;", [])
22+
.expect("install ducklake extension");
23+
});
24+
}
1325

1426
/// Creates a catalog with a simple table (no deletes)
1527
///
@@ -29,7 +41,7 @@ pub fn create_catalog_no_deletes(catalog_path: &Path) -> Result<()> {
2941
// Use in-memory database to avoid file locking issues
3042
let conn = duckdb::Connection::open_in_memory()?;
3143

32-
conn.execute("INSTALL ducklake;", [])?;
44+
ensure_ducklake_installed();
3345
conn.execute("LOAD ducklake;", [])?;
3446

3547
let ducklake_path = format!("ducklake:{}", catalog_path.display());
@@ -69,7 +81,7 @@ pub fn create_catalog_with_deletes(catalog_path: &Path) -> Result<()> {
6981
// Use in-memory database to avoid file locking issues
7082
let conn = duckdb::Connection::open_in_memory()?;
7183

72-
conn.execute("INSTALL ducklake;", [])?;
84+
ensure_ducklake_installed();
7385
conn.execute("LOAD ducklake;", [])?;
7486

7587
let ducklake_path = format!("ducklake:{}", catalog_path.display());
@@ -115,7 +127,7 @@ pub fn create_catalog_with_updates(catalog_path: &Path) -> Result<()> {
115127
// Use in-memory database to avoid file locking issues
116128
let conn = duckdb::Connection::open_in_memory()?;
117129

118-
conn.execute("INSTALL ducklake;", [])?;
130+
ensure_ducklake_installed();
119131
conn.execute("LOAD ducklake;", [])?;
120132

121133
let ducklake_path = format!("ducklake:{}", catalog_path.display());
@@ -170,7 +182,7 @@ pub fn create_catalog_filter_pushdown(catalog_path: &Path) -> Result<()> {
170182
// Use in-memory database to avoid file locking issues
171183
let conn = duckdb::Connection::open_in_memory()?;
172184

173-
conn.execute("INSTALL ducklake;", [])?;
185+
ensure_ducklake_installed();
174186
conn.execute("LOAD ducklake;", [])?;
175187

176188
let ducklake_path = format!("ducklake:{}", catalog_path.display());
@@ -208,7 +220,7 @@ pub fn create_catalog_filter_pushdown(catalog_path: &Path) -> Result<()> {
208220
pub fn create_catalog_empty_table(catalog_path: &Path) -> Result<()> {
209221
let conn = duckdb::Connection::open_in_memory()?;
210222

211-
conn.execute("INSTALL ducklake;", [])?;
223+
ensure_ducklake_installed();
212224
conn.execute("LOAD ducklake;", [])?;
213225

214226
let ducklake_path = format!("ducklake:{}", catalog_path.display());
@@ -238,7 +250,7 @@ pub fn create_catalog_empty_table(catalog_path: &Path) -> Result<()> {
238250
pub fn create_catalog_basic_test(catalog_path: &Path) -> Result<()> {
239251
let conn = duckdb::Connection::open_in_memory()?;
240252

241-
conn.execute("INSTALL ducklake;", [])?;
253+
ensure_ducklake_installed();
242254
conn.execute("LOAD ducklake;", [])?;
243255

244256
let ducklake_path = format!("ducklake:{}", catalog_path.display());
@@ -290,7 +302,7 @@ pub fn to_datafusion_error(e: anyhow::Error) -> datafusion::error::DataFusionErr
290302
pub fn create_catalog_complex_deletions(catalog_path: &Path) -> Result<()> {
291303
let conn = duckdb::Connection::open_in_memory()?;
292304

293-
conn.execute("INSTALL ducklake;", [])?;
305+
ensure_ducklake_installed();
294306
conn.execute("LOAD ducklake;", [])?;
295307

296308
let ducklake_path = format!("ducklake:{}", catalog_path.display());
@@ -336,7 +348,7 @@ pub fn create_catalog_complex_deletions(catalog_path: &Path) -> Result<()> {
336348
pub fn create_catalog_multiple_snapshots(catalog_path: &Path) -> Result<()> {
337349
let conn = duckdb::Connection::open_in_memory()?;
338350

339-
conn.execute("INSTALL ducklake;", [])?;
351+
ensure_ducklake_installed();
340352
conn.execute("LOAD ducklake;", [])?;
341353

342354
let ducklake_path = format!("ducklake:{}", catalog_path.display());

0 commit comments

Comments
 (0)