Skip to content

Commit 3f142f0

Browse files
authored
fix: validate type strings in ColumnDef constructor to reject invalid types early (#78)
Change ColumnDef::new() to return Result<Self> and validate the ducklake_type string by calling ducklake_to_arrow_type() before constructing. This catches invalid type strings at write time rather than deferring the error to read time, where it would surface as a confusing failure. Make ColumnDef fields pub(crate) instead of pub to prevent bypassing validation via struct literal construction. Add public getter methods (name(), ducklake_type(), is_nullable()) for external consumers. Simplify arrow_schema_to_column_defs() in table_writer.rs to use ColumnDef::from_arrow() directly, removing the redundant arrow_to_ducklake_type + ColumnDef::new round-trip. Update all call sites to handle the Result, and add tests verifying that valid types are accepted while invalid and empty types are rejected.
1 parent 3b0d87e commit 3f142f0

3 files changed

Lines changed: 82 additions & 21 deletions

File tree

src/metadata_writer.rs

Lines changed: 77 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub enum WriteMode {
1313
/// Keep existing data and append new records
1414
Append,
1515
}
16-
use crate::types::arrow_to_ducklake_type;
16+
use crate::types::{arrow_to_ducklake_type, ducklake_to_arrow_type};
1717
use arrow::datatypes::DataType;
1818

1919
/// Column definition for creating or updating a table's schema.
@@ -23,37 +23,67 @@ use arrow::datatypes::DataType;
2323
#[derive(Debug, Clone)]
2424
pub struct ColumnDef {
2525
/// Column name
26-
pub name: String,
26+
pub(crate) name: String,
2727
/// DuckLake type string (e.g., "varchar", "int64", "decimal(10,2)")
28-
pub ducklake_type: String,
28+
pub(crate) ducklake_type: String,
2929
/// Whether this column allows NULL values
30-
pub is_nullable: bool,
30+
pub(crate) is_nullable: bool,
3131
}
3232

3333
impl ColumnDef {
34+
/// Returns the column name.
35+
pub fn name(&self) -> &str {
36+
&self.name
37+
}
38+
39+
/// Returns the DuckLake type string.
40+
pub fn ducklake_type(&self) -> &str {
41+
&self.ducklake_type
42+
}
43+
44+
/// Returns whether this column allows NULL values.
45+
pub fn is_nullable(&self) -> bool {
46+
self.is_nullable
47+
}
48+
3449
/// Create a new column definition.
50+
///
51+
/// Validates that `ducklake_type` is a recognized DuckLake type string by converting
52+
/// it to an Arrow DataType. Returns an error if the type is invalid or unsupported.
3553
pub fn new(
3654
name: impl Into<String>,
3755
ducklake_type: impl Into<String>,
3856
is_nullable: bool,
39-
) -> Self {
40-
Self {
57+
) -> Result<Self> {
58+
let ducklake_type = ducklake_type.into();
59+
// Validate the type string by attempting to convert it to an Arrow type.
60+
// We discard the result; we only care that the conversion succeeds.
61+
ducklake_to_arrow_type(&ducklake_type)?;
62+
Ok(Self {
4163
name: name.into(),
42-
ducklake_type: ducklake_type.into(),
64+
ducklake_type,
4365
is_nullable,
44-
}
66+
})
4567
}
4668

4769
/// Create a column definition from an Arrow DataType.
4870
///
4971
/// This is a convenience constructor that converts the Arrow type to a DuckLake type string.
72+
/// The resulting DuckLake type is guaranteed to be valid since it was derived from a known
73+
/// Arrow type.
5074
pub fn from_arrow(
5175
name: impl Into<String>,
5276
data_type: &DataType,
5377
is_nullable: bool,
5478
) -> Result<Self> {
5579
let ducklake_type = arrow_to_ducklake_type(data_type)?;
56-
Ok(Self::new(name, ducklake_type, is_nullable))
80+
// We use direct struct construction here since the ducklake_type was just
81+
// produced by arrow_to_ducklake_type, so it is guaranteed to be valid.
82+
Ok(Self {
83+
name: name.into(),
84+
ducklake_type,
85+
is_nullable,
86+
})
5787
}
5888
}
5989

@@ -195,15 +225,52 @@ pub trait MetadataWriter: Send + Sync + std::fmt::Debug {
195225
#[cfg(test)]
196226
mod tests {
197227
use super::*;
228+
use crate::DuckLakeError;
198229

199230
#[test]
200231
fn test_column_def_new() {
201-
let col = ColumnDef::new("test_col", "int32", true);
232+
let col = ColumnDef::new("test_col", "int32", true).unwrap();
202233
assert_eq!(col.name, "test_col");
203234
assert_eq!(col.ducklake_type, "int32");
204235
assert!(col.is_nullable);
205236
}
206237

238+
#[test]
239+
fn test_column_def_new_valid_types() {
240+
// Various valid type strings should be accepted
241+
assert!(ColumnDef::new("a", "int32", true).is_ok());
242+
assert!(ColumnDef::new("b", "varchar", false).is_ok());
243+
assert!(ColumnDef::new("c", "boolean", true).is_ok());
244+
assert!(ColumnDef::new("d", "float64", true).is_ok());
245+
assert!(ColumnDef::new("e", "decimal(10,2)", true).is_ok());
246+
assert!(ColumnDef::new("f", "timestamp", true).is_ok());
247+
assert!(ColumnDef::new("g", "date", true).is_ok());
248+
assert!(ColumnDef::new("h", "bigint", true).is_ok());
249+
assert!(ColumnDef::new("i", "text", true).is_ok());
250+
}
251+
252+
#[test]
253+
fn test_column_def_new_invalid_type_rejected() {
254+
let result = ColumnDef::new("col", "not_a_type", true);
255+
assert!(result.is_err());
256+
match result {
257+
Err(DuckLakeError::UnsupportedType(msg)) => {
258+
assert_eq!(msg, "not_a_type");
259+
},
260+
other => panic!("Expected UnsupportedType error, got {:?}", other),
261+
}
262+
}
263+
264+
#[test]
265+
fn test_column_def_new_empty_type_rejected() {
266+
let result = ColumnDef::new("col", "", true);
267+
assert!(result.is_err());
268+
match result {
269+
Err(DuckLakeError::UnsupportedType(_)) => {},
270+
other => panic!("Expected UnsupportedType error, got {:?}", other),
271+
}
272+
}
273+
207274
#[test]
208275
fn test_column_def_from_arrow() {
209276
let col = ColumnDef::from_arrow("id", &DataType::Int64, false).unwrap();

src/metadata_writer_sqlite.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,8 +577,10 @@ mod tests {
577577
.get_or_create_table(schema_id, "users", None, snapshot_id)
578578
.unwrap();
579579

580-
let columns =
581-
vec![ColumnDef::new("id", "int64", false), ColumnDef::new("name", "varchar", true)];
580+
let columns = vec![
581+
ColumnDef::new("id", "int64", false).unwrap(),
582+
ColumnDef::new("name", "varchar", true).unwrap(),
583+
];
582584

583585
let column_ids = writer.set_columns(table_id, &columns, snapshot_id).unwrap();
584586
assert_eq!(column_ids.len(), 2);

src/table_writer.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use uuid::Uuid;
1414
use crate::Result;
1515
use crate::metadata_writer::{ColumnDef, DataFileInfo, MetadataWriter, WriteMode, WriteResult};
1616
use crate::path_resolver::join_paths;
17-
use crate::types::arrow_to_ducklake_type;
1817

1918
/// High-level writer for DuckLake tables.
2019
#[derive(Debug)]
@@ -297,14 +296,7 @@ fn arrow_schema_to_column_defs(schema: &Schema) -> Result<Vec<ColumnDef>> {
297296
schema
298297
.fields()
299298
.iter()
300-
.map(|field| {
301-
let ducklake_type = arrow_to_ducklake_type(field.data_type())?;
302-
Ok(ColumnDef::new(
303-
field.name().clone(),
304-
ducklake_type,
305-
field.is_nullable(),
306-
))
307-
})
299+
.map(|field| ColumnDef::from_arrow(field.name(), field.data_type(), field.is_nullable()))
308300
.collect()
309301
}
310302

0 commit comments

Comments
 (0)