diff --git a/src/metadata_writer.rs b/src/metadata_writer.rs index 816b925..15ca7b6 100644 --- a/src/metadata_writer.rs +++ b/src/metadata_writer.rs @@ -13,7 +13,7 @@ pub enum WriteMode { /// Keep existing data and append new records Append, } -use crate::types::arrow_to_ducklake_type; +use crate::types::{arrow_to_ducklake_type, ducklake_to_arrow_type}; use arrow::datatypes::DataType; /// Column definition for creating or updating a table's schema. @@ -23,37 +23,67 @@ use arrow::datatypes::DataType; #[derive(Debug, Clone)] pub struct ColumnDef { /// Column name - pub name: String, + pub(crate) name: String, /// DuckLake type string (e.g., "varchar", "int64", "decimal(10,2)") - pub ducklake_type: String, + pub(crate) ducklake_type: String, /// Whether this column allows NULL values - pub is_nullable: bool, + pub(crate) is_nullable: bool, } impl ColumnDef { + /// Returns the column name. + pub fn name(&self) -> &str { + &self.name + } + + /// Returns the DuckLake type string. + pub fn ducklake_type(&self) -> &str { + &self.ducklake_type + } + + /// Returns whether this column allows NULL values. + pub fn is_nullable(&self) -> bool { + self.is_nullable + } + /// Create a new column definition. + /// + /// Validates that `ducklake_type` is a recognized DuckLake type string by converting + /// it to an Arrow DataType. Returns an error if the type is invalid or unsupported. pub fn new( name: impl Into, ducklake_type: impl Into, is_nullable: bool, - ) -> Self { - Self { + ) -> Result { + let ducklake_type = ducklake_type.into(); + // Validate the type string by attempting to convert it to an Arrow type. + // We discard the result; we only care that the conversion succeeds. + ducklake_to_arrow_type(&ducklake_type)?; + Ok(Self { name: name.into(), - ducklake_type: ducklake_type.into(), + ducklake_type, is_nullable, - } + }) } /// Create a column definition from an Arrow DataType. /// /// This is a convenience constructor that converts the Arrow type to a DuckLake type string. + /// The resulting DuckLake type is guaranteed to be valid since it was derived from a known + /// Arrow type. pub fn from_arrow( name: impl Into, data_type: &DataType, is_nullable: bool, ) -> Result { let ducklake_type = arrow_to_ducklake_type(data_type)?; - Ok(Self::new(name, ducklake_type, is_nullable)) + // We use direct struct construction here since the ducklake_type was just + // produced by arrow_to_ducklake_type, so it is guaranteed to be valid. + Ok(Self { + name: name.into(), + ducklake_type, + is_nullable, + }) } } @@ -195,15 +225,52 @@ pub trait MetadataWriter: Send + Sync + std::fmt::Debug { #[cfg(test)] mod tests { use super::*; + use crate::DuckLakeError; #[test] fn test_column_def_new() { - let col = ColumnDef::new("test_col", "int32", true); + let col = ColumnDef::new("test_col", "int32", true).unwrap(); assert_eq!(col.name, "test_col"); assert_eq!(col.ducklake_type, "int32"); assert!(col.is_nullable); } + #[test] + fn test_column_def_new_valid_types() { + // Various valid type strings should be accepted + assert!(ColumnDef::new("a", "int32", true).is_ok()); + assert!(ColumnDef::new("b", "varchar", false).is_ok()); + assert!(ColumnDef::new("c", "boolean", true).is_ok()); + assert!(ColumnDef::new("d", "float64", true).is_ok()); + assert!(ColumnDef::new("e", "decimal(10,2)", true).is_ok()); + assert!(ColumnDef::new("f", "timestamp", true).is_ok()); + assert!(ColumnDef::new("g", "date", true).is_ok()); + assert!(ColumnDef::new("h", "bigint", true).is_ok()); + assert!(ColumnDef::new("i", "text", true).is_ok()); + } + + #[test] + fn test_column_def_new_invalid_type_rejected() { + let result = ColumnDef::new("col", "not_a_type", true); + assert!(result.is_err()); + match result { + Err(DuckLakeError::UnsupportedType(msg)) => { + assert_eq!(msg, "not_a_type"); + }, + other => panic!("Expected UnsupportedType error, got {:?}", other), + } + } + + #[test] + fn test_column_def_new_empty_type_rejected() { + let result = ColumnDef::new("col", "", true); + assert!(result.is_err()); + match result { + Err(DuckLakeError::UnsupportedType(_)) => {}, + other => panic!("Expected UnsupportedType error, got {:?}", other), + } + } + #[test] fn test_column_def_from_arrow() { let col = ColumnDef::from_arrow("id", &DataType::Int64, false).unwrap(); diff --git a/src/metadata_writer_sqlite.rs b/src/metadata_writer_sqlite.rs index 0485837..d5aebdb 100644 --- a/src/metadata_writer_sqlite.rs +++ b/src/metadata_writer_sqlite.rs @@ -577,8 +577,10 @@ mod tests { .get_or_create_table(schema_id, "users", None, snapshot_id) .unwrap(); - let columns = - vec![ColumnDef::new("id", "int64", false), ColumnDef::new("name", "varchar", true)]; + let columns = vec![ + ColumnDef::new("id", "int64", false).unwrap(), + ColumnDef::new("name", "varchar", true).unwrap(), + ]; let column_ids = writer.set_columns(table_id, &columns, snapshot_id).unwrap(); assert_eq!(column_ids.len(), 2); diff --git a/src/table_writer.rs b/src/table_writer.rs index adc04bd..8d54671 100644 --- a/src/table_writer.rs +++ b/src/table_writer.rs @@ -14,7 +14,6 @@ use uuid::Uuid; use crate::Result; use crate::metadata_writer::{ColumnDef, DataFileInfo, MetadataWriter, WriteMode, WriteResult}; use crate::path_resolver::join_paths; -use crate::types::arrow_to_ducklake_type; /// High-level writer for DuckLake tables. #[derive(Debug)] @@ -297,14 +296,7 @@ fn arrow_schema_to_column_defs(schema: &Schema) -> Result> { schema .fields() .iter() - .map(|field| { - let ducklake_type = arrow_to_ducklake_type(field.data_type())?; - Ok(ColumnDef::new( - field.name().clone(), - ducklake_type, - field.is_nullable(), - )) - }) + .map(|field| ColumnDef::from_arrow(field.name(), field.data_type(), field.is_nullable())) .collect() }