@@ -41,7 +41,11 @@ pub fn ducklake_to_arrow_type(ducklake_type: &str) -> Result<DataType> {
4141
4242 // Temporal types
4343 "time" => Ok ( DataType :: Time64 ( TimeUnit :: Microsecond ) ) ,
44+ "time_s" => Ok ( DataType :: Time32 ( TimeUnit :: Second ) ) ,
45+ "time_ms" => Ok ( DataType :: Time32 ( TimeUnit :: Millisecond ) ) ,
46+ "time_ns" => Ok ( DataType :: Time64 ( TimeUnit :: Nanosecond ) ) ,
4447 "date" => Ok ( DataType :: Date32 ) ,
48+ "date64" => Ok ( DataType :: Date64 ) ,
4549 "timestamp" => Ok ( DataType :: Timestamp ( TimeUnit :: Microsecond , None ) ) ,
4650 "timestamptz" | "timestamp with time zone" => Ok ( DataType :: Timestamp (
4751 TimeUnit :: Microsecond ,
@@ -54,10 +58,12 @@ pub fn ducklake_to_arrow_type(ducklake_type: &str) -> Result<DataType> {
5458
5559 // String types
5660 "varchar" | "text" | "string" => Ok ( DataType :: Utf8 ) ,
61+ "large_varchar" => Ok ( DataType :: LargeUtf8 ) ,
5762 "json" => Ok ( DataType :: Utf8 ) , // JSON stored as UTF8 string
5863
5964 // Binary types
6065 "blob" | "binary" | "bytea" => Ok ( DataType :: Binary ) ,
66+ "large_blob" => Ok ( DataType :: LargeBinary ) ,
6167 "uuid" => Ok ( DataType :: FixedSizeBinary ( 16 ) ) ,
6268
6369 // Geometry types (stored as binary WKB format)
@@ -114,7 +120,12 @@ pub fn arrow_to_ducklake_type(arrow_type: &DataType) -> Result<String> {
114120 DataType :: Float64 => Ok ( "float64" . to_string ( ) ) ,
115121
116122 // Temporal types
117- DataType :: Date32 | DataType :: Date64 => Ok ( "date" . to_string ( ) ) ,
123+ DataType :: Date32 => Ok ( "date" . to_string ( ) ) ,
124+ DataType :: Date64 => Ok ( "date64" . to_string ( ) ) ,
125+ DataType :: Time32 ( TimeUnit :: Second ) => Ok ( "time_s" . to_string ( ) ) ,
126+ DataType :: Time32 ( TimeUnit :: Millisecond ) => Ok ( "time_ms" . to_string ( ) ) ,
127+ DataType :: Time64 ( TimeUnit :: Microsecond ) => Ok ( "time" . to_string ( ) ) ,
128+ DataType :: Time64 ( TimeUnit :: Nanosecond ) => Ok ( "time_ns" . to_string ( ) ) ,
118129 DataType :: Time32 ( _) | DataType :: Time64 ( _) => Ok ( "time" . to_string ( ) ) ,
119130 DataType :: Timestamp ( TimeUnit :: Second , None ) => Ok ( "timestamp_s" . to_string ( ) ) ,
120131 DataType :: Timestamp ( TimeUnit :: Millisecond , None ) => Ok ( "timestamp_ms" . to_string ( ) ) ,
@@ -124,10 +135,12 @@ pub fn arrow_to_ducklake_type(arrow_type: &DataType) -> Result<String> {
124135 DataType :: Interval ( _) => Ok ( "interval" . to_string ( ) ) ,
125136
126137 // String types
127- DataType :: Utf8 | DataType :: LargeUtf8 => Ok ( "varchar" . to_string ( ) ) ,
138+ DataType :: Utf8 => Ok ( "varchar" . to_string ( ) ) ,
139+ DataType :: LargeUtf8 => Ok ( "large_varchar" . to_string ( ) ) ,
128140
129141 // Binary types
130- DataType :: Binary | DataType :: LargeBinary => Ok ( "blob" . to_string ( ) ) ,
142+ DataType :: Binary => Ok ( "blob" . to_string ( ) ) ,
143+ DataType :: LargeBinary => Ok ( "large_blob" . to_string ( ) ) ,
131144 DataType :: FixedSizeBinary ( 16 ) => Ok ( "uuid" . to_string ( ) ) ,
132145 DataType :: FixedSizeBinary ( _) => Ok ( "blob" . to_string ( ) ) ,
133146
@@ -275,35 +288,30 @@ mod tests {
275288
276289 #[ test]
277290 fn test_build_read_schema_with_renamed_columns ( ) {
278- // Simulate: column was originally named "user_id", now renamed to "userId"
279291 let current_columns = vec ! [
280292 DuckLakeTableColumn {
281293 column_id: 1 ,
282- column_name: "userId" . to_string( ) , // Current name (renamed)
294+ column_name: "userId" . to_string( ) ,
283295 column_type: "int32" . to_string( ) ,
284296 is_nullable: true ,
285297 } ,
286298 DuckLakeTableColumn {
287299 column_id: 2 ,
288- column_name: "name" . to_string( ) , // Not renamed
300+ column_name: "name" . to_string( ) ,
289301 column_type: "varchar" . to_string( ) ,
290302 is_nullable: true ,
291303 } ,
292304 ] ;
293305
294- // Parquet file has original names
295306 let mut parquet_field_ids = HashMap :: new ( ) ;
296- parquet_field_ids. insert ( 1 , "user_id" . to_string ( ) ) ; // Original name
297- parquet_field_ids. insert ( 2 , "name" . to_string ( ) ) ; // Same name
307+ parquet_field_ids. insert ( 1 , "user_id" . to_string ( ) ) ;
308+ parquet_field_ids. insert ( 2 , "name" . to_string ( ) ) ;
298309
299310 let ( read_schema, name_mapping) =
300311 build_read_schema_with_field_id_mapping ( & current_columns, & parquet_field_ids) . unwrap ( ) ;
301312
302- // Read schema should have original Parquet names
303313 assert_eq ! ( read_schema. field( 0 ) . name( ) , "user_id" ) ;
304314 assert_eq ! ( read_schema. field( 1 ) . name( ) , "name" ) ;
305-
306- // Name mapping should map old name to new name
307315 assert_eq ! ( name_mapping. len( ) , 1 ) ;
308316 assert_eq ! ( name_mapping. get( "user_id" ) , Some ( & "userId" . to_string( ) ) ) ;
309317 }
@@ -318,31 +326,29 @@ mod tests {
318326 } ] ;
319327
320328 let mut parquet_field_ids = HashMap :: new ( ) ;
321- parquet_field_ids. insert ( 1 , "id" . to_string ( ) ) ; // Same name
329+ parquet_field_ids. insert ( 1 , "id" . to_string ( ) ) ;
322330
323331 let ( read_schema, name_mapping) =
324332 build_read_schema_with_field_id_mapping ( & current_columns, & parquet_field_ids) . unwrap ( ) ;
325333
326334 assert_eq ! ( read_schema. field( 0 ) . name( ) , "id" ) ;
327- assert ! ( name_mapping. is_empty( ) ) ; // No rename needed
335+ assert ! ( name_mapping. is_empty( ) ) ;
328336 }
329337
330338 #[ test]
331339 fn test_build_read_schema_no_field_ids ( ) {
332- // External file without field_ids
333340 let current_columns = vec ! [ DuckLakeTableColumn {
334341 column_id: 1 ,
335342 column_name: "id" . to_string( ) ,
336343 column_type: "int32" . to_string( ) ,
337344 is_nullable: true ,
338345 } ] ;
339346
340- let parquet_field_ids = HashMap :: new ( ) ; // No field_ids in Parquet
347+ let parquet_field_ids = HashMap :: new ( ) ;
341348
342349 let ( read_schema, name_mapping) =
343350 build_read_schema_with_field_id_mapping ( & current_columns, & parquet_field_ids) . unwrap ( ) ;
344351
345- // Falls back to current column name
346352 assert_eq ! ( read_schema. field( 0 ) . name( ) , "id" ) ;
347353 assert ! ( name_mapping. is_empty( ) ) ;
348354 }
@@ -363,6 +369,34 @@ mod tests {
363369 assert_eq ! ( ducklake_to_arrow_type( "blob" ) . unwrap( ) , DataType :: Binary ) ;
364370 }
365371
372+ #[ test]
373+ fn test_precision_preserving_types ( ) {
374+ assert_eq ! (
375+ ducklake_to_arrow_type( "date64" ) . unwrap( ) ,
376+ DataType :: Date64
377+ ) ;
378+ assert_eq ! (
379+ ducklake_to_arrow_type( "large_varchar" ) . unwrap( ) ,
380+ DataType :: LargeUtf8
381+ ) ;
382+ assert_eq ! (
383+ ducklake_to_arrow_type( "large_blob" ) . unwrap( ) ,
384+ DataType :: LargeBinary
385+ ) ;
386+ assert_eq ! (
387+ ducklake_to_arrow_type( "time_s" ) . unwrap( ) ,
388+ DataType :: Time32 ( TimeUnit :: Second )
389+ ) ;
390+ assert_eq ! (
391+ ducklake_to_arrow_type( "time_ms" ) . unwrap( ) ,
392+ DataType :: Time32 ( TimeUnit :: Millisecond )
393+ ) ;
394+ assert_eq ! (
395+ ducklake_to_arrow_type( "time_ns" ) . unwrap( ) ,
396+ DataType :: Time64 ( TimeUnit :: Nanosecond )
397+ ) ;
398+ }
399+
366400 #[ test]
367401 fn test_decimal_types ( ) {
368402 assert_eq ! (
@@ -386,7 +420,6 @@ mod tests {
386420
387421 #[ test]
388422 fn test_unsupported_list_type_errors ( ) {
389- // Test list type returns error
390423 let result = ducklake_to_arrow_type ( "list<int32>" ) ;
391424 assert ! ( result. is_err( ) ) ;
392425 match result {
@@ -401,7 +434,6 @@ mod tests {
401434
402435 #[ test]
403436 fn test_unsupported_array_type_errors ( ) {
404- // Test array type returns error
405437 let result = ducklake_to_arrow_type ( "array<varchar>" ) ;
406438 assert ! ( result. is_err( ) ) ;
407439 match result {
@@ -415,7 +447,6 @@ mod tests {
415447
416448 #[ test]
417449 fn test_unsupported_struct_type_errors ( ) {
418- // Test struct type returns error
419450 let result = ducklake_to_arrow_type ( "struct<a:int32,b:varchar>" ) ;
420451 assert ! ( result. is_err( ) ) ;
421452 match result {
@@ -430,7 +461,6 @@ mod tests {
430461
431462 #[ test]
432463 fn test_unsupported_map_type_errors ( ) {
433- // Test map type returns error
434464 let result = ducklake_to_arrow_type ( "map<varchar,int32>" ) ;
435465 assert ! ( result. is_err( ) ) ;
436466 match result {
@@ -445,7 +475,6 @@ mod tests {
445475
446476 #[ test]
447477 fn test_nested_complex_types_error ( ) {
448- // Test nested complex types return error
449478 let result = ducklake_to_arrow_type ( "list<struct<a:int32,b:varchar>>" ) ;
450479 assert ! ( result. is_err( ) ) ;
451480 match result {
@@ -459,7 +488,6 @@ mod tests {
459488
460489 #[ test]
461490 fn test_unknown_type_error ( ) {
462- // Test completely unknown types also return error
463491 let result = ducklake_to_arrow_type ( "completely_unknown_type" ) ;
464492 assert ! ( result. is_err( ) ) ;
465493 match result {
@@ -493,17 +521,40 @@ mod tests {
493521 "float64"
494522 ) ;
495523 assert_eq ! ( arrow_to_ducklake_type( & DataType :: Utf8 ) . unwrap( ) , "varchar" ) ;
524+ assert_eq ! (
525+ arrow_to_ducklake_type( & DataType :: LargeUtf8 ) . unwrap( ) ,
526+ "large_varchar"
527+ ) ;
496528 assert_eq ! ( arrow_to_ducklake_type( & DataType :: Binary ) . unwrap( ) , "blob" ) ;
529+ assert_eq ! (
530+ arrow_to_ducklake_type( & DataType :: LargeBinary ) . unwrap( ) ,
531+ "large_blob"
532+ ) ;
497533 }
498534
499535 #[ test]
500536 fn test_arrow_to_ducklake_temporal_types ( ) {
501537 assert_eq ! ( arrow_to_ducklake_type( & DataType :: Date32 ) . unwrap( ) , "date" ) ;
502- assert_eq ! ( arrow_to_ducklake_type( & DataType :: Date64 ) . unwrap( ) , "date" ) ;
538+ assert_eq ! (
539+ arrow_to_ducklake_type( & DataType :: Date64 ) . unwrap( ) ,
540+ "date64"
541+ ) ;
542+ assert_eq ! (
543+ arrow_to_ducklake_type( & DataType :: Time32 ( TimeUnit :: Second ) ) . unwrap( ) ,
544+ "time_s"
545+ ) ;
546+ assert_eq ! (
547+ arrow_to_ducklake_type( & DataType :: Time32 ( TimeUnit :: Millisecond ) ) . unwrap( ) ,
548+ "time_ms"
549+ ) ;
503550 assert_eq ! (
504551 arrow_to_ducklake_type( & DataType :: Time64 ( TimeUnit :: Microsecond ) ) . unwrap( ) ,
505552 "time"
506553 ) ;
554+ assert_eq ! (
555+ arrow_to_ducklake_type( & DataType :: Time64 ( TimeUnit :: Nanosecond ) ) . unwrap( ) ,
556+ "time_ns"
557+ ) ;
507558 assert_eq ! (
508559 arrow_to_ducklake_type( & DataType :: Timestamp ( TimeUnit :: Microsecond , None ) ) . unwrap( ) ,
509560 "timestamp"
@@ -536,7 +587,6 @@ mod tests {
536587 arrow_to_ducklake_type( & DataType :: FixedSizeBinary ( 16 ) ) . unwrap( ) ,
537588 "uuid"
538589 ) ;
539- // Non-16 byte fixed size binary becomes blob
540590 assert_eq ! (
541591 arrow_to_ducklake_type( & DataType :: FixedSizeBinary ( 32 ) ) . unwrap( ) ,
542592 "blob"
@@ -545,16 +595,26 @@ mod tests {
545595
546596 #[ test]
547597 fn test_arrow_to_ducklake_roundtrip ( ) {
548- // Verify roundtrip: arrow -> ducklake -> arrow for common types
598+ // Verify roundtrip: arrow -> ducklake -> arrow for all supported types
549599 let test_types = vec ! [
550600 DataType :: Boolean ,
551601 DataType :: Int32 ,
552602 DataType :: Int64 ,
553603 DataType :: Float64 ,
554604 DataType :: Utf8 ,
605+ DataType :: LargeUtf8 ,
555606 DataType :: Binary ,
607+ DataType :: LargeBinary ,
556608 DataType :: Date32 ,
609+ DataType :: Date64 ,
610+ DataType :: Time32 ( TimeUnit :: Second ) ,
611+ DataType :: Time32 ( TimeUnit :: Millisecond ) ,
612+ DataType :: Time64 ( TimeUnit :: Microsecond ) ,
613+ DataType :: Time64 ( TimeUnit :: Nanosecond ) ,
614+ DataType :: Timestamp ( TimeUnit :: Second , None ) ,
615+ DataType :: Timestamp ( TimeUnit :: Millisecond , None ) ,
557616 DataType :: Timestamp ( TimeUnit :: Microsecond , None ) ,
617+ DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) ,
558618 DataType :: Decimal128 ( 10 , 2 ) ,
559619 ] ;
560620
@@ -595,7 +655,6 @@ mod tests {
595655
596656 #[ test]
597657 fn test_build_schema_with_unsupported_type ( ) {
598- // Test that build_arrow_schema propagates complex type errors
599658 let columns = vec ! [
600659 DuckLakeTableColumn {
601660 column_id: 1 ,
0 commit comments