@@ -17,7 +17,8 @@ pub const SQL_LIST_TABLES: &str =
1717 AND ? >= begin_snapshot
1818 AND (? < end_snapshot OR end_snapshot IS NULL)" ;
1919
20- pub const SQL_GET_TABLE_COLUMNS : & str = "SELECT column_id, column_name, column_type, nulls_allowed
20+ pub const SQL_GET_TABLE_COLUMNS : & str =
21+ "SELECT column_id, column_name, column_type, nulls_allowed, parent_column
2122 FROM ducklake_column
2223 WHERE table_id = ? AND end_snapshot IS NULL
2324 ORDER BY column_order" ;
@@ -218,7 +219,8 @@ pub const SQL_LIST_ALL_COLUMNS: &str = "
218219 c.column_id,
219220 c.column_name,
220221 c.column_type,
221- c.nulls_allowed
222+ c.nulls_allowed,
223+ c.parent_column
222224 FROM ducklake_schema s
223225 JOIN ducklake_table t ON s.schema_id = t.schema_id
224226 JOIN ducklake_column c ON t.table_id = c.table_id
@@ -356,6 +358,96 @@ impl DuckLakeTableColumn {
356358 }
357359}
358360
361+ /// Reconstruct list types from parent-child column rows.
362+ ///
363+ /// DuckLake stores list columns as two rows in `ducklake_column`:
364+ /// - Parent row: `column_type = "list"`, `parent_column = NULL`
365+ /// - Child row: `column_type = "<element_type>"`, `parent_column = <parent_column_id>`
366+ ///
367+ /// This function rewrites the parent's `column_type` to `list<element_type>`
368+ /// and removes child rows from the result.
369+ ///
370+ /// Only handles `list` parent types. Struct, map, etc. are left unchanged.
371+ pub fn reconstruct_list_columns (
372+ rows : Vec < ( DuckLakeTableColumn , Option < i64 > ) > ,
373+ ) -> Vec < DuckLakeTableColumn > {
374+ use std:: collections:: HashMap ;
375+
376+ // Index: column_id -> position in rows
377+ let id_to_index: HashMap < i64 , usize > = rows
378+ . iter ( )
379+ . enumerate ( )
380+ . map ( |( i, ( col, _) ) | ( col. column_id , i) )
381+ . collect ( ) ;
382+
383+ // Separate into columns and parent_column arrays
384+ let mut columns: Vec < DuckLakeTableColumn > = Vec :: with_capacity ( rows. len ( ) ) ;
385+ let mut parent_columns: Vec < Option < i64 > > = Vec :: with_capacity ( rows. len ( ) ) ;
386+ for ( col, parent) in rows {
387+ columns. push ( col) ;
388+ parent_columns. push ( parent) ;
389+ }
390+
391+ // Find children of list parents and rewrite parent types
392+ let mut skip: std:: collections:: HashSet < usize > = std:: collections:: HashSet :: new ( ) ;
393+ for ( i, parent_id) in parent_columns. iter ( ) . enumerate ( ) {
394+ if let Some ( pid) = parent_id
395+ && let Some ( & parent_idx) = id_to_index. get ( pid)
396+ && columns[ parent_idx] . column_type == "list"
397+ {
398+ columns[ parent_idx] . column_type = format ! ( "list<{}>" , columns[ i] . column_type) ;
399+ skip. insert ( i) ;
400+ }
401+ }
402+
403+ // Return only top-level columns (not children)
404+ columns
405+ . into_iter ( )
406+ . enumerate ( )
407+ . filter ( |( i, _) | !skip. contains ( i) )
408+ . map ( |( _, col) | col)
409+ . collect ( )
410+ }
411+
412+ /// Same as [`reconstruct_list_columns`] but for [`ColumnWithTable`] rows.
413+ pub fn reconstruct_list_columns_with_table (
414+ rows : Vec < ( ColumnWithTable , Option < i64 > ) > ,
415+ ) -> Vec < ColumnWithTable > {
416+ use std:: collections:: HashMap ;
417+
418+ let id_to_index: HashMap < i64 , usize > = rows
419+ . iter ( )
420+ . enumerate ( )
421+ . map ( |( i, ( cwt, _) ) | ( cwt. column . column_id , i) )
422+ . collect ( ) ;
423+
424+ let mut entries: Vec < ColumnWithTable > = Vec :: with_capacity ( rows. len ( ) ) ;
425+ let mut parent_columns: Vec < Option < i64 > > = Vec :: with_capacity ( rows. len ( ) ) ;
426+ for ( cwt, parent) in rows {
427+ entries. push ( cwt) ;
428+ parent_columns. push ( parent) ;
429+ }
430+
431+ let mut skip: std:: collections:: HashSet < usize > = std:: collections:: HashSet :: new ( ) ;
432+ for ( i, parent_id) in parent_columns. iter ( ) . enumerate ( ) {
433+ if let Some ( pid) = parent_id
434+ && let Some ( & parent_idx) = id_to_index. get ( pid)
435+ && entries[ parent_idx] . column . column_type == "list"
436+ {
437+ entries[ parent_idx] . column . column_type =
438+ format ! ( "list<{}>" , entries[ i] . column. column_type) ;
439+ skip. insert ( i) ;
440+ }
441+ }
442+
443+ entries
444+ . into_iter ( )
445+ . enumerate ( )
446+ . filter ( |( i, _) | !skip. contains ( i) )
447+ . map ( |( _, e) | e)
448+ . collect ( )
449+ }
450+
359451/// Metadata for a data file or delete file in DuckLake
360452#[ derive( Debug , Clone ) ]
361453pub struct DuckLakeFileData {
@@ -534,3 +626,124 @@ where
534626{
535627 tokio:: task:: block_in_place ( || tokio:: runtime:: Handle :: current ( ) . block_on ( f) )
536628}
629+
630+ #[ cfg( test) ]
631+ mod tests {
632+ use super :: * ;
633+
634+ #[ test]
635+ fn test_reconstruct_list_columns_basic ( ) {
636+ let rows = vec ! [
637+ (
638+ DuckLakeTableColumn :: new( 1 , "id" . into( ) , "int64" . into( ) , false ) ,
639+ None ,
640+ ) ,
641+ (
642+ DuckLakeTableColumn :: new( 6 , "vector" . into( ) , "list" . into( ) , true ) ,
643+ None ,
644+ ) ,
645+ (
646+ DuckLakeTableColumn :: new( 7 , "element" . into( ) , "float64" . into( ) , true ) ,
647+ Some ( 6 ) ,
648+ ) ,
649+ ] ;
650+
651+ let result = reconstruct_list_columns ( rows) ;
652+ assert_eq ! ( result. len( ) , 2 ) ;
653+ assert_eq ! ( result[ 0 ] . column_name, "id" ) ;
654+ assert_eq ! ( result[ 0 ] . column_type, "int64" ) ;
655+ assert_eq ! ( result[ 1 ] . column_name, "vector" ) ;
656+ assert_eq ! ( result[ 1 ] . column_type, "list<float64>" ) ;
657+ }
658+
659+ #[ test]
660+ fn test_reconstruct_list_columns_no_lists ( ) {
661+ let rows = vec ! [
662+ (
663+ DuckLakeTableColumn :: new( 1 , "id" . into( ) , "int64" . into( ) , false ) ,
664+ None ,
665+ ) ,
666+ (
667+ DuckLakeTableColumn :: new( 2 , "name" . into( ) , "varchar" . into( ) , true ) ,
668+ None ,
669+ ) ,
670+ ] ;
671+
672+ let result = reconstruct_list_columns ( rows) ;
673+ assert_eq ! ( result. len( ) , 2 ) ;
674+ assert_eq ! ( result[ 0 ] . column_type, "int64" ) ;
675+ assert_eq ! ( result[ 1 ] . column_type, "varchar" ) ;
676+ }
677+
678+ #[ test]
679+ fn test_reconstruct_list_columns_struct_parent_unchanged ( ) {
680+ // Struct parents should NOT be rewritten — child stays in result
681+ let rows = vec ! [
682+ (
683+ DuckLakeTableColumn :: new( 1 , "data" . into( ) , "struct" . into( ) , true ) ,
684+ None ,
685+ ) ,
686+ (
687+ DuckLakeTableColumn :: new( 2 , "field_a" . into( ) , "int32" . into( ) , true ) ,
688+ Some ( 1 ) ,
689+ ) ,
690+ ] ;
691+
692+ let result = reconstruct_list_columns ( rows) ;
693+ assert_eq ! ( result. len( ) , 2 ) ; // both remain
694+ assert_eq ! ( result[ 0 ] . column_type, "struct" ) ; // unchanged
695+ }
696+
697+ #[ test]
698+ fn test_reconstruct_list_columns_multiple_lists ( ) {
699+ let rows = vec ! [
700+ (
701+ DuckLakeTableColumn :: new( 1 , "tags" . into( ) , "list" . into( ) , true ) ,
702+ None ,
703+ ) ,
704+ (
705+ DuckLakeTableColumn :: new( 2 , "element" . into( ) , "varchar" . into( ) , true ) ,
706+ Some ( 1 ) ,
707+ ) ,
708+ (
709+ DuckLakeTableColumn :: new( 3 , "scores" . into( ) , "list" . into( ) , true ) ,
710+ None ,
711+ ) ,
712+ (
713+ DuckLakeTableColumn :: new( 4 , "element" . into( ) , "float64" . into( ) , true ) ,
714+ Some ( 3 ) ,
715+ ) ,
716+ ] ;
717+
718+ let result = reconstruct_list_columns ( rows) ;
719+ assert_eq ! ( result. len( ) , 2 ) ;
720+ assert_eq ! ( result[ 0 ] . column_type, "list<varchar>" ) ;
721+ assert_eq ! ( result[ 1 ] . column_type, "list<float64>" ) ;
722+ }
723+
724+ #[ test]
725+ fn test_reconstruct_list_columns_with_table_basic ( ) {
726+ let rows = vec ! [
727+ (
728+ ColumnWithTable {
729+ schema_name: "main" . into( ) ,
730+ table_name: "t" . into( ) ,
731+ column: DuckLakeTableColumn :: new( 6 , "vector" . into( ) , "list" . into( ) , true ) ,
732+ } ,
733+ None ,
734+ ) ,
735+ (
736+ ColumnWithTable {
737+ schema_name: "main" . into( ) ,
738+ table_name: "t" . into( ) ,
739+ column: DuckLakeTableColumn :: new( 7 , "element" . into( ) , "float64" . into( ) , true ) ,
740+ } ,
741+ Some ( 6 ) ,
742+ ) ,
743+ ] ;
744+
745+ let result = reconstruct_list_columns_with_table ( rows) ;
746+ assert_eq ! ( result. len( ) , 1 ) ;
747+ assert_eq ! ( result[ 0 ] . column. column_type, "list<float64>" ) ;
748+ }
749+ }
0 commit comments