@@ -250,6 +250,128 @@ fn parse_decimal(type_str: &str) -> Result<Option<DataType>> {
250250 }
251251}
252252
253+ /// Normalize a DuckLake type string to its canonical form.
254+ ///
255+ /// Converts aliases and case variants to the canonical DuckLake type string.
256+ /// For example: "int" -> "int32", "INTEGER" -> "int32", "text" -> "varchar".
257+ ///
258+ /// Returns the canonical type string, or an error if the type is unrecognized.
259+ pub fn normalize_ducklake_type ( ducklake_type : & str ) -> Result < String > {
260+ let arrow_type = ducklake_to_arrow_type ( ducklake_type) ?;
261+ arrow_to_ducklake_type ( & arrow_type)
262+ }
263+
264+ /// Check if a type can be safely promoted (widened) to another type.
265+ ///
266+ /// Type promotion allows safe widening of numeric types during schema evolution.
267+ /// Both type strings are normalized before comparison.
268+ ///
269+ /// Supported promotions:
270+ /// - Signed integer widening: int8 -> int16 -> int32 -> int64
271+ /// - Unsigned integer widening: uint8 -> uint16 -> uint32 -> uint64
272+ /// - Float widening: float32 -> float64
273+ /// - Integer to float: any int -> float64
274+ /// - Timestamp: timestamp -> timestamptz
275+ /// - Decimal: smaller precision/scale -> larger precision/scale
276+ pub fn is_promotable ( from : & str , to : & str ) -> bool {
277+ let from_arrow = match ducklake_to_arrow_type ( from) {
278+ Ok ( t) => t,
279+ Err ( _) => return false ,
280+ } ;
281+ let to_arrow = match ducklake_to_arrow_type ( to) {
282+ Ok ( t) => t,
283+ Err ( _) => return false ,
284+ } ;
285+
286+ is_arrow_promotable ( & from_arrow, & to_arrow)
287+ }
288+
289+ /// Check if one Arrow DataType can be safely promoted to another.
290+ fn is_arrow_promotable ( from : & DataType , to : & DataType ) -> bool {
291+ use DataType :: * ;
292+
293+ // Same type is trivially promotable
294+ if from == to {
295+ return true ;
296+ }
297+
298+ fn signed_int_rank ( dt : & DataType ) -> Option < u8 > {
299+ match dt {
300+ Int8 => Some ( 0 ) ,
301+ Int16 => Some ( 1 ) ,
302+ Int32 => Some ( 2 ) ,
303+ Int64 => Some ( 3 ) ,
304+ _ => None ,
305+ }
306+ }
307+
308+ fn unsigned_int_rank ( dt : & DataType ) -> Option < u8 > {
309+ match dt {
310+ UInt8 => Some ( 0 ) ,
311+ UInt16 => Some ( 1 ) ,
312+ UInt32 => Some ( 2 ) ,
313+ UInt64 => Some ( 3 ) ,
314+ _ => None ,
315+ }
316+ }
317+
318+ // Signed integer widening
319+ if let ( Some ( from_rank) , Some ( to_rank) ) = ( signed_int_rank ( from) , signed_int_rank ( to) ) {
320+ return from_rank < to_rank;
321+ }
322+
323+ // Unsigned integer widening
324+ if let ( Some ( from_rank) , Some ( to_rank) ) = ( unsigned_int_rank ( from) , unsigned_int_rank ( to) ) {
325+ return from_rank < to_rank;
326+ }
327+
328+ // Float widening
329+ if matches ! ( from, Float32 ) && matches ! ( to, Float64 ) {
330+ return true ;
331+ }
332+
333+ // Integer to float64 (safe for reasonable values)
334+ if signed_int_rank ( from) . is_some ( ) && matches ! ( to, Float64 ) {
335+ return true ;
336+ }
337+
338+ // Timestamp -> TimestampTZ
339+ if matches ! ( from, Timestamp ( _, None ) ) && matches ! ( to, Timestamp ( _, Some ( _) ) ) {
340+ return true ;
341+ }
342+
343+ // Decimal widening: larger precision/scale
344+ match ( from, to) {
345+ ( Decimal128 ( fp, fs) | Decimal256 ( fp, fs) , Decimal128 ( tp, ts) | Decimal256 ( tp, ts) ) => {
346+ tp >= fp && ts >= fs
347+ }
348+ _ => false ,
349+ }
350+ }
351+
352+ /// Check if two DuckLake type strings are compatible for schema evolution.
353+ ///
354+ /// Types are compatible if they normalize to the same canonical type,
355+ /// or if the existing type can be safely promoted to the new type.
356+ pub fn types_compatible ( existing_type : & str , new_type : & str ) -> bool {
357+ // First try normalization: if both normalize to the same canonical form, they match
358+ let existing_normalized = match normalize_ducklake_type ( existing_type) {
359+ Ok ( t) => t,
360+ Err ( _) => return false ,
361+ } ;
362+ let new_normalized = match normalize_ducklake_type ( new_type) {
363+ Ok ( t) => t,
364+ Err ( _) => return false ,
365+ } ;
366+
367+ if existing_normalized == new_normalized {
368+ return true ;
369+ }
370+
371+ // Then check if promotion is allowed
372+ is_promotable ( existing_type, new_type)
373+ }
374+
253375/// Build an Arrow schema from a list of DuckLake table columns
254376pub fn build_arrow_schema ( columns : & [ DuckLakeTableColumn ] ) -> Result < Schema > {
255377 let fields: Result < Vec < Field > > = columns
@@ -850,4 +972,233 @@ mod tests {
850972 "Negative column_id within i32 range should succeed"
851973 ) ;
852974 }
975+
976+ // ── normalize_ducklake_type tests ──
977+
/// Every spelling of the 32-bit integer type normalizes to "int32".
#[test]
fn test_normalize_int_aliases() {
    for spelling in ["int", "integer", "INT", "Integer", "int32"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "int32",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
986+
/// Every spelling of the 64-bit integer type normalizes to "int64".
#[test]
fn test_normalize_bigint_aliases() {
    for spelling in ["bigint", "long", "BIGINT", "int64"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "int64",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
994+
/// Every spelling of the string type normalizes to "varchar".
#[test]
fn test_normalize_string_aliases() {
    for spelling in ["text", "string", "varchar", "TEXT", "STRING"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "varchar",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
1003+
/// Every spelling of the single-precision float type normalizes to "float32".
#[test]
fn test_normalize_float_aliases() {
    for spelling in ["float", "real", "FLOAT", "float32"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "float32",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
1011+
/// Every spelling of the double-precision float type normalizes to "float64".
#[test]
fn test_normalize_double_aliases() {
    for spelling in ["double", "DOUBLE", "float64"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "float64",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
1018+
/// Every spelling of the boolean type normalizes to "boolean".
#[test]
fn test_normalize_bool_aliases() {
    for spelling in ["bool", "boolean", "BOOLEAN"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "boolean",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
1025+
/// Every spelling of the 16-bit integer type normalizes to "int16".
#[test]
fn test_normalize_smallint_aliases() {
    for spelling in ["smallint", "SMALLINT", "int16"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "int16",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
1032+
/// Every spelling of the 8-bit integer type normalizes to "int8".
#[test]
fn test_normalize_tinyint_aliases() {
    for spelling in ["tinyint", "TINYINT", "int8"] {
        assert_eq!(
            normalize_ducklake_type(spelling).unwrap(),
            "int8",
            "unexpected canonical form for {:?}",
            spelling
        );
    }
}
1039+
/// A string that names no DuckLake type is reported as an error.
#[test]
fn test_normalize_unknown_type_errors() {
    let outcome = normalize_ducklake_type("foobar");
    assert!(outcome.is_err());
}
1044+
1045+ // ── is_promotable tests ──
1046+
/// A type is always promotable to itself.
#[test]
fn test_promotable_same_type() {
    for ty in ["int32", "varchar", "float64"] {
        assert!(is_promotable(ty, ty), "{} should be promotable to itself", ty);
    }
}
1053+
/// Signed integers may grow to any wider signed integer.
#[test]
fn test_promotable_signed_int_widening() {
    let widenings = [
        ("int8", "int16"),
        ("int8", "int32"),
        ("int8", "int64"),
        ("int16", "int32"),
        ("int16", "int64"),
        ("int32", "int64"),
    ];
    for (narrow, wide) in widenings {
        assert!(
            is_promotable(narrow, wide),
            "{} -> {} should be promotable",
            narrow,
            wide
        );
    }
}
1063+
/// Shrinking a signed integer is never a promotion.
#[test]
fn test_promotable_signed_int_narrowing_rejected() {
    let narrowings = [("int64", "int32"), ("int32", "int16"), ("int16", "int8")];
    for (wide, narrow) in narrowings {
        assert!(
            !is_promotable(wide, narrow),
            "{} -> {} should be rejected",
            wide,
            narrow
        );
    }
}
1070+
/// Unsigned integers may grow to a wider unsigned integer.
#[test]
fn test_promotable_unsigned_int_widening() {
    let widenings = [
        ("uint8", "uint16"),
        ("uint8", "uint32"),
        ("uint8", "uint64"),
        ("uint16", "uint32"),
        ("uint32", "uint64"),
    ];
    for (narrow, wide) in widenings {
        assert!(
            is_promotable(narrow, wide),
            "{} -> {} should be promotable",
            narrow,
            wide
        );
    }
}
1079+
/// Shrinking an unsigned integer is never a promotion.
#[test]
fn test_promotable_unsigned_narrowing_rejected() {
    let narrowings = [("uint64", "uint32"), ("uint32", "uint16")];
    for (wide, narrow) in narrowings {
        assert!(
            !is_promotable(wide, narrow),
            "{} -> {} should be rejected",
            wide,
            narrow
        );
    }
}
1085+
/// float32 may be widened to float64.
#[test]
fn test_promotable_float_widening() {
    let widened = is_promotable("float32", "float64");
    assert!(widened);
}
1090+
/// float64 may not be narrowed to float32.
#[test]
fn test_promotable_float_narrowing_rejected() {
    let narrowed = is_promotable("float64", "float32");
    assert!(!narrowed);
}
1095+
/// Every signed integer width may be promoted to float64.
#[test]
fn test_promotable_int_to_float64() {
    for int_type in ["int8", "int16", "int32", "int64"] {
        assert!(
            is_promotable(int_type, "float64"),
            "{} -> float64 should be promotable",
            int_type
        );
    }
}
1103+
/// Integers only promote to float64; float32 is not an accepted target.
#[test]
fn test_promotable_int_to_float32_rejected() {
    let promoted = is_promotable("int32", "float32");
    assert!(!promoted);
}
1109+
/// A zone-less timestamp may gain a time zone.
#[test]
fn test_promotable_timestamp_to_timestamptz() {
    let promoted = is_promotable("timestamp", "timestamptz");
    assert!(promoted);
}
1114+
/// Dropping the time zone from a timestamp is not a promotion.
#[test]
fn test_promotable_timestamptz_to_timestamp_rejected() {
    let promoted = is_promotable("timestamptz", "timestamp");
    assert!(!promoted);
}
1119+
/// Decimals may be promoted to decimals that have at least as many integer
/// digits and at least as many fractional digits.
#[test]
fn test_promotable_decimal_widening() {
    assert!(is_promotable("decimal(10, 2)", "decimal(18, 4)"));
    assert!(is_promotable("decimal(10, 2)", "decimal(10, 2)")); // same
    assert!(is_promotable("decimal(10, 2)", "decimal(20, 2)")); // wider precision

    // Wider scale. The precision grows by the same amount so the 8 integer
    // digits of decimal(10, 2) are kept; decimal(10, 4) would leave only 6 and
    // is therefore not a safe widening to assert here.
    assert!(is_promotable("decimal(10, 2)", "decimal(12, 4)"));
}
1127+
/// A decimal may not be promoted to one with less precision.
#[test]
fn test_promotable_decimal_narrowing_rejected() {
    let narrowings = [
        ("decimal(18, 4)", "decimal(10, 2)"),
        ("decimal(20, 2)", "decimal(10, 2)"), // narrower precision
    ];
    for (wide, narrow) in narrowings {
        assert!(
            !is_promotable(wide, narrow),
            "{} -> {} should be rejected",
            wide,
            narrow
        );
    }
}
1133+
/// Types from unrelated families never promote into each other.
#[test]
fn test_promotable_incompatible_types() {
    let unrelated = [
        ("int32", "varchar"),
        ("varchar", "int32"),
        ("boolean", "int32"),
        ("date", "timestamp"),
    ];
    for (from, to) in unrelated {
        assert!(
            !is_promotable(from, to),
            "{} -> {} should be rejected",
            from,
            to
        );
    }
}
1141+
/// An unparseable type on either side makes the promotion check fail.
#[test]
fn test_promotable_unknown_types() {
    for (from, to) in [("foobar", "int32"), ("int32", "foobar")] {
        assert!(
            !is_promotable(from, to),
            "{} -> {} should be rejected",
            from,
            to
        );
    }
}
1147+
/// Aliases are resolved before the promotion rules are applied.
#[test]
fn test_promotable_with_aliases() {
    let aliased = [
        ("int", "bigint"),      // int32 -> int64
        ("tinyint", "integer"), // int8 -> int32
        ("float", "double"),    // float32 -> float64
    ];
    for (from, to) in aliased {
        assert!(
            is_promotable(from, to),
            "{} -> {} should be promotable",
            from,
            to
        );
    }
}
1155+
1156+ // ── types_compatible tests ──
1157+
/// Two spellings of the same canonical type are compatible.
#[test]
fn test_types_compatible_same_canonical() {
    let equivalents = [
        ("int", "int32"),
        ("int32", "int"),
        ("integer", "int"),
        ("text", "varchar"),
        ("string", "text"),
        ("bigint", "int64"),
        ("float", "real"),
        ("double", "float64"),
        ("bool", "boolean"),
    ];
    for (existing, new) in equivalents {
        assert!(
            types_compatible(existing, new),
            "{} and {} should be compatible",
            existing,
            new
        );
    }
}
1170+
/// Letter case does not affect compatibility.
#[test]
fn test_types_compatible_case_insensitive() {
    let mixed_case = [("INT", "int32"), ("VARCHAR", "text"), ("BIGINT", "int64")];
    for (existing, new) in mixed_case {
        assert!(
            types_compatible(existing, new),
            "{} and {} should be compatible",
            existing,
            new
        );
    }
}
1177+
/// A change that is a valid promotion counts as compatible.
#[test]
fn test_types_compatible_with_promotion() {
    let promotions = [
        ("int32", "int64"),
        ("float32", "float64"),
        ("timestamp", "timestamptz"),
    ];
    for (existing, new) in promotions {
        assert!(
            types_compatible(existing, new),
            "{} -> {} should be compatible",
            existing,
            new
        );
    }
}
1184+
/// Narrowing changes are not compatible.
#[test]
fn test_types_compatible_narrowing_rejected() {
    for (existing, new) in [("int64", "int32"), ("float64", "float32")] {
        assert!(
            !types_compatible(existing, new),
            "{} -> {} should be rejected",
            existing,
            new
        );
    }
}
1190+
/// Types from unrelated families are not compatible.
#[test]
fn test_types_compatible_incompatible() {
    let unrelated = [
        ("int32", "varchar"),
        ("varchar", "int32"),
        ("boolean", "float64"),
    ];
    for (existing, new) in unrelated {
        assert!(
            !types_compatible(existing, new),
            "{} -> {} should be rejected",
            existing,
            new
        );
    }
}
1197+
/// An unrecognized type on either side (or both) is never compatible.
#[test]
fn test_types_compatible_unknown() {
    let with_unknown = [
        ("foobar", "int32"),
        ("int32", "foobar"),
        ("foobar", "bazqux"),
    ];
    for (existing, new) in with_unknown {
        assert!(
            !types_compatible(existing, new),
            "{} -> {} should be rejected",
            existing,
            new
        );
    }
}
8531204}
0 commit comments