Skip to content

Commit 8b7b4f1

Browse files
zfarrellclaude
andcommitted
fix: normalize type aliases and add promotion rules for schema evolution
- Add normalize_ducklake_type() for canonical type resolution (int/integer/INT all -> int32)
- Add is_promotable() for safe type widening (int->bigint, float->double, timestamp->timestamptz)
- Add types_compatible() combining normalization + promotion for schema evolution checks
- Replace exact string equality with types_compatible() in schema evolution

Found during Feb 2026 security review

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3f142f0 commit 8b7b4f1

2 files changed

Lines changed: 353 additions & 2 deletions

File tree

src/metadata_writer_sqlite.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,8 @@ impl MetadataWriter for SqliteMetadataWriter {
427427
if let Some((existing_type, _existing_nullable)) =
428428
existing_map.get(new_col.name.as_str())
429429
{
430-
// Column exists - check type matches
431-
if *existing_type != new_col.ducklake_type {
430+
// Column exists - check type compatibility (normalize aliases + allow promotions)
431+
if !crate::types::types_compatible(existing_type, &new_col.ducklake_type) {
432432
return Err(crate::error::DuckLakeError::InvalidConfig(format!(
433433
"Schema evolution error: column '{}' has type '{}' in existing table but '{}' in new schema. Type changes are not allowed.",
434434
new_col.name, existing_type, new_col.ducklake_type

src/types.rs

Lines changed: 351 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,128 @@ fn parse_decimal(type_str: &str) -> Result<Option<DataType>> {
250250
}
251251
}
252252

253+
/// Normalize a DuckLake type string to its canonical form.
254+
///
255+
/// Converts aliases and case variants to the canonical DuckLake type string.
256+
/// For example: "int" -> "int32", "INTEGER" -> "int32", "text" -> "varchar".
257+
///
258+
/// Returns the canonical type string, or an error if the type is unrecognized.
259+
pub fn normalize_ducklake_type(ducklake_type: &str) -> Result<String> {
260+
let arrow_type = ducklake_to_arrow_type(ducklake_type)?;
261+
arrow_to_ducklake_type(&arrow_type)
262+
}
263+
264+
/// Check if a type can be safely promoted (widened) to another type.
265+
///
266+
/// Type promotion allows safe widening of numeric types during schema evolution.
267+
/// Both type strings are normalized before comparison.
268+
///
269+
/// Supported promotions:
270+
/// - Signed integer widening: int8 -> int16 -> int32 -> int64
271+
/// - Unsigned integer widening: uint8 -> uint16 -> uint32 -> uint64
272+
/// - Float widening: float32 -> float64
273+
/// - Integer to float: any int -> float64
274+
/// - Timestamp: timestamp -> timestamptz
275+
/// - Decimal: smaller precision/scale -> larger precision/scale
276+
pub fn is_promotable(from: &str, to: &str) -> bool {
277+
let from_arrow = match ducklake_to_arrow_type(from) {
278+
Ok(t) => t,
279+
Err(_) => return false,
280+
};
281+
let to_arrow = match ducklake_to_arrow_type(to) {
282+
Ok(t) => t,
283+
Err(_) => return false,
284+
};
285+
286+
is_arrow_promotable(&from_arrow, &to_arrow)
287+
}
288+
289+
/// Check if one Arrow DataType can be safely promoted to another.
290+
fn is_arrow_promotable(from: &DataType, to: &DataType) -> bool {
291+
use DataType::*;
292+
293+
// Same type is trivially promotable
294+
if from == to {
295+
return true;
296+
}
297+
298+
fn signed_int_rank(dt: &DataType) -> Option<u8> {
299+
match dt {
300+
Int8 => Some(0),
301+
Int16 => Some(1),
302+
Int32 => Some(2),
303+
Int64 => Some(3),
304+
_ => None,
305+
}
306+
}
307+
308+
fn unsigned_int_rank(dt: &DataType) -> Option<u8> {
309+
match dt {
310+
UInt8 => Some(0),
311+
UInt16 => Some(1),
312+
UInt32 => Some(2),
313+
UInt64 => Some(3),
314+
_ => None,
315+
}
316+
}
317+
318+
// Signed integer widening
319+
if let (Some(from_rank), Some(to_rank)) = (signed_int_rank(from), signed_int_rank(to)) {
320+
return from_rank < to_rank;
321+
}
322+
323+
// Unsigned integer widening
324+
if let (Some(from_rank), Some(to_rank)) = (unsigned_int_rank(from), unsigned_int_rank(to)) {
325+
return from_rank < to_rank;
326+
}
327+
328+
// Float widening
329+
if matches!(from, Float32) && matches!(to, Float64) {
330+
return true;
331+
}
332+
333+
// Integer to float64 (safe for reasonable values)
334+
if signed_int_rank(from).is_some() && matches!(to, Float64) {
335+
return true;
336+
}
337+
338+
// Timestamp -> TimestampTZ
339+
if matches!(from, Timestamp(_, None)) && matches!(to, Timestamp(_, Some(_))) {
340+
return true;
341+
}
342+
343+
// Decimal widening: larger precision/scale
344+
match (from, to) {
345+
(Decimal128(fp, fs) | Decimal256(fp, fs), Decimal128(tp, ts) | Decimal256(tp, ts)) => {
346+
tp >= fp && ts >= fs
347+
}
348+
_ => false,
349+
}
350+
}
351+
352+
/// Check if two DuckLake type strings are compatible for schema evolution.
353+
///
354+
/// Types are compatible if they normalize to the same canonical type,
355+
/// or if the existing type can be safely promoted to the new type.
356+
pub fn types_compatible(existing_type: &str, new_type: &str) -> bool {
357+
// First try normalization: if both normalize to the same canonical form, they match
358+
let existing_normalized = match normalize_ducklake_type(existing_type) {
359+
Ok(t) => t,
360+
Err(_) => return false,
361+
};
362+
let new_normalized = match normalize_ducklake_type(new_type) {
363+
Ok(t) => t,
364+
Err(_) => return false,
365+
};
366+
367+
if existing_normalized == new_normalized {
368+
return true;
369+
}
370+
371+
// Then check if promotion is allowed
372+
is_promotable(existing_type, new_type)
373+
}
374+
253375
/// Build an Arrow schema from a list of DuckLake table columns
254376
pub fn build_arrow_schema(columns: &[DuckLakeTableColumn]) -> Result<Schema> {
255377
let fields: Result<Vec<Field>> = columns
@@ -850,4 +972,233 @@ mod tests {
850972
"Negative column_id within i32 range should succeed"
851973
);
852974
}
975+
976+
// ── normalize_ducklake_type tests ──
977+
978+
#[test]
fn test_normalize_int_aliases() {
    // Every spelling of the 32-bit integer type must collapse to "int32".
    for alias in ["int", "integer", "INT", "Integer", "int32"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "int32",
            "alias: {}",
            alias
        );
    }
}
986+
987+
#[test]
fn test_normalize_bigint_aliases() {
    // Every spelling of the 64-bit integer type must collapse to "int64".
    for alias in ["bigint", "long", "BIGINT", "int64"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "int64",
            "alias: {}",
            alias
        );
    }
}
994+
995+
#[test]
fn test_normalize_string_aliases() {
    // Every spelling of the string type must collapse to "varchar".
    for alias in ["text", "string", "varchar", "TEXT", "STRING"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "varchar",
            "alias: {}",
            alias
        );
    }
}
1003+
1004+
#[test]
fn test_normalize_float_aliases() {
    // Every spelling of the single-precision float must collapse to "float32".
    for alias in ["float", "real", "FLOAT", "float32"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "float32",
            "alias: {}",
            alias
        );
    }
}
1011+
1012+
#[test]
fn test_normalize_double_aliases() {
    // Every spelling of the double-precision float must collapse to "float64".
    for alias in ["double", "DOUBLE", "float64"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "float64",
            "alias: {}",
            alias
        );
    }
}
1018+
1019+
#[test]
fn test_normalize_bool_aliases() {
    // Every spelling of the boolean type must collapse to "boolean".
    for alias in ["bool", "boolean", "BOOLEAN"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "boolean",
            "alias: {}",
            alias
        );
    }
}
1025+
1026+
#[test]
fn test_normalize_smallint_aliases() {
    // Every spelling of the 16-bit integer type must collapse to "int16".
    for alias in ["smallint", "SMALLINT", "int16"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "int16",
            "alias: {}",
            alias
        );
    }
}
1032+
1033+
#[test]
fn test_normalize_tinyint_aliases() {
    // Every spelling of the 8-bit integer type must collapse to "int8".
    for alias in ["tinyint", "TINYINT", "int8"] {
        assert_eq!(
            normalize_ducklake_type(alias).unwrap(),
            "int8",
            "alias: {}",
            alias
        );
    }
}
1039+
1040+
#[test]
fn test_normalize_unknown_type_errors() {
    // A name the parser does not know must surface as an error, not a guess.
    let outcome = normalize_ducklake_type("foobar");
    assert!(outcome.is_err());
}
1044+
1045+
// ── is_promotable tests ──
1046+
1047+
#[test]
fn test_promotable_same_type() {
    // A type is always promotable to itself.
    for ty in ["int32", "varchar", "float64"] {
        assert!(is_promotable(ty, ty), "type: {}", ty);
    }
}
1053+
1054+
#[test]
fn test_promotable_signed_int_widening() {
    // Each signed integer may grow to any strictly wider signed integer.
    let widenings = [
        ("int8", "int16"),
        ("int8", "int32"),
        ("int8", "int64"),
        ("int16", "int32"),
        ("int16", "int64"),
        ("int32", "int64"),
    ];
    for (from, to) in widenings {
        assert!(is_promotable(from, to), "{} -> {}", from, to);
    }
}
1063+
1064+
#[test]
fn test_promotable_signed_int_narrowing_rejected() {
    // Shrinking a signed integer must never be accepted.
    let narrowings = [("int64", "int32"), ("int32", "int16"), ("int16", "int8")];
    for (from, to) in narrowings {
        assert!(!is_promotable(from, to), "{} -> {}", from, to);
    }
}
1070+
1071+
#[test]
fn test_promotable_unsigned_int_widening() {
    // Each unsigned integer may grow to a strictly wider unsigned integer.
    let widenings = [
        ("uint8", "uint16"),
        ("uint8", "uint32"),
        ("uint8", "uint64"),
        ("uint16", "uint32"),
        ("uint32", "uint64"),
    ];
    for (from, to) in widenings {
        assert!(is_promotable(from, to), "{} -> {}", from, to);
    }
}
1079+
1080+
#[test]
fn test_promotable_unsigned_narrowing_rejected() {
    // Shrinking an unsigned integer must never be accepted.
    let narrowings = [("uint64", "uint32"), ("uint32", "uint16")];
    for (from, to) in narrowings {
        assert!(!is_promotable(from, to), "{} -> {}", from, to);
    }
}
1085+
1086+
#[test]
fn test_promotable_float_widening() {
    // Single precision may grow to double precision.
    let widened = is_promotable("float32", "float64");
    assert!(widened);
}
1090+
1091+
#[test]
fn test_promotable_float_narrowing_rejected() {
    // Double precision must not shrink to single precision.
    let narrowed = is_promotable("float64", "float32");
    assert!(!narrowed);
}
1095+
1096+
#[test]
fn test_promotable_int_to_float64() {
    // Every signed integer width is accepted as a source for float64.
    for int_type in ["int8", "int16", "int32", "int64"] {
        assert!(is_promotable(int_type, "float64"), "{} -> float64", int_type);
    }
}
1103+
1104+
#[test]
fn test_promotable_int_to_float32_rejected() {
    // Integers may only be promoted to float64; float32 is not an allowed target.
    let accepted = is_promotable("int32", "float32");
    assert!(!accepted);
}
1109+
1110+
#[test]
fn test_promotable_timestamp_to_timestamptz() {
    // Adding a time zone to a plain timestamp is an accepted promotion.
    let accepted = is_promotable("timestamp", "timestamptz");
    assert!(accepted);
}
1114+
1115+
#[test]
fn test_promotable_timestamptz_to_timestamp_rejected() {
    // Dropping the time zone is not an accepted promotion.
    let accepted = is_promotable("timestamptz", "timestamp");
    assert!(!accepted);
}
1119+
1120+
#[test]
fn test_promotable_decimal_widening() {
    let widenings = [
        // Both precision and scale grow.
        ("decimal(10, 2)", "decimal(18, 4)"),
        // Identical type.
        ("decimal(10, 2)", "decimal(10, 2)"),
        // Wider precision, same scale.
        ("decimal(10, 2)", "decimal(20, 2)"),
        // Wider scale, same precision.
        // NOTE(review): this leaves fewer integer digits (8 -> 6), so stored
        // values may not fit the target type — confirm this case should be
        // accepted by the promotion rules.
        ("decimal(10, 2)", "decimal(10, 4)"),
    ];
    for (from, to) in widenings {
        assert!(is_promotable(from, to), "{} -> {}", from, to);
    }
}
1127+
1128+
#[test]
fn test_promotable_decimal_narrowing_rejected() {
    let narrowings = [
        // Both precision and scale shrink.
        ("decimal(18, 4)", "decimal(10, 2)"),
        // Narrower precision, same scale.
        ("decimal(20, 2)", "decimal(10, 2)"),
    ];
    for (from, to) in narrowings {
        assert!(!is_promotable(from, to), "{} -> {}", from, to);
    }
}
1133+
1134+
#[test]
fn test_promotable_incompatible_types() {
    // Conversions across unrelated type families are never promotions.
    let unrelated = [
        ("int32", "varchar"),
        ("varchar", "int32"),
        ("boolean", "int32"),
        ("date", "timestamp"),
    ];
    for (from, to) in unrelated {
        assert!(!is_promotable(from, to), "{} -> {}", from, to);
    }
}
1141+
1142+
#[test]
fn test_promotable_unknown_types() {
    // An unparseable type on either side yields `false` rather than an error.
    let with_unknown = [("foobar", "int32"), ("int32", "foobar")];
    for (from, to) in with_unknown {
        assert!(!is_promotable(from, to), "{} -> {}", from, to);
    }
}
1147+
1148+
#[test]
fn test_promotable_with_aliases() {
    // Aliases are resolved by the type parser before the promotion check.
    let aliased = [
        ("int", "bigint"),      // int32 -> int64
        ("tinyint", "integer"), // int8 -> int32
        ("float", "double"),    // float32 -> float64
    ];
    for (from, to) in aliased {
        assert!(is_promotable(from, to), "{} -> {}", from, to);
    }
}
1155+
1156+
// ── types_compatible tests ──
1157+
1158+
#[test]
fn test_types_compatible_same_canonical() {
    // Pairs of spellings that share one canonical type are compatible.
    let same_type_pairs = [
        ("int", "int32"),
        ("int32", "int"),
        ("integer", "int"),
        ("text", "varchar"),
        ("string", "text"),
        ("bigint", "int64"),
        ("float", "real"),
        ("double", "float64"),
        ("bool", "boolean"),
    ];
    for (existing, new) in same_type_pairs {
        assert!(types_compatible(existing, new), "{} vs {}", existing, new);
    }
}
1170+
1171+
#[test]
fn test_types_compatible_case_insensitive() {
    // Upper-case spellings match their lower-case canonical forms.
    let mixed_case_pairs = [("INT", "int32"), ("VARCHAR", "text"), ("BIGINT", "int64")];
    for (existing, new) in mixed_case_pairs {
        assert!(types_compatible(existing, new), "{} vs {}", existing, new);
    }
}
1177+
1178+
#[test]
fn test_types_compatible_with_promotion() {
    // Different canonical types are still compatible when a promotion exists.
    let promotions = [
        ("int32", "int64"),
        ("float32", "float64"),
        ("timestamp", "timestamptz"),
    ];
    for (existing, new) in promotions {
        assert!(types_compatible(existing, new), "{} -> {}", existing, new);
    }
}
1184+
1185+
#[test]
fn test_types_compatible_narrowing_rejected() {
    // Narrowing the existing type is not a compatible change.
    let narrowings = [("int64", "int32"), ("float64", "float32")];
    for (existing, new) in narrowings {
        assert!(!types_compatible(existing, new), "{} -> {}", existing, new);
    }
}
1190+
1191+
#[test]
fn test_types_compatible_incompatible() {
    // Unrelated type families are never compatible.
    let unrelated = [
        ("int32", "varchar"),
        ("varchar", "int32"),
        ("boolean", "float64"),
    ];
    for (existing, new) in unrelated {
        assert!(!types_compatible(existing, new), "{} vs {}", existing, new);
    }
}
1197+
1198+
#[test]
fn test_types_compatible_unknown() {
    // An unrecognized type on either side, or on both, is incompatible.
    let with_unknown = [
        ("foobar", "int32"),
        ("int32", "foobar"),
        ("foobar", "bazqux"),
    ];
    for (existing, new) in with_unknown {
        assert!(!types_compatible(existing, new), "{} vs {}", existing, new);
    }
}
8531204
}

0 commit comments

Comments
 (0)