Skip to content

Commit 4ad04d8

Browse files
committed
fix(duckdb): use actual DuckDB schema for read provider instead of cmd.schema
DuckDB may silently change types during table creation (e.g. Timestamp(ns, tz) becomes Timestamp(µs, tz) for TIMESTAMPTZ). The read provider must advertise the actual storage types so downstream operators (SortExec, RowConverter) receive batches matching the advertised schema. Query DuckDB via get_schema() after table creation to obtain the true storage schema, consistent with DuckDBTableFactory::table_provider() which already does this correctly. Fixes RowConverter column schema mismatch on ORDER BY with partitioned DuckDB acceleration.
1 parent b798c39 commit 4ad04d8

1 file changed

Lines changed: 63 additions & 2 deletions

File tree

core/src/duckdb.rs

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,9 +510,15 @@ impl TableProviderFactory for DuckDBTableProviderFactory {
510510
self.settings_registry
511511
.apply_settings(conn, &options, DuckDBSettingScope::Global)?;
512512

513+
// Use actual DuckDB storage schema for the read provider (may differ from cmd.schema).
514+
let schema_conn = dyn_pool.connect().await?;
515+
let read_schema = get_schema(schema_conn, &TableReference::bare(name.clone()))
516+
.await
517+
.map_err(|e| DataFusionError::External(Box::new(e)))?;
518+
513519
let read_provider = Arc::new(DuckDBTable::new_with_schema(
514520
&dyn_pool,
515-
Arc::clone(&schema),
521+
read_schema,
516522
TableReference::bare(name.clone()),
517523
None,
518524
Some(self.dialect.clone()),
@@ -798,7 +804,7 @@ pub(crate) mod tests {
798804
use crate::duckdb::write::DuckDBTableWriter;
799805

800806
use super::*;
801-
use arrow::datatypes::{DataType, Field, Schema};
807+
use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
802808
use datafusion::common::{Constraints, ToDFSchema};
803809
use datafusion::logical_expr::CreateExternalTable;
804810
use datafusion::prelude::SessionContext;
@@ -1117,4 +1123,59 @@ pub(crate) mod tests {
11171123
assert_eq!(e.to_string(), "External error: Query execution failed.\nInvalid Input Error: Failed to cast value: Could not convert string 'invalid' to BOOL\nFor details, refer to the DuckDB manual: https://duckdb.org/docs/");
11181124
}
11191125
}
1126+
1127+
/// Verifies the read provider advertises actual DuckDB storage types,
1128+
/// not the requested cmd.schema types.
1129+
#[tokio::test]
1130+
async fn test_read_provider_schema_reflects_actual_duckdb_types() {
1131+
let table_name = TableReference::bare("test_timestamp_schema");
1132+
let schema = Schema::new(vec![
1133+
Field::new("id", DataType::Int32, false),
1134+
Field::new(
1135+
"created_at",
1136+
DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
1137+
false,
1138+
),
1139+
]);
1140+
1141+
let mut options = HashMap::new();
1142+
options.insert("mode".to_string(), "memory".to_string());
1143+
1144+
let factory = DuckDBTableProviderFactory::new(duckdb::AccessMode::ReadWrite);
1145+
let ctx = SessionContext::new();
1146+
let cmd = CreateExternalTable {
1147+
schema: Arc::new(schema.to_dfschema().expect("to df schema")),
1148+
name: table_name,
1149+
location: "".to_string(),
1150+
file_type: "".to_string(),
1151+
table_partition_cols: vec![],
1152+
if_not_exists: false,
1153+
definition: None,
1154+
order_exprs: vec![],
1155+
unbounded: false,
1156+
options,
1157+
constraints: Constraints::default(),
1158+
column_defaults: HashMap::new(),
1159+
temporary: false,
1160+
or_replace: false,
1161+
};
1162+
1163+
let table_provider = factory
1164+
.create(&ctx.state(), &cmd)
1165+
.await
1166+
.expect("table provider created");
1167+
1168+
let read_schema = table_provider.schema();
1169+
let ts_field = read_schema
1170+
.field_with_name("created_at")
1171+
.expect("created_at field exists");
1172+
1173+
// DuckDB stores TIMESTAMPTZ as Microsecond regardless of requested precision.
1174+
match ts_field.data_type() {
1175+
DataType::Timestamp(TimeUnit::Microsecond, _) => {}
1176+
other => panic!(
1177+
"Expected Timestamp(Microsecond, _), got {other:?}"
1178+
),
1179+
}
1180+
}
11201181
}

0 commit comments

Comments
 (0)