Skip to content

Commit 5bc8412

Browse files
Upgrade delta_kernel to 0.9 (spiceai#5343)
* Upgrade delta_kernel to 0.7
* Upgrade to delta_kernel 0.9
1 parent 1498cdf commit 5bc8412

3 files changed

Lines changed: 38 additions & 38 deletions

File tree

Cargo.lock

Lines changed: 16 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/data_components/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,10 @@ datafusion-federation = { workspace = true }
2929
datafusion-federation-sql = { workspace = true }
3030
datafusion-table-providers = { workspace = true }
3131
db_connection_pool = { path = "../db_connection_pool" }
32-
delta_kernel = { version = "0.6.1", features = [
32+
delta_kernel = { version = "0.9", features = [
3333
"default-engine",
3434
"cloud",
35+
"arrow_54",
3536
], optional = true }
3637
document_parse = { path = "../document_parse" }
3738
duckdb = { workspace = true, features = [

crates/data_components/src/delta_lake.rs

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
3737
use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr};
3838
use datafusion::scalar::ScalarValue;
3939
use datafusion::sql::TableReference;
40+
use delta_kernel::ExpressionRef;
4041
use delta_kernel::Table;
4142
use delta_kernel::engine::default::DefaultEngine;
4243
use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor;
@@ -144,13 +145,13 @@ impl DeltaTable {
144145
.map_err(handle_delta_error)?;
145146

146147
let arrow_schema = Self::get_schema(&snapshot);
147-
let delta_schema = snapshot.schema().clone();
148+
let delta_schema = snapshot.schema();
148149

149150
Ok(Self {
150151
table,
151152
engine,
152153
arrow_schema: Arc::new(arrow_schema),
153-
delta_schema: Arc::new(delta_schema),
154+
delta_schema,
154155
})
155156
}
156157

@@ -359,18 +360,14 @@ impl TableProvider for DeltaTable {
359360
let mut scan_context = ScanContext::new(scan_state, Arc::clone(&engine));
360361

361362
let scan_iter = scan
362-
.scan_data(engine.as_ref())
363+
.scan_metadata(engine.as_ref())
363364
.map_err(map_delta_error_to_datafusion_err)?;
364365

365366
for scan_result in scan_iter {
366-
let data = scan_result.map_err(map_delta_error_to_datafusion_err)?;
367-
scan_context = delta_kernel::scan::state::visit_scan_files(
368-
data.0.as_ref(),
369-
data.1.as_ref(),
370-
scan_context,
371-
handle_scan_file,
372-
)
373-
.map_err(map_delta_error_to_datafusion_err)?;
367+
let scan = scan_result.map_err(map_delta_error_to_datafusion_err)?;
368+
scan_context = scan
369+
.visit_scan_files(scan_context, handle_scan_file)
370+
.map_err(map_delta_error_to_datafusion_err)?;
374371
}
375372

376373
Ok::<_, datafusion::error::DataFusionError>((
@@ -546,6 +543,16 @@ struct PartitionFileContext {
546543
partitioned_file: PartitionedFile,
547544
selection_vector: Option<Vec<bool>>,
548545
partition_values: HashMap<String, String>,
546+
547+
/// These are transforms that Delta wants to apply to the physical data read from the Parquet files.
548+
/// Currently this is only used for adding partition columns and mapping the columns read from the Parquet files
549+
/// into the correct place in the output schema.
550+
///
551+
/// Both of these functions are already handled for us by the `DataFusion` `ParquetExec`. However, we may need to
552+
/// revisit this if more complex transformations are required.
553+
///
554+
/// See: <https://github.com/delta-io/delta-kernel-rs/blob/7e62d12def00f248eccef23e7672fd4db553274f/kernel/src/scan/mod.rs#L444>
555+
_transform: Option<ExpressionRef>,
549556
}
550557

551558
#[allow(clippy::needless_pass_by_value)]
@@ -557,6 +564,7 @@ fn handle_scan_file(
557564
size: i64,
558565
_stats: Option<Stats>,
559566
dv_info: DvInfo,
567+
transform: Option<ExpressionRef>,
560568
partition_values: HashMap<String, String>,
561569
) {
562570
let root_url = match Url::parse(&scan_context.scan_state.table_root) {
@@ -619,6 +627,7 @@ fn handle_scan_file(
619627
partitioned_file,
620628
selection_vector,
621629
partition_values,
630+
_transform: transform,
622631
});
623632
}
624633

0 commit comments

Comments
 (0)