Skip to content

Commit 6ee079a

Browse files
committed
cosmetic & fmt
Signed-off-by: Adrian Tanase <[email protected]>
1 parent 6dcfbc9 commit 6ee079a

File tree

3 files changed

+41
-44
lines changed

3 files changed

+41
-44
lines changed

crates/core/src/delta_datafusion/mod.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@ use datafusion_expr::execution_props::ExecutionProps;
5858
use datafusion_expr::logical_plan::CreateExternalTable;
5959
use datafusion_expr::simplify::SimplifyContext;
6060
use datafusion_expr::utils::conjunction;
61-
use datafusion_expr::{col, BinaryExpr, Expr, Extension, LogicalPlan, Operator, TableProviderFilterPushDown, Volatility};
61+
use datafusion_expr::{
62+
col, BinaryExpr, Expr, Extension, LogicalPlan, Operator, TableProviderFilterPushDown,
63+
Volatility,
64+
};
6265
use datafusion_physical_expr::{create_physical_expr, PhysicalExpr};
6366
use datafusion_physical_plan::filter::FilterExec;
6467
use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
@@ -867,13 +870,13 @@ pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool {
867870
| Expr::IsNotNull(_)
868871
| Expr::IsNull(_)
869872
| Expr::Between(_)
870-
| Expr::InList(_) => Ok(TreeNodeRecursion::Continue),
873+
| Expr::InList(_) => Ok(TreeNodeRecursion::Continue),
871874
_ => {
872875
is_applicable = false;
873876
Ok(TreeNodeRecursion::Stop)
874877
}
875878
})
876-
.unwrap();
879+
.unwrap();
877880
is_applicable
878881
}
879882

crates/core/src/kernel/snapshot/log_data.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -891,7 +891,7 @@ mod datafusion {
891891
arrow_cast::cast(batch.column_by_name("output")?, &ArrowDataType::UInt64).ok()
892892
}
893893

894-
// This function is required for partition column pruning to be executed correctly
894+
// This function is optional but will optimize partition column pruning
895895
fn contained(&self, column: &Column, value: &HashSet<ScalarValue>) -> Option<BooleanArray> {
896896
if value.is_empty() || !self.metadata.partition_columns.contains(&column.name) {
897897
return None;
@@ -918,9 +918,7 @@ mod datafusion {
918918
| ScalarValue::Utf8View(Some(v))
919919
| ScalarValue::LargeUtf8(Some(v)) => pv == v,
920920

921-
ScalarValue::Dictionary(_, inner) => {
922-
check_scalar(pv, inner)
923-
}
921+
ScalarValue::Dictionary(_, inner) => check_scalar(pv, inner),
924922
// FIXME: is this a good enough default or should we sync this with
925923
// expr_applicable_for_cols and bail out with None
926924
_ => value.to_string() == pv,
@@ -931,9 +929,11 @@ mod datafusion {
931929
if partition_values.is_null(i) {
932930
contains.push(false);
933931
} else {
934-
contains.push(value.iter().any(|scalar| {
935-
check_scalar(partition_values.value(i), scalar)
936-
}));
932+
contains.push(
933+
value
934+
.iter()
935+
.any(|scalar| check_scalar(partition_values.value(i), scalar)),
936+
);
937937
}
938938
}
939939

crates/core/tests/integration_datafusion.rs

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,19 @@ use serial_test::serial;
3939
use url::Url;
4040

4141
mod local {
42+
use super::*;
4243
use datafusion::datasource::source::DataSourceExec;
4344
use datafusion::{common::stats::Precision, datasource::provider_as_source};
4445
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
45-
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, TableProviderFilterPushDown, TableScan};
46+
use datafusion_expr::{
47+
LogicalPlan, LogicalPlanBuilder, TableProviderFilterPushDown, TableScan,
48+
};
4649
use deltalake_core::{
4750
delta_datafusion::DeltaLogicalCodec, logstore::default_logstore, writer::JsonWriter,
4851
};
4952
use itertools::Itertools;
5053
use object_store::local::LocalFileSystem;
5154
use TableProviderFilterPushDown::{Exact, Inexact};
52-
use super::*;
5355
#[tokio::test]
5456
#[serial]
5557
async fn test_datafusion_local() -> TestResult {
@@ -210,26 +212,10 @@ mod local {
210212
}
211213
}
212214

213-
fn find_table_scan(plan: &LogicalPlan) -> Vec<&Expr> {
214-
let mut result = vec![];
215-
216-
plan.apply(|node| {
217-
if let LogicalPlan::TableScan(TableScan { ref filters, ..}) = node {
218-
result = filters.iter().collect();
219-
Ok(TreeNodeRecursion::Stop) // Stop traversal once found
220-
} else {
221-
Ok(TreeNodeRecursion::Continue) // Continue traversal
222-
}
223-
}).expect("Traversal should not fail");
224-
225-
result
226-
}
227-
228215
#[tokio::test]
229216
async fn test_datafusion_query_partitioned_pushdown() -> Result<()> {
230217
let ctx = SessionContext::new();
231-
let table = open_table("../test/tests/data/delta-0.8.0-partitioned")
232-
.await?;
218+
let table = open_table("../test/tests/data/delta-0.8.0-partitioned").await?;
233219
ctx.register_table("demo", Arc::new(table.clone()))?;
234220

235221
let pruning_predicates = [
@@ -238,30 +224,40 @@ mod local {
238224
PruningTestCase::new("year = '2021'"),
239225
PruningTestCase::with_push_down(
240226
"year = '2021' AND day IS NOT NULL",
241-
vec![Exact, Exact]
227+
vec![Exact, Exact],
242228
),
243229
PruningTestCase::new("year IN ('2021', '2022')"),
244230
// NOT IN (a, b) is rewritten as (col != a AND col != b)
245-
PruningTestCase::with_push_down(
246-
"year NOT IN ('2020', '2022')",
247-
vec![Exact, Exact]
248-
),
231+
PruningTestCase::with_push_down("year NOT IN ('2020', '2022')", vec![Exact, Exact]),
249232
// BETWEEN a AND b is rewritten as (col >= a AND col < b)
250-
PruningTestCase::with_push_down(
251-
"year BETWEEN '2021' AND '2022'",
252-
vec![Exact, Exact]
253-
),
233+
PruningTestCase::with_push_down("year BETWEEN '2021' AND '2022'", vec![Exact, Exact]),
254234
PruningTestCase::new("year NOT BETWEEN '2019' AND '2020'"),
255235
PruningTestCase::with_push_down(
256236
"year = '2021' AND day IN ('4', '5', '20')",
257-
vec![Exact, Exact]
237+
vec![Exact, Exact],
258238
),
259239
PruningTestCase::with_push_down(
260240
"year = '2021' AND cast(day as int) <= 20",
261-
vec![Exact, Inexact]
241+
vec![Exact, Inexact],
262242
),
263243
];
264244

245+
fn find_scan_filters(plan: &LogicalPlan) -> Vec<&Expr> {
246+
let mut result = vec![];
247+
248+
plan.apply(|node| {
249+
if let LogicalPlan::TableScan(TableScan { ref filters, .. }) = node {
250+
result = filters.iter().collect();
251+
Ok(TreeNodeRecursion::Stop) // Stop traversal once found
252+
} else {
253+
Ok(TreeNodeRecursion::Continue) // Continue traversal
254+
}
255+
})
256+
.expect("Traversal should not fail");
257+
258+
result
259+
}
260+
265261
for pp in pruning_predicates {
266262
let pred = pp.sql;
267263
let sql = format!("SELECT CAST( day as int ) as my_day FROM demo WHERE {pred} ORDER BY CAST( day as int ) ASC");
@@ -271,14 +267,12 @@ mod local {
271267

272268
// validate that we are correctly qualifying filters as Exact or Inexact
273269
let plan = df.clone().into_optimized_plan()?;
274-
let filters = find_table_scan(&plan);
270+
let filters = find_scan_filters(&plan);
275271
let push_down = table.supports_filters_pushdown(&filters)?;
276272

277273
assert_eq!(push_down, pp.push_down);
278274

279-
let batches = df
280-
.collect()
281-
.await?;
275+
let batches = df.collect().await?;
282276

283277
let batch = &batches[0];
284278

0 commit comments

Comments
 (0)