Skip to content

Commit 87c652d

Browse files
committed
test: Add more push down support
1 parent 1e2f7ba commit 87c652d

2 files changed

Lines changed: 55 additions & 13 deletions

File tree

src/lance_scan.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ struct LanceScanBindData : public TableFunctionData {
9090
ArrowTableSchema arrow_table;
9191
vector<string> names;
9292
vector<LogicalType> types;
93-
string lance_complex_filter_sql;
93+
string lance_pushed_filter_sql;
9494

9595
~LanceScanBindData() override {
9696
if (dataset) {
@@ -413,7 +413,7 @@ static bool TrySerializeLanceExpr(const LogicalGet &get,
413413
}
414414
out_sql = "(" + child_sql +
415415
(op.type == ExpressionType::OPERATOR_IS_NULL ? " IS NULL)"
416-
: " IS NOT NULL)");
416+
: " IS NOT NULL)");
417417
return true;
418418
}
419419
if (op.type == ExpressionType::COMPARE_IN ||
@@ -467,7 +467,7 @@ static bool TrySerializeLanceExpr(const LogicalGet &get,
467467
}
468468

469469
static void
470-
LancePushdownComplexFilter(ClientContext &, LogicalGet &get,
470+
LancePushdownComplexFilter(ClientContext &context, LogicalGet &get,
471471
FunctionData *bind_data,
472472
vector<unique_ptr<Expression>> &filters) {
473473
if (!bind_data || filters.empty()) {
@@ -493,11 +493,11 @@ LancePushdownComplexFilter(ClientContext &, LogicalGet &get,
493493
return;
494494
}
495495
auto pushed_sql = StringUtil::Join(predicates, " AND ");
496-
if (scan_bind.lance_complex_filter_sql.empty()) {
497-
scan_bind.lance_complex_filter_sql = std::move(pushed_sql);
496+
if (scan_bind.lance_pushed_filter_sql.empty()) {
497+
scan_bind.lance_pushed_filter_sql = std::move(pushed_sql);
498498
} else {
499-
scan_bind.lance_complex_filter_sql =
500-
"(" + scan_bind.lance_complex_filter_sql + ") AND (" + pushed_sql + ")";
499+
scan_bind.lance_pushed_filter_sql =
500+
"(" + scan_bind.lance_pushed_filter_sql + ") AND (" + pushed_sql + ")";
501501
}
502502
}
503503

@@ -583,13 +583,15 @@ LanceScanInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
583583
}
584584

585585
auto table_filter_sql = BuildLanceFilterSQL(bind_data, input);
586-
if (bind_data.lance_complex_filter_sql.empty()) {
586+
auto pushed_filter_sql = bind_data.lance_pushed_filter_sql;
587+
588+
if (pushed_filter_sql.empty()) {
587589
scan_state.lance_filter_sql = std::move(table_filter_sql);
588590
} else if (table_filter_sql.empty()) {
589-
scan_state.lance_filter_sql = bind_data.lance_complex_filter_sql;
591+
scan_state.lance_filter_sql = std::move(pushed_filter_sql);
590592
} else {
591-
scan_state.lance_filter_sql = "(" + bind_data.lance_complex_filter_sql +
592-
") AND (" + table_filter_sql + ")";
593+
scan_state.lance_filter_sql =
594+
"(" + pushed_filter_sql + ") AND (" + table_filter_sql + ")";
593595
}
594596
return state;
595597
}

test/sql/lance.test

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@ SELECT 1
1414
statement ok
1515
SELECT * FROM lance_scan('test/test_data.lance') LIMIT 1
1616

17-
# Test parallel scan + filter pushdown
17+
# Baseline: materialize once and evaluate filters in DuckDB
18+
statement ok
19+
CREATE TEMP TABLE baseline AS SELECT * FROM 'test/test_data.lance'
20+
21+
# Test parallel scan + filter pushdown correctness
1822
statement ok
1923
PRAGMA threads=4
2024

@@ -33,7 +37,7 @@ SELECT count(*) FROM 'test/test_data.lance' WHERE age > 30 AND score < 90
3337
----
3438
2
3539

36-
# Test complex filter forms (OR across columns, NOT, BETWEEN, NOT IN)
40+
# Test complex filter forms
3741
query I
3842
SELECT count(*) FROM 'test/test_data.lance' WHERE age > 30 OR score < 90
3943
----
@@ -62,6 +66,42 @@ Charlie
6266
David
6367
Eve
6468

69+
# Equivalence: baseline table (DuckDB evaluation) matches expected results
70+
query I
71+
SELECT count(*) FROM baseline WHERE age > 30
72+
----
73+
3
74+
75+
query I
76+
SELECT count(*) FROM baseline WHERE name IN ('Alice', 'Eve')
77+
----
78+
2
79+
80+
query I
81+
SELECT count(*) FROM baseline WHERE age > 30 AND score < 90
82+
----
83+
2
84+
85+
query I
86+
SELECT count(*) FROM baseline WHERE age > 30 OR score < 90
87+
----
88+
4
89+
90+
query I
91+
SELECT count(*) FROM baseline WHERE NOT (age > 30)
92+
----
93+
2
94+
95+
query I
96+
SELECT count(*) FROM baseline WHERE age BETWEEN 26 AND 40
97+
----
98+
3
99+
100+
query I
101+
SELECT count(*) FROM baseline WHERE name NOT IN ('Alice', 'Eve')
102+
----
103+
3
104+
65105
# Test replacement scan for SELECT * FROM '.../dataset.lance'
66106
statement ok
67107
SELECT * FROM 'test/test_data.lance' LIMIT 1

0 commit comments

Comments
 (0)