Skip to content

Commit 30ea0f5

Browse files
committed
Fix mis-pruned data files
1 parent e6a3bd0 commit 30ea0f5

2 files changed

Lines changed: 43 additions & 5 deletions

File tree

src/storage/ducklake_metadata_manager.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,16 +1306,23 @@ FilterSQLResult DuckLakeMetadataManager::ConvertFilterPushdownToSQL(const Filter
13061306
if (!conditions.empty()) {
13071307
conditions += " AND ";
13081308
}
1309+
// Files that have no stats entry for this column (i.e., written before the column was added) must
1310+
// NOT be pruned: without stats we cannot determine whether the filter can be satisfied.
13091311
if (needs_value_count_guard) {
1310-
conditions += StringUtil::Format("data.data_file_id IN (SELECT data_file_id FROM %s WHERE "
1311-
"(value_count IS NULL OR value_count > 0) AND (%s(%s)))",
1312-
cte_name, null_checks.c_str(), filter_condition.c_str());
1312+
conditions += StringUtil::Format(
1313+
"(data.data_file_id NOT IN (SELECT data_file_id FROM %s) OR "
1314+
"data.data_file_id IN (SELECT data_file_id FROM %s WHERE "
1315+
"(value_count IS NULL OR value_count > 0) AND (%s(%s))))",
1316+
cte_name, cte_name, null_checks.c_str(), filter_condition.c_str());
13131317
} else {
1314-
conditions += StringUtil::Format("data.data_file_id IN (SELECT data_file_id FROM %s WHERE %s(%s))",
1315-
cte_name, null_checks.c_str(), filter_condition.c_str());
1318+
conditions += StringUtil::Format(
1319+
"(data.data_file_id NOT IN (SELECT data_file_id FROM %s) OR "
1320+
"data.data_file_id IN (SELECT data_file_id FROM %s WHERE %s(%s)))",
1321+
cte_name, cte_name, null_checks.c_str(), filter_condition.c_str());
13161322
}
13171323

13181324
CTERequirement req(column_filter.column_field_index, referenced_stats);
1325+
req.reference_count = 2;
13191326
result.required_ctes.emplace(column_filter.column_field_index, std::move(req));
13201327
}
13211328

test/sql/issues/issue_1135.test

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# name: test/sql/issues/issue_1135.test
2+
# description: Test that filter pushdown works correctly with columns added via ALTER TABLE ADD COLUMN with DEFAULT
3+
# group: [issues]
4+
5+
require ducklake
6+
7+
require parquet
8+
9+
test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db
10+
11+
test-env DATA_PATH __TEST_DIR__
12+
13+
statement ok
14+
ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS duck (DATA_PATH '${DATA_PATH}/issue_1135')
15+
16+
statement ok
17+
CREATE TABLE duck.tbl (a INT);
18+
19+
statement ok
20+
INSERT INTO duck.tbl SELECT i FROM range(10) t(i);
21+
22+
statement ok
23+
CALL ducklake_flush_inlined_data('duck')
24+
25+
statement ok
26+
ALTER TABLE duck.tbl ADD COLUMN b INT DEFAULT 42;
27+
28+
query I
29+
SELECT count(*) FROM duck.tbl WHERE b = 42;
30+
----
31+
10

0 commit comments

Comments
 (0)