@@ -1306,13 +1306,19 @@ FilterSQLResult DuckLakeMetadataManager::ConvertFilterPushdownToSQL(const Filter
13061306 if (!conditions.empty ()) {
13071307 conditions += " AND " ;
13081308 }
1309+ // Files that have no stats entry for this column (i.e., written before the column was added) must
1310+ // NOT be pruned: without stats, we cannot determine whether the filter is satisfied.
13091311 if (needs_value_count_guard) {
1310- conditions += StringUtil::Format (" data.data_file_id IN (SELECT data_file_id FROM %s WHERE "
1311- " (value_count IS NULL OR value_count > 0) AND (%s(%s)))" ,
1312- cte_name, null_checks.c_str (), filter_condition.c_str ());
1312+ conditions += StringUtil::Format (
1313+ " (NOT EXISTS (SELECT 1 FROM %s s WHERE s.data_file_id = data.data_file_id) OR "
1314+ " EXISTS (SELECT 1 FROM %s s WHERE s.data_file_id = data.data_file_id "
1315+ " AND (value_count IS NULL OR value_count > 0) AND (%s(%s))))" ,
1316+ cte_name, cte_name, null_checks.c_str (), filter_condition.c_str ());
13131317 } else {
1314- conditions += StringUtil::Format (" data.data_file_id IN (SELECT data_file_id FROM %s WHERE %s(%s))" ,
1315- cte_name, null_checks.c_str (), filter_condition.c_str ());
1318+ conditions += StringUtil::Format (
1319+ " (NOT EXISTS (SELECT 1 FROM %s s WHERE s.data_file_id = data.data_file_id) OR "
1320+ " EXISTS (SELECT 1 FROM %s s WHERE s.data_file_id = data.data_file_id AND %s(%s)))" ,
1321+ cte_name, cte_name, null_checks.c_str (), filter_condition.c_str ());
13161322 }
13171323
13181324 CTERequirement req (column_filter.column_field_index , referenced_stats);
0 commit comments