@@ -1306,16 +1306,23 @@ FilterSQLResult DuckLakeMetadataManager::ConvertFilterPushdownToSQL(const Filter
13061306 if (!conditions.empty ()) {
13071307 conditions += " AND " ;
13081308 }
1309+ // Files that have no stats entry for this column (i.e., written before the column was added) must
1310+ // NOT be pruned: without stats we cannot determine whether the filter can be satisfied.
13091311 if (needs_value_count_guard) {
1310- conditions += StringUtil::Format (" data.data_file_id IN (SELECT data_file_id FROM %s WHERE "
1311- " (value_count IS NULL OR value_count > 0) AND (%s(%s)))" ,
1312- cte_name, null_checks.c_str (), filter_condition.c_str ());
1312+ conditions += StringUtil::Format (
1313+ " (data.data_file_id NOT IN (SELECT data_file_id FROM %s) OR "
1314+ " data.data_file_id IN (SELECT data_file_id FROM %s WHERE "
1315+ " (value_count IS NULL OR value_count > 0) AND (%s(%s))))" ,
1316+ cte_name, cte_name, null_checks.c_str (), filter_condition.c_str ());
13131317 } else {
1314- conditions += StringUtil::Format (" data.data_file_id IN (SELECT data_file_id FROM %s WHERE %s(%s))" ,
1315- cte_name, null_checks.c_str (), filter_condition.c_str ());
1318+ conditions += StringUtil::Format (
1319+ " (data.data_file_id NOT IN (SELECT data_file_id FROM %s) OR "
1320+ " data.data_file_id IN (SELECT data_file_id FROM %s WHERE %s(%s)))" ,
1321+ cte_name, cte_name, null_checks.c_str (), filter_condition.c_str ());
13161322 }
13171323
13181324 CTERequirement req (column_filter.column_field_index , referenced_stats);
1325+ req.reference_count = 2 ;
13191326 result.required_ctes .emplace (column_filter.column_field_index , std::move (req));
13201327 }
13211328
0 commit comments