@@ -250,6 +250,56 @@ static void ApplyPartitionConstants(const IcebergMultiFileList &multi_file_list,
250250 }
251251}
252252
253+ ReaderInitializeType IcebergMultiFileReader::InitializeReader (MultiFileReaderData &reader_data,
254+ const MultiFileBindData &bind_data,
255+ const vector<MultiFileColumnDefinition> &global_columns,
256+ const vector<ColumnIndex> &global_column_ids,
257+ optional_ptr<TableFilterSet> table_filters,
258+ ClientContext &context, MultiFileGlobalState &gstate) {
259+
260+ FinalizeBind (reader_data, bind_data.file_options , bind_data.reader_bind , global_columns, global_column_ids, context,
261+ gstate.multi_file_reader_state .get ());
262+
263+ unordered_map<int32_t , column_t > id_to_global_column;
264+ for (column_t i = 0 ; i < global_columns.size (); i++) {
265+ auto &col = global_columns[i];
266+ D_ASSERT (!col.identifier .IsNull ());
267+ id_to_global_column[col.identifier .GetValue <int32_t >()] = i;
268+ }
269+
270+ set<int32_t > equality_delete_ids;
271+ const auto &multi_file_list = dynamic_cast <const IcebergMultiFileList &>(gstate.file_list );
272+ auto &reader = *reader_data.reader ;
273+ auto file_id = reader.file_list_idx .GetIndex ();
274+ auto &data_file = multi_file_list.data_files [file_id];
275+
276+ auto delete_data_it = multi_file_list.equality_delete_data .upper_bound (data_file.sequence_number );
277+ for (; delete_data_it != multi_file_list.equality_delete_data .end (); delete_data_it++) {
278+ auto &files = delete_data_it->second ->files ;
279+ for (auto &file : files) {
280+ auto &rows = file.rows ;
281+ for (auto &row : rows) {
282+ auto &filters = row.filters ;
283+ for (auto &filter : filters) {
284+ equality_delete_ids.insert (filter.first );
285+ }
286+ }
287+ }
288+ }
289+
290+ vector<ColumnIndex> new_global_column_ids = global_column_ids;
291+ for (auto field_id : equality_delete_ids) {
292+ auto global_column_id = id_to_global_column[field_id];
293+ ColumnIndex equality_index (global_column_id);
294+ if (std::find (global_column_ids.begin (), global_column_ids.end (), equality_index) == global_column_ids.end ()) {
295+ new_global_column_ids.push_back (equality_index);
296+ }
297+ }
298+
299+ return CreateMapping (context, reader_data, global_columns, new_global_column_ids, table_filters, gstate.file_list ,
300+ bind_data.reader_bind , bind_data.virtual_columns );
301+ }
302+
253303void IcebergMultiFileReader::FinalizeBind (MultiFileReaderData &reader_data, const MultiFileOptions &file_options,
254304 const MultiFileReaderBindData &options,
255305 const vector<MultiFileColumnDefinition> &global_columns,
@@ -291,7 +341,8 @@ void IcebergMultiFileReader::FinalizeBind(MultiFileReaderData &reader_data, cons
291341void IcebergMultiFileReader::ApplyEqualityDeletes (ClientContext &context, DataChunk &output_chunk,
292342 const IcebergMultiFileList &multi_file_list,
293343 const IcebergManifestEntry &data_file,
294- const vector<MultiFileColumnDefinition> &local_columns) {
344+ const vector<MultiFileColumnDefinition> &local_columns,
345+ unordered_map<idx_t , idx_t > field_id_to_result_id) {
295346 vector<reference<IcebergEqualityDeleteRow>> delete_rows;
296347
297348 auto &metadata = multi_file_list.GetMetadata ();
@@ -360,7 +411,25 @@ void IcebergMultiFileReader::ApplyEqualityDeletes(ClientContext &context, DataCh
360411 equalities.push_back (make_uniq<BoundConstantExpression>(Value::BOOLEAN (true )));
361412 }
362413 } else {
363- equalities.push_back (expression->Copy ());
414+ if (field_id_to_result_id.empty ()) {
415+ equalities.push_back (expression->Copy ());
416+ } else {
417+ idx_t index = field_id_to_result_id[field_id];
418+ if (expression->type == ExpressionType::COMPARE_NOTEQUAL) {
419+ auto &expr = expression->Cast <BoundComparisonExpression>();
420+ auto bound_ref = make_uniq<BoundReferenceExpression>(expr.left ->return_type , index);
421+ unique_ptr<Expression> equality_filter = make_uniq<BoundComparisonExpression>(
422+ ExpressionType::COMPARE_NOTEQUAL, std::move (bound_ref), expr.right ->Copy ());
423+ equalities.push_back (std::move (equality_filter));
424+ } else if (expression->type == ExpressionType::OPERATOR_IS_NOT_NULL) {
425+ auto &expr = expression->Cast <BoundOperatorExpression>();
426+ auto bound_ref = make_uniq<BoundReferenceExpression>(expr.children [0 ]->return_type , index);
427+ auto is_not_null = make_uniq<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL,
428+ LogicalType::BOOLEAN);
429+ is_not_null->children .push_back (std::move (bound_ref));
430+ equalities.push_back (std::move (is_not_null));
431+ }
432+ }
364433 }
365434 }
366435
@@ -399,17 +468,46 @@ void IcebergMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFi
399468 DataChunk &input_chunk, DataChunk &output_chunk,
400469 ExpressionExecutor &executor,
401470 optional_ptr<MultiFileReaderGlobalState> global_state) {
471+
472+ // add the extra equality delete fields to output chunk.
473+ int32_t diff = 0 ;
474+ if (executor.expressions .size () != output_chunk.ColumnCount ()) {
475+ diff = executor.expressions .size () - output_chunk.ColumnCount ();
476+ for (int32_t i = diff; i > 0 ; i--) {
477+ int32_t index = input_chunk.ColumnCount () - i;
478+ output_chunk.data .emplace_back (input_chunk.data [index]);
479+ }
480+ }
481+
402482 // Base class finalization first
403483 MultiFileReader::FinalizeChunk (context, bind_data, reader, reader_data, input_chunk, output_chunk, executor,
404484 global_state);
405485
486+ auto &local_columns = reader.columns ;
487+ unordered_map<idx_t , idx_t > column_index_to_field_id;
488+ for (idx_t i = 0 ; i < local_columns.size (); i++) {
489+ auto &col = local_columns[i];
490+ column_index_to_field_id[i] = col.identifier .GetValue <int32_t >();
491+ }
492+ unordered_map<idx_t , idx_t > field_id_to_result_id;
493+ vector<ColumnIndex> column_indexes = reader.column_indexes ;
494+ int32_t result_id = executor.expressions .size () - 1 ;
495+ for (int32_t i = column_indexes.size () - 1 ; i >= 0 ; i--) {
496+ ColumnIndex column_index = column_indexes[i];
497+ field_id_to_result_id[column_index_to_field_id[column_index.GetPrimaryIndex ()]] = result_id--;
498+ }
499+
406500 D_ASSERT (global_state);
407501 // Get the metadata for this file
408502 const auto &multi_file_list = dynamic_cast <const IcebergMultiFileList &>(*global_state->file_list );
409503 auto file_id = reader.file_list_idx .GetIndex ();
410504 auto &data_file = multi_file_list.data_files [file_id];
411- auto &local_columns = reader.columns ;
412- ApplyEqualityDeletes (context, output_chunk, multi_file_list, data_file, local_columns);
505+ ApplyEqualityDeletes (context, output_chunk, multi_file_list, data_file, local_columns, field_id_to_result_id);
506+
507+ // delete the equality delete fields for result
508+ for (idx_t i = 0 ; i < diff; i++) {
509+ output_chunk.data .pop_back ();
510+ }
413511}
414512
415513bool IcebergMultiFileReader::ParseOption (const string &key, const Value &val, MultiFileOptions &options,
0 commit comments