3131#include " duckdb/common/types/value_map.hpp"
3232#include " duckdb/main/settings.hpp"
3333#include " duckdb/transaction/duck_transaction_manager.hpp"
34+ #include < limits>
35+ #include < list>
36+ #include < utility>
3437
3538namespace duckdb {
3639
@@ -558,70 +561,127 @@ vector<unique_ptr<Expression>> ExtractFilterExpressions(const ColumnDefinition &
558561
559562bool TryScanIndex (ART &art, IndexEntry &entry, const ColumnList &column_list, TableFunctionInitInput &input,
560563 TableFilterSet &filter_set, idx_t max_count, set<row_t > &row_ids) {
561- // FIXME: No support for index scans on compound ARTs.
562- // See note above on multi-filter support.
563- if (art.unbound_expressions .size () > 1 ) {
564- return false ;
564+ vector<unique_ptr<Expression>> index_exprs;
565+ for (const auto &expr : art.unbound_expressions ) {
566+ index_exprs.push_back (expr->Copy ());
565567 }
566568
567- auto index_expr = art. unbound_expressions [ 0 ]-> Copy ();
569+ // If this is a view, the column IDs are (may be?) relative to the view projection
568570 auto &indexed_columns = art.GetColumnIds ();
569571
570- // NOTE: We do not push down multi-column filters, e.g., 42 = a + b.
571- if (indexed_columns.size () != 1 ) {
572+ // Allow composite ART scans
573+ if (indexed_columns.size () != index_exprs. size () ) {
572574 return false ;
573575 }
574576
575577 // Resolve bound column references in the index_expr against the current input projection
576- column_t updated_index_column;
577- bool found_index_column_in_input = false ;
578-
579- // Find the indexed column amongst the input columns
580- for (idx_t i = 0 ; i < input.column_ids .size (); ++i) {
581- if (input.column_ids [i] == indexed_columns[0 ]) {
582- updated_index_column = i;
583- found_index_column_in_input = true ;
584- break ;
578+ bool rewrite_index_exprs = false ;
579+ vector<column_t > index_column_to_input_pos;
580+ index_column_to_input_pos.resize (indexed_columns.size (), std::numeric_limits<idx_t >::max ());
581+
582+ // Associate indexed columns to input columns
583+ for (idx_t i = 0 ; i < indexed_columns.size (); ++i) {
584+ for (idx_t j = 0 ; j < input.column_ids .size (); ++j) {
585+ if (indexed_columns[i] == input.column_ids [j]) {
586+ rewrite_index_exprs = i != j;
587+ index_column_to_input_pos.at (i) = j;
588+ break ;
589+ }
585590 }
586591 }
587592
588- // If found, update the bound column ref within index_expr
589- if (found_index_column_in_input) {
590- ExpressionIterator::EnumerateExpression (index_expr, [&](Expression &expr) {
591- if (expr.GetExpressionClass () != ExpressionClass::BOUND_COLUMN_REF) {
592- return ;
593- }
594-
595- auto &bound_column_ref_expr = expr.Cast <BoundColumnRefExpression>();
593+ // Make sure that all indexed_columns were bound, or bail out
594+ for (auto col : index_column_to_input_pos) {
595+ if (col == std::numeric_limits<idx_t >::max ()) {
596+ return false ;
597+ }
598+ }
596599
597- // If the bound column references the index column, use updated_index_column
598- if (bound_column_ref_expr.binding .column_index == indexed_columns[0 ]) {
599- bound_column_ref_expr.binding .column_index = updated_index_column;
600+ // Allow scan only if index expressions reference ONE column each, and that column
601+ // is associated with an indexed_column
602+ // NOTE: We do not push down multi-column filters, e.g., 42 = a + b.
603+ for (idx_t i = 0 ; i < index_exprs.size (); ++i) {
604+ unordered_set<column_t > referenced_columns;
605+ auto expr = &index_exprs[i];
606+
607+ // Walk the expr in case of nesting (e.g. function)
608+ ExpressionIterator::EnumerateExpression (*expr, [&](Expression &child_expr) {
609+ if (child_expr.GetExpressionClass () == ExpressionClass::BOUND_COLUMN_REF) {
610+ auto &col_ref = child_expr.Cast <BoundColumnRefExpression>();
611+ referenced_columns.insert (col_ref.binding .column_index );
600612 }
601613 });
614+
615+ if (referenced_columns.size () != 1 ) {
616+ return false ;
617+ }
618+
619+ // Make sure the column reference can be looked up
620+ auto ref_col_idx = *referenced_columns.begin ();
621+ if (ref_col_idx >= index_column_to_input_pos.size () || ref_col_idx >= input.column_ids .size ()) {
622+ return false ;
623+ }
624+
625+ // The column for this position matches the indexed_column ID for this position directly
626+ auto direct_match = input.column_ids [ref_col_idx] == indexed_columns[i];
627+
628+ // We should know if there is a different mapping for this reference.
629+ // If there is not, it won't match, so it is not worth trying.
630+ if (!direct_match && !rewrite_index_exprs) {
631+ return false ;
632+ }
633+
634+ auto remapped_cid_position = index_column_to_input_pos[ref_col_idx];
635+ auto remapped_match = remapped_cid_position < input.column_ids .size () &&
636+ input.column_ids [remapped_cid_position] == indexed_columns[i];
637+
638+ if (!(direct_match || remapped_match)) {
639+ return false ;
640+ }
602641 }
603642
604- // Get ART column.
605- auto &col = column_list.GetColumn (LogicalIndex (indexed_columns[0 ]));
643+ // If the position of the indexed_columns differs from the order of the input, remap the index expressions
644+ if (rewrite_index_exprs) {
645+ for (auto &index_expr : index_exprs) {
646+ ExpressionIterator::EnumerateExpression (index_expr, [&](Expression &expr) {
647+ if (expr.GetExpressionClass () != ExpressionClass::BOUND_COLUMN_REF) {
648+ return ;
649+ }
606650
607- // The indexes of the filters match input.column_indexes, which are: i -> column_index.
608- // Try to find a filter on the ART column.
609- optional_idx storage_index;
610- for (idx_t i = 0 ; i < input.column_indexes .size (); i++) {
611- if (input.column_indexes [i].ToLogical () == col.Logical ()) {
612- storage_index = i;
613- break ;
651+ auto &bound_column_ref_expr = expr.Cast <BoundColumnRefExpression>();
652+
653+ // If the bound column references an indexed column, update it
654+ for (idx_t i = 0 ; i < indexed_columns.size (); ++i) {
655+ auto remapped_index = index_column_to_input_pos[bound_column_ref_expr.binding .column_index ];
656+ if (input.column_ids [remapped_index] == indexed_columns[i]) {
657+ bound_column_ref_expr.binding .column_index = index_column_to_input_pos[i];
658+ break ;
659+ }
660+ }
661+ });
614662 }
615663 }
616664
617- // No filter matches the ART column.
618- if (!storage_index.IsValid ()) {
619- return false ;
665+ // The indexes of the filters match input.column_indexes, which are: i -> column_index.
666+ // Reuse the index <-> projection mappings from index expr rebinding (which are canonical even if not rewriting)
667+ vector<vector<unique_ptr<Expression>>> index_filters;
668+
669+ for (idx_t i = 0 ; i < index_column_to_input_pos.size (); ++i) {
670+ auto column_def = &column_list.GetColumn (LogicalIndex (indexed_columns[i]));
671+ auto maybe_filter = filter_set.filters .find (index_column_to_input_pos[i]);
672+ if (maybe_filter != filter_set.filters .end ()) {
673+ auto filter = &maybe_filter->second ;
674+ auto filter_expressions = ExtractFilterExpressions (*column_def, *filter, index_column_to_input_pos[i]);
675+
676+ index_filters.push_back (std::move (filter_expressions));
677+ }
620678 }
621679
622- // Try to find a matching filter for the column.
623- auto filter = filter_set.filters .find (storage_index.GetIndex ());
624- if (filter == filter_set.filters .end ()) {
680+ // Index filters must:
681+ // - Match ART column count 1:1
682+ // - Match filter expression set 1:1 (there may be filters on non-indexed columns, bail out if so)
683+ if (index_filters.size () != indexed_columns.size () || filter_set.filters .size () != index_filters.size () ||
684+ index_filters.empty ()) {
625685 return false ;
626686 }
627687
@@ -641,22 +701,40 @@ bool TryScanIndex(ART &art, IndexEntry &entry, const ColumnList &column_list, Ta
641701 arts_to_scan.push_back (entry.added_data_during_checkpoint ->Cast <ART>());
642702 }
643703
644- auto expressions = ExtractFilterExpressions (col, filter-> second , storage_index. GetIndex ());
645- for ( const auto &filter_expr : expressions ) {
704+ // Do a compound scan if we have filter exprs bound for several columns
705+ if (index_filters. size () > 1 ) {
646706 for (auto &art_ref : arts_to_scan) {
647707 auto &art_to_scan = art_ref.get ();
648- auto scan_state = art_to_scan.TryInitializeScan (*index_expr, *filter_expr );
708+ auto scan_state = art_to_scan.TryInitializeCompoundKeyScan (index_exprs, index_filters );
649709 if (!scan_state) {
650710 return false ;
651711 }
652712
653- // Check if we can use an index scan, and already retrieve the matching row ids.
654- if (!art_to_scan.Scan (*scan_state, max_count, row_ids)) {
713+ if (!art_to_scan.CompoundKeyScan (*scan_state, max_count, row_ids)) {
655714 row_ids.clear ();
656715 return false ;
657716 }
658717 }
659718 }
719+ // Original single column index scan
720+ else {
721+ for (const auto &filter_expr : index_filters[0 ]) {
722+ for (auto &art_ref : arts_to_scan) {
723+ auto &art_to_scan = art_ref.get ();
724+ auto scan_state = art_to_scan.TryInitializeScan (*index_exprs[0 ], *filter_expr);
725+ if (!scan_state) {
726+ return false ;
727+ }
728+
729+ // Check if we can use an index scan, and already retrieve the matching row ids.
730+ if (!art_to_scan.Scan (*scan_state, max_count, row_ids)) {
731+ row_ids.clear ();
732+ return false ;
733+ }
734+ }
735+ }
736+ }
737+
660738 return true ;
661739}
662740
@@ -679,9 +757,9 @@ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context,
679757 // 1.2. Find + scan one ART for b = 24.
680758 // 1.3. Return the intersecting row IDs.
681759 // 2. (Reorder and) scan a single ART with a compound key of (a, b).
682- if (filter_set.filters .size () != 1 ) {
683- return DuckTableScanInitGlobal (context, input, storage, bind_data);
684- }
760+ // if (filter_set.filters.size() != 1) {
761+ // return DuckTableScanInitGlobal(context, input, storage, bind_data);
762+ // }
685763
686764 auto &info = storage.GetDataTableInfo ();
687765 auto &indexes = info->GetIndexes ();
0 commit comments