3030#include " duckdb/planner/filter/conjunction_filter.hpp"
3131#include " duckdb/common/types/value_map.hpp"
3232#include " duckdb/main/settings.hpp"
33+ #include < limits>
3334#include < list>
35+ #include < utility>
3436
3537namespace duckdb {
3638
@@ -520,86 +522,158 @@ vector<unique_ptr<Expression>> ExtractFilterExpressions(const ColumnDefinition &
520522
521523bool TryScanIndex (ART &art, const ColumnList &column_list, TableFunctionInitInput &input, TableFilterSet &filter_set,
522524 idx_t max_count, set<row_t > &row_ids) {
523- // FIXME: No support for index scans on compound ARTs.
524- // See note above on multi-filter support.
525- if (art.unbound_expressions .size () > 1 ) {
526- return false ;
525+ vector<unique_ptr<Expression>> index_exprs;
526+ for (const auto &expr : art.unbound_expressions ) {
527+ index_exprs.push_back (expr->Copy ());
527528 }
528529
529- auto index_expr = art. unbound_expressions [ 0 ]-> Copy ();
530+ // If this is a view, the column IDs are (may be?) relative to the view projection
530531 auto &indexed_columns = art.GetColumnIds ();
531532
532- // NOTE: We do not push down multi-column filters, e.g., 42 = a + b.
533- if (indexed_columns.size () != 1 ) {
533+ // Allow composite ART scans
534+ if (indexed_columns.size () != index_exprs. size () ) {
534535 return false ;
535536 }
536537
537538 // Resolve bound column references in the index_expr against the current input projection
538- column_t updated_index_column;
539- bool found_index_column_in_input = false ;
540-
541- // Find the indexed column amongst the input columns
542- for (idx_t i = 0 ; i < input.column_ids .size (); ++i) {
543- if (input.column_ids [i] == indexed_columns[0 ]) {
544- updated_index_column = i;
545- found_index_column_in_input = true ;
546- break ;
539+ bool rewrite_index_exprs = false ;
540+ vector<column_t > index_column_to_input_pos;
541+ index_column_to_input_pos.resize (indexed_columns.size (), std::numeric_limits<idx_t >::max ());
542+
543+ // Associate indexed columns to input columns
544+ for (idx_t i = 0 ; i < indexed_columns.size (); ++i) {
545+ for (idx_t j = 0 ; j < input.column_ids .size (); ++j) {
546+ if (indexed_columns[i] == input.column_ids [j]) {
547+ rewrite_index_exprs = i != j;
548+ index_column_to_input_pos.at (i) = j;
549+ break ;
550+ }
547551 }
548552 }
549553
550- // If found, update the bound column ref within index_expr
551- if (found_index_column_in_input) {
552- ExpressionIterator::EnumerateExpression (index_expr, [&](Expression &expr) {
553- if (expr.GetExpressionClass () != ExpressionClass::BOUND_COLUMN_REF) {
554- return ;
555- }
556-
557- auto &bound_column_ref_expr = expr.Cast <BoundColumnRefExpression>();
554+ // Make sure that all indexed_columns were bound, or bail out
555+ for (auto col : index_column_to_input_pos) {
556+ if (col == std::numeric_limits<idx_t >::max ()) {
557+ return false ;
558+ }
559+ }
558560
559- // If the bound column references the index column, use updated_index_column
560- if (bound_column_ref_expr.binding .column_index == indexed_columns[0 ]) {
561- bound_column_ref_expr.binding .column_index = updated_index_column;
561+ // Allow scan only if index expressions reference ONE column each, and that column
562+ // is associated with an indexed_column
563+ // NOTE: We do not push down multi-column filters, e.g., 42 = a + b.
564+ for (idx_t i = 0 ; i < index_exprs.size (); ++i) {
565+ unordered_set<column_t > referenced_columns;
566+ auto expr = &index_exprs[i];
567+
568+ // Walk the expr in case of nesting (e.g. function)
569+ ExpressionIterator::EnumerateExpression (*expr, [&](Expression &child_expr) {
570+ if (child_expr.GetExpressionClass () == ExpressionClass::BOUND_COLUMN_REF) {
571+ auto &col_ref = child_expr.Cast <BoundColumnRefExpression>();
572+ referenced_columns.insert (col_ref.binding .column_index );
562573 }
563574 });
575+
576+ if (referenced_columns.size () != 1 ) {
577+ return false ;
578+ }
579+
580+ // Make sure the column reference can be looked up
581+ auto ref_col_idx = *referenced_columns.begin ();
582+ if (ref_col_idx >= index_column_to_input_pos.size () || ref_col_idx >= input.column_ids .size ()) {
583+ return false ;
584+ }
585+
586+ // The column for this position matches the indexed_column ID for this position directly
587+ auto direct_match = input.column_ids [ref_col_idx] == indexed_columns[i];
588+
589+ // We should know if there is a different mapping for this reference.
590+ // If there is not, it won't match, so it is not worth trying.
591+ if (!direct_match && !rewrite_index_exprs) {
592+ return false ;
593+ }
594+
595+ auto remapped_cid_position = index_column_to_input_pos[ref_col_idx];
596+ auto remapped_match = remapped_cid_position < input.column_ids .size () &&
597+ input.column_ids [remapped_cid_position] == indexed_columns[i];
598+
599+ if (!(direct_match || remapped_match)) {
600+ return false ;
601+ }
564602 }
565603
566- // Get ART column.
567- auto &col = column_list.GetColumn (LogicalIndex (indexed_columns[0 ]));
604+ // If the position of the indexed_columns differs from the order of the input, remap the index expressions
605+ if (rewrite_index_exprs) {
606+ for (auto &index_expr : index_exprs) {
607+ ExpressionIterator::EnumerateExpression (index_expr, [&](Expression &expr) {
608+ if (expr.GetExpressionClass () != ExpressionClass::BOUND_COLUMN_REF) {
609+ return ;
610+ }
611+
612+ auto &bound_column_ref_expr = expr.Cast <BoundColumnRefExpression>();
568613
569- // The indexes of the filters match input.column_indexes, which are: i -> column_index.
570- // Try to find a filter on the ART column.
571- optional_idx storage_index;
572- for (idx_t i = 0 ; i < input.column_indexes .size (); i++) {
573- if (input.column_indexes [i].ToLogical () == col.Logical ()) {
574- storage_index = i;
575- break ;
614+ // If the bound column references an indexed column, update it
615+ for (idx_t i = 0 ; i < indexed_columns.size (); ++i) {
616+ auto remapped_index = index_column_to_input_pos[bound_column_ref_expr.binding .column_index ];
617+ if (input.column_ids [remapped_index] == indexed_columns[i]) {
618+ bound_column_ref_expr.binding .column_index = index_column_to_input_pos[i];
619+ break ;
620+ }
621+ }
622+ });
576623 }
577624 }
578625
579- // No filter matches the ART column.
580- if (!storage_index.IsValid ()) {
581- return false ;
626+ // The indexes of the filters match input.column_indexes, which are: i -> column_index.
627+ // Reuse the index <-> projection mappings from index expr rebinding (which are canonical even if not rewriting)
628+ vector<vector<unique_ptr<Expression>>> index_filters;
629+
630+ for (idx_t i = 0 ; i < index_column_to_input_pos.size (); ++i) {
631+ auto column_def = &column_list.GetColumn (LogicalIndex (indexed_columns[i]));
632+ auto maybe_filter = filter_set.filters .find (index_column_to_input_pos[i]);
633+ if (maybe_filter != filter_set.filters .end ()) {
634+ auto filter = &maybe_filter->second ;
635+ auto filter_expressions = ExtractFilterExpressions (*column_def, *filter, index_column_to_input_pos[i]);
636+
637+ index_filters.push_back (std::move (filter_expressions));
638+ }
582639 }
583640
584- // Try to find a matching filter for the column.
585- auto filter = filter_set.filters .find (storage_index.GetIndex ());
586- if (filter == filter_set.filters .end ()) {
641+ // Index filters must:
642+ // - Match ART column count 1:1
643+ // - Match filter expression set 1:1 (there may be filters on non-indexed columns, bail out if so)
644+ if (index_filters.size () != indexed_columns.size () || filter_set.filters .size () != index_filters.size () ||
645+ index_filters.empty ()) {
587646 return false ;
588647 }
589648
590- auto expressions = ExtractFilterExpressions (col, filter-> second , storage_index. GetIndex ());
591- for ( const auto &filter_expr : expressions ) {
592- auto scan_state = art.TryInitializeScan (*index_expr, *filter_expr );
649+ // Do a compound scan if we have filter exprs bound for several columns
650+ if (index_filters. size () > 1 ) {
651+ auto scan_state = art.TryInitializeCompoundKeyScan (index_exprs, index_filters );
593652 if (!scan_state) {
594653 return false ;
595654 }
596655
597- // Check if we can use an index scan, and already retrieve the matching row ids.
598- if (!art.Scan (*scan_state, max_count, row_ids)) {
656+ if (!art.CompoundKeyScan (*scan_state, max_count, row_ids)) {
599657 row_ids.clear ();
600658 return false ;
601659 }
602660 }
661+ // Original single column index scan
662+ else {
663+ for (const auto &filter_expr : index_filters[0 ]) {
664+ auto scan_state = art.TryInitializeScan (*index_exprs[0 ], *filter_expr);
665+ if (!scan_state) {
666+ return false ;
667+ }
668+
669+ // Check if we can use an index scan, and already retrieve the matching row ids.
670+ if (!art.Scan (*scan_state, max_count, row_ids)) {
671+ row_ids.clear ();
672+ return false ;
673+ }
674+ }
675+ }
676+
603677 return true ;
604678}
605679
@@ -622,9 +696,9 @@ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context,
622696 // 1.2. Find + scan one ART for b = 24.
623697 // 1.3. Return the intersecting row IDs.
624698 // 2. (Reorder and) scan a single ART with a compound key of (a, b).
625- if (filter_set.filters .size () != 1 ) {
626- return DuckTableScanInitGlobal (context, input, storage, bind_data);
627- }
699+ // if (filter_set.filters.size() != 1) {
700+ // return DuckTableScanInitGlobal(context, input, storage, bind_data);
701+ // }
628702
629703 // The checkpoint lock ensures that we do not checkpoint while scanning this table.
630704 auto &transaction = DuckTransaction::Get (context, storage.db );
0 commit comments