Skip to content

Commit d368ef5

Browse files
committed
Merge pull request #7 from spiceai/mach/compound-art-scan
Squashed commit of the following: commit 36ffa5b Author: David Stancu <david@spice.ai> Date: Mon Nov 3 12:30:28 2025 -0500 tryscanindex sanity check: indexed_columns / art column ids may not need remapping if the scan is not a view scan commit 525f9c7 Author: David Stancu <david@spice.ai> Date: Thu Oct 30 10:42:17 2025 -0400 do not do index scan if there are other non index filters in the predicate (fix shutdown_create_index.test) commit b0a6e2d Author: David Stancu <david@spice.ai> Date: Thu Oct 30 10:04:54 2025 -0400 add test, bail out for eg composite query with IN () list commit a22a430 Author: David Stancu <david@spice.ai> Date: Wed Oct 29 16:37:30 2025 -0400 simplify filter expression storage index bindings (just reuse the ones we made earlier), fix single-ref-per-expr predicate to correctly walk expr tree and yank refs (allowing nesting in fns, etc) commit 9c8c1ed Author: David Stancu <david@spice.ai> Date: Wed Oct 29 15:11:23 2025 -0400 copy index expressions before rewriting column refs commit aff2c98 Author: David Stancu <david@spice.ai> Date: Wed Oct 29 14:36:33 2025 -0400 table scan: rebind projected columns in ALL index exprs do not bail out early if more than one index expr hook up composite key scan commit bfc6f02 Author: David Stancu <david@spice.ai> Date: Wed Oct 29 14:35:09 2025 -0400 make specialized compound key scan state for eq compares, specialized scan using ARTKey::Concat
1 parent b390a7c commit d368ef5

4 files changed

Lines changed: 207 additions & 44 deletions

File tree

src/execution/index/art/art.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "duckdb/execution/index/art/art.hpp"
22

3+
#include "duckdb/common/assert.hpp"
4+
#include "duckdb/common/helper.hpp"
5+
#include "duckdb/common/typedefs.hpp"
36
#include "duckdb/common/types/conflict_manager.hpp"
47
#include "duckdb/common/unordered_map.hpp"
58
#include "duckdb/common/vector_operations/vector_operations.hpp"
@@ -39,6 +42,17 @@ struct ARTIndexScanState : public IndexScanState {
3942
set<row_t> row_ids;
4043
};
4144

45+
//! Scan state for an equality lookup on a compound (multi-column) ART key.
struct ARTIndexCompoundKeyScanState : public IndexScanState {
46+
//! The predicate values to scan for.
47+
//! One value is collected for each equality filter on a constituent key of the compound index.
48+
vector<Value> values;
49+
//! The comparison type of each collected predicate, stored in the same order as `values`.
50+
vector<ExpressionType> expressions;
51+
// NOTE(review): `checked` is not referenced by the compound-scan code visible in this change - confirm it is needed.
bool checked = false;
52+
//! All scanned row IDs.
53+
set<row_t> row_ids;
54+
};
55+
4256
//===--------------------------------------------------------------------===//
4357
// ART
4458
//===--------------------------------------------------------------------===//
@@ -142,6 +156,34 @@ static unique_ptr<IndexScanState> InitializeScanTwoPredicates(const Value &low_v
142156
return std::move(result);
143157
}
144158

159+
// Build compound scan state by building individual index scans and collecting their exprs/values
160+
unique_ptr<IndexScanState> ART::TryInitializeCompoundKeyScan(const vector<unique_ptr<Expression>> &index_exprs,
161+
vector<vector<unique_ptr<Expression>>> &exprs) {
162+
auto compound_scan_state = make_uniq<ARTIndexCompoundKeyScanState>();
163+
164+
for (idx_t i = 0; i < index_exprs.size(); ++i) {
165+
auto index_expr = &index_exprs[i];
166+
auto filter_exprs = &exprs[i];
167+
168+
for (const auto &filter_expr : *filter_exprs) {
169+
auto single_scan = ART::TryInitializeScan(**index_expr, *filter_expr);
170+
if (!single_scan) {
171+
return nullptr;
172+
}
173+
174+
auto single_scan_concrete = single_scan->Cast<ARTIndexScanState>();
175+
if (single_scan_concrete.expressions[0] != ExpressionType::COMPARE_EQUAL) {
176+
return nullptr;
177+
}
178+
179+
compound_scan_state->values.push_back(single_scan_concrete.values[0]);
180+
compound_scan_state->expressions.push_back(single_scan_concrete.expressions[0]);
181+
}
182+
}
183+
184+
return compound_scan_state;
185+
}
186+
145187
unique_ptr<IndexScanState> ART::TryInitializeScan(const Expression &expr, const Expression &filter_expr) {
146188
Value low_value, high_value, equal_value;
147189
ExpressionType low_comparison_type = ExpressionType::INVALID, high_comparison_type = ExpressionType::INVALID;
@@ -675,6 +717,30 @@ bool ART::SearchCloseRange(ARTKey &lower_bound, ARTKey &upper_bound, bool left_e
675717
return it.Scan(upper_bound, max_count, row_ids, right_equal);
676718
}
677719

720+
//! Point lookup on a compound ART key: concatenates the per-column keys built from the
//! equality values in the scan state and searches for the resulting key.
//! Returns false if the state does not hold exactly one value per entry in `types`;
//! otherwise returns the result of SearchEqual for the concatenated key.
bool ART::CompoundKeyScan(IndexScanState &state, const idx_t max_count, set<row_t> &row_ids) {
	auto &scan_state = state.Cast<ARTIndexCompoundKeyScanState>();
	auto &values = scan_state.values;

	// The first key part is read unconditionally below, so an empty value list must bail out too.
	if (values.empty() || values.size() != types.size()) {
		return false;
	}

	for (idx_t i = 0; i < values.size(); ++i) {
		D_ASSERT(values[i].type().InternalType() == types[i]);
	}

	ArenaAllocator arena_allocator(Allocator::Get(db));

	// Build the compound key by appending each per-value key to the first one, in order.
	auto compound_key = ARTKey::CreateKey(arena_allocator, types[0], values[0]);
	for (idx_t i = 1; i < values.size(); ++i) {
		auto part_key = ARTKey::CreateKey(arena_allocator, types[i], values[i]);
		compound_key.Concat(arena_allocator, part_key);
	}

	// The lock is taken only once the key is built, right before the search.
	lock_guard<mutex> l(lock);
	return SearchEqual(compound_key, max_count, row_ids);
}
743+
678744
bool ART::Scan(IndexScanState &state, const idx_t max_count, set<row_t> &row_ids) {
679745
auto &scan_state = state.Cast<ARTIndexScanState>();
680746
D_ASSERT(scan_state.values[0].type().InternalType() == types[0]);

src/function/table/table_scan.cpp

Lines changed: 104 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@
3030
#include "duckdb/planner/filter/conjunction_filter.hpp"
3131
#include "duckdb/common/types/value_map.hpp"
3232
#include "duckdb/main/settings.hpp"
33+
#include <limits>
3334
#include <list>
35+
#include <utility>
3436

3537
namespace duckdb {
3638

@@ -486,86 +488,144 @@ vector<unique_ptr<Expression>> ExtractFilterExpressions(const ColumnDefinition &
486488

487489
bool TryScanIndex(ART &art, const ColumnList &column_list, TableFunctionInitInput &input, TableFilterSet &filter_set,
488490
idx_t max_count, set<row_t> &row_ids) {
489-
// FIXME: No support for index scans on compound ARTs.
490-
// See note above on multi-filter support.
491-
if (art.unbound_expressions.size() > 1) {
492-
return false;
491+
vector<unique_ptr<Expression>> index_exprs;
492+
for (const auto &expr : art.unbound_expressions) {
493+
index_exprs.push_back(expr->Copy());
493494
}
494495

495-
auto index_expr = art.unbound_expressions[0]->Copy();
496+
// If this is a view, the column IDs are relative to the view projection
496497
auto &indexed_columns = art.GetColumnIds();
497498

498-
// NOTE: We do not push down multi-column filters, e.g., 42 = a + b.
499-
if (indexed_columns.size() != 1) {
499+
// Allow composite ART scans
500+
if (indexed_columns.size() != index_exprs.size()) {
500501
return false;
501502
}
502503

504+
// ...only if each expression has a single column reference, and
505+
// that single column reference positionally matches that of the index (this is guaranteed? paranoid?)
506+
// NOTE: We do not push down multi-column filters, e.g., 42 = a + b.
507+
for (idx_t i = 0; i < index_exprs.size(); ++i) {
508+
unordered_set<column_t> referenced_columns;
509+
auto expr = &index_exprs[i];
510+
511+
// Walk the expr in case of nesting (e.g. function)
512+
ExpressionIterator::EnumerateExpression(*expr, [&](Expression &child_expr) {
513+
if (child_expr.GetExpressionClass() == ExpressionClass::BOUND_COLUMN_REF) {
514+
auto &col_ref = child_expr.Cast<BoundColumnRefExpression>();
515+
referenced_columns.insert(col_ref.binding.column_index);
516+
}
517+
});
518+
519+
if (referenced_columns.size() != 1) {
520+
return false;
521+
}
522+
523+
auto referenced_column = *referenced_columns.begin();
524+
auto direct_match = referenced_column == indexed_columns[i];
525+
auto remapped_match =
526+
input.column_ids.size() > referenced_column && input.column_ids[referenced_column] == indexed_columns[i];
527+
528+
if (!(direct_match || remapped_match)) {
529+
return false;
530+
}
531+
}
532+
503533
// Resolve bound column references in the index_expr against the current input projection
504-
column_t updated_index_column;
534+
vector<column_t> index_column_to_proj_pos;
535+
index_column_to_proj_pos.resize(indexed_columns.size(), std::numeric_limits<idx_t>::max());
536+
505537
bool found_index_column_in_input = false;
506538

507-
// Find the indexed column amongst the input columns
508-
for (idx_t i = 0; i < input.column_ids.size(); ++i) {
509-
if (input.column_ids[i] == indexed_columns[0]) {
510-
updated_index_column = i;
511-
found_index_column_in_input = true;
512-
break;
539+
// Associate indexed columns to input columns
540+
for (idx_t i = 0; i < indexed_columns.size(); ++i) {
541+
for (idx_t j = 0; j < input.column_ids.size(); ++j) {
542+
if (indexed_columns[i] == input.column_ids[j]) {
543+
index_column_to_proj_pos.at(i) = j;
544+
found_index_column_in_input = true;
545+
}
513546
}
514547
}
515548

516-
// If found, update the bound column ref within index_expr
517-
if (found_index_column_in_input) {
549+
if (!found_index_column_in_input) {
550+
return false;
551+
}
552+
553+
for (auto col : index_column_to_proj_pos) {
554+
if (col == std::numeric_limits<idx_t>::max()) {
555+
return false;
556+
}
557+
}
558+
559+
// Update the bound column refs within all index_exprs
560+
for (auto &index_expr : index_exprs) {
518561
ExpressionIterator::EnumerateExpression(index_expr, [&](Expression &expr) {
519562
if (expr.GetExpressionClass() != ExpressionClass::BOUND_COLUMN_REF) {
520563
return;
521564
}
522565

523566
auto &bound_column_ref_expr = expr.Cast<BoundColumnRefExpression>();
524567

525-
// If the bound column references the index column, use updated_index_column
526-
if (bound_column_ref_expr.binding.column_index == indexed_columns[0]) {
527-
bound_column_ref_expr.binding.column_index = updated_index_column;
568+
// If the bound column references an indexed column, update it
569+
for (idx_t i = 0; i < indexed_columns.size(); ++i) {
570+
if (bound_column_ref_expr.binding.column_index == indexed_columns[i]) {
571+
bound_column_ref_expr.binding.column_index = index_column_to_proj_pos[i];
572+
break;
573+
}
528574
}
529575
});
530576
}
531577

532-
// Get ART column.
533-
auto &col = column_list.GetColumn(LogicalIndex(indexed_columns[0]));
534-
535578
// The indexes of the filters match input.column_indexes, which are: i -> column_index.
536-
// Try to find a filter on the ART column.
537-
optional_idx storage_index;
538-
for (idx_t i = 0; i < input.column_indexes.size(); i++) {
539-
if (input.column_indexes[i].ToLogical() == col.Logical()) {
540-
storage_index = i;
541-
break;
542-
}
543-
}
579+
// Reuse the index <-> projection mappings from index expr rebinding
580+
vector<vector<unique_ptr<Expression>>> index_filters;
544581

545-
// No filter matches the ART column.
546-
if (!storage_index.IsValid()) {
547-
return false;
582+
for (idx_t i = 0; i < index_column_to_proj_pos.size(); ++i) {
583+
auto column_def = &column_list.GetColumn(LogicalIndex(indexed_columns[i]));
584+
auto maybe_filter = filter_set.filters.find(index_column_to_proj_pos[i]);
585+
if (maybe_filter != filter_set.filters.end()) {
586+
auto filter = &maybe_filter->second;
587+
auto filter_expressions = ExtractFilterExpressions(*column_def, *filter, index_column_to_proj_pos[i]);
588+
589+
index_filters.push_back(std::move(filter_expressions));
590+
}
548591
}
549592

550-
// Try to find a matching filter for the column.
551-
auto filter = filter_set.filters.find(storage_index.GetIndex());
552-
if (filter == filter_set.filters.end()) {
593+
// Index filters must:
594+
// - Match ART column count 1:1
595+
// - Match filter expression set 1:1 (there may be filters on non-indexed columns, bail out if so)
596+
if (index_filters.size() != indexed_columns.size() || filter_set.filters.size() != index_filters.size() ||
597+
index_filters.empty()) {
553598
return false;
554599
}
555600

556-
auto expressions = ExtractFilterExpressions(col, filter->second, storage_index.GetIndex());
557-
for (const auto &filter_expr : expressions) {
558-
auto scan_state = art.TryInitializeScan(*index_expr, *filter_expr);
601+
// Do a compound scan if we have filter exprs bound for several columns
602+
if (index_filters.size() > 1) {
603+
auto scan_state = art.TryInitializeCompoundKeyScan(index_exprs, index_filters);
559604
if (!scan_state) {
560605
return false;
561606
}
562607

563-
// Check if we can use an index scan, and already retrieve the matching row ids.
564-
if (!art.Scan(*scan_state, max_count, row_ids)) {
608+
if (!art.CompoundKeyScan(*scan_state, max_count, row_ids)) {
565609
row_ids.clear();
566610
return false;
567611
}
568612
}
613+
// Original single column index scan
614+
else {
615+
for (const auto &filter_expr : index_filters[0]) {
616+
auto scan_state = art.TryInitializeScan(*index_exprs[0], *filter_expr);
617+
if (!scan_state) {
618+
return false;
619+
}
620+
621+
// Check if we can use an index scan, and already retrieve the matching row ids.
622+
if (!art.Scan(*scan_state, max_count, row_ids)) {
623+
row_ids.clear();
624+
return false;
625+
}
626+
}
627+
}
628+
569629
return true;
570630
}
571631

@@ -588,9 +648,9 @@ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context,
588648
// 1.2. Find + scan one ART for b = 24.
589649
// 1.3. Return the intersecting row IDs.
590650
// 2. (Reorder and) scan a single ART with a compound key of (a, b).
591-
if (filter_set.filters.size() != 1) {
592-
return DuckTableScanInitGlobal(context, input, storage, bind_data);
593-
}
651+
// if (filter_set.filters.size() != 1) {
652+
// return DuckTableScanInitGlobal(context, input, storage, bind_data);
653+
// }
594654

595655
// The checkpoint lock ensures that we do not checkpoint while scanning this table.
596656
auto &transaction = DuckTransaction::Get(context, storage.db);

src/include/duckdb/execution/index/art/art.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "duckdb/execution/index/bound_index.hpp"
1212
#include "duckdb/execution/index/art/node.hpp"
1313
#include "duckdb/common/array.hpp"
14+
#include "duckdb/planner/expression.hpp"
1415

1516
namespace duckdb {
1617

@@ -24,6 +25,7 @@ class ARTKeySection;
2425
class FixedSizeAllocator;
2526

2627
struct ARTIndexScanState;
28+
struct ARTIndexCompoundKeyScanState;
2729

2830
class ART : public BoundIndex {
2931
public:
@@ -70,10 +72,20 @@ class ART : public BoundIndex {
7072
public:
7173
//! Try to initialize a scan on the ART with the given expression and filter.
7274
unique_ptr<IndexScanState> TryInitializeScan(const Expression &expr, const Expression &filter_expr);
75+
76+
//! Try to initialize a compound key scan on the ART, using the given index expr -> filter expr mappings.
77+
//! Supports equality comparisons only.
78+
unique_ptr<IndexScanState> TryInitializeCompoundKeyScan(const vector<unique_ptr<Expression>> &index_exprs,
79+
vector<vector<unique_ptr<Expression>>> &exprs);
80+
7381
//! Perform a lookup on the ART, fetching up to max_count row IDs.
7482
//! If all row IDs were fetched, it return true, else false.
7583
bool Scan(IndexScanState &state, idx_t max_count, set<row_t> &row_ids);
7684

85+
//! Like `ART::Scan`, but uses `ARTIndexCompoundKeyScanState` to concatenate multiple
86+
//! values for equality comparisons only.
87+
bool CompoundKeyScan(IndexScanState &state, idx_t max_count, set<row_t> &row_ids);
88+
7789
//! Appends data to the locked index.
7890
ErrorData Append(IndexLock &l, DataChunk &chunk, Vector &row_ids) override;
7991
//! Appends data to the locked index and verifies constraint violations.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# name: test/sql/index/art/scan/test_art_composite_key_scan.test
2+
# description: Test Index Scan push down for equality filters on all columns of a composite ART index (issue #17290)
3+
# group: [scan]
4+
5+
statement ok
6+
create or replace table test as (
7+
select
8+
cast(unnest(range(1000)) as varchar) as x,
9+
cast(unnest(range(2000,3000)) as varchar) as y,
10+
cast(unnest(range(3000,4000)) as varchar) as z
11+
);
12+
13+
# create a composite index over all three columns, then filter on each of them with an equality predicate
14+
statement ok
15+
create index test_composite_key on test(x, y, z);
16+
17+
query II
18+
explain analyze select * from test where x = '525' and y = '2525' and z = '3525';
19+
----
20+
analyzed_plan <REGEX>:.*Index Scan.*
21+
22+
query III
23+
select * from test where x = '525' and y = '2525' and z = '3525';
24+
----
25+
525 2525 3525

0 commit comments

Comments
 (0)