Skip to content

Commit eecf763

Browse files
committed
core/translate: SQLite-compatible affinity check for index seeks
Fixes #7373, #7374, and a related IN-list/IN-subquery affinity bypass surfaced during review. The bug. Index-driven seeks applied the indexed column's affinity to the seek key. SQLite applies the comparison's resolved affinity (`sqlite3CompareAffinity`). The two diverge for cross-type comparisons: -- l.txt TEXT, r.flag INTEGER, '' stored in l. SELECT l.txt, r.flag FROM l JOIN r ON l.txt <= r.flag; Comparison affinity here is NUMERIC. The TEXT index forced TEXT onto the integer probe ('0') and the b-tree comparator then ranked '' below '0' lexicographically — rather than SQLite's type ordering (INTEGER < TEXT) that puts every integer below every text. False match. Same shape broke `WHERE x IN (SELECT y ...)` with TEXT x / INTEGER y: the IN ephemeral stored integers, the TEXT index never coerced them, and the b-tree sorted every integer below every text key — zero rows. The fix. Port SQLite's `sqlite3IndexAffinityOk` (`Affinity::index_affinity_ok`), cache the resolved comparison affinity on each `Constraint` at construction, and consult it at every index-selection site (regular b-tree, auto-index, ephemeral-subquery, IN-seek) so incompatible indexes are rejected up front. At emission, `index_seek_affinities` now reads the cached comparison affinity instead of the column's. Snapshot churn: 7 files. All are affinity-character shifts (D→C for cross-type numeric comparisons) or removal of redundant TEXT-on-TEXT Affinity opcodes that SQLite never emits either. Fixes #7373. Fixes #7374.
1 parent d001d54 commit eecf763

13 files changed

Lines changed: 379 additions & 154 deletions

core/translate/main_loop/seek.rs

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,16 @@
11
use super::*;
22

3-
fn index_seek_affinities(
4-
idx: &Index,
5-
tables: &TableReferences,
6-
seek_def: &SeekDef,
7-
seek_key: &SeekKey,
8-
) -> String {
9-
let table = tables
10-
.joined_tables()
11-
.iter()
12-
.find(|jt| jt.table.get_name() == idx.table_name)
13-
.expect("index source table not found in table references");
14-
15-
idx.columns
16-
.iter()
17-
.zip(seek_def.iter(seek_key))
18-
.map(|(ic, key_component)| {
19-
let col_aff = if let Some(ref expr) = ic.expr {
20-
crate::translate::expr::get_expr_affinity(expr, Some(tables), None)
21-
} else {
22-
table
23-
.table
24-
.get_column_at(ic.pos_in_table)
25-
.expect("index column position out of bounds")
26-
.affinity()
27-
};
28-
match key_component {
29-
SeekKeyComponent::Expr(expr) if col_aff.expr_needs_no_affinity_change(expr) => {
30-
affinity::SQLITE_AFF_NONE
31-
}
32-
_ => col_aff.aff_mask(),
3+
fn index_seek_affinities(seek_def: &SeekDef, seek_key: &SeekKey) -> String {
4+
// Apply the constraint's resolved comparison affinity to the seek key,
5+
// not the indexed column's affinity.
6+
seek_def
7+
.iter(seek_key)
8+
.zip(seek_def.iter_affinity(seek_key))
9+
.map(|(key_component, aff)| match key_component {
10+
SeekKeyComponent::Expr(expr) if aff.expr_needs_no_affinity_change(expr) => {
11+
affinity::SQLITE_AFF_NONE
3312
}
13+
_ => aff.aff_mask(),
3414
})
3515
.collect()
3616
}
@@ -222,8 +202,7 @@ impl<'a, 'plan> SeekEmitter<'a, 'plan> {
222202
0,
223203
&self.t_ctx.resolver,
224204
)?;
225-
let affinities =
226-
index_seek_affinities(idx, self.tables, self.seek_def, &self.seek_def.start);
205+
let affinities = index_seek_affinities(self.seek_def, &self.seek_def.start);
227206
if affinities.chars().any(|c| c != affinity::SQLITE_AFF_NONE) {
228207
self.program.emit_insn(Insn::Affinity {
229208
start_reg: self.start_reg,
@@ -342,8 +321,7 @@ impl<'a, 'plan> SeekEmitter<'a, 'plan> {
342321
self.seek_def.prefix.len(),
343322
&self.t_ctx.resolver,
344323
)?;
345-
let affinities =
346-
index_seek_affinities(idx, self.tables, self.seek_def, &self.seek_def.end);
324+
let affinities = index_seek_affinities(self.seek_def, &self.seek_def.end);
347325
if affinities.chars().any(|c| c != affinity::SQLITE_AFF_NONE) {
348326
self.program.emit_insn(Insn::Affinity {
349327
start_reg: self.start_reg,

core/translate/optimizer/access_method.rs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,15 @@ pub(super) fn choose_best_in_seek_candidate(
577577
if table_collation != index_collation {
578578
continue;
579579
}
580+
// SQLite's `indexInAffinityOk`: see
581+
// [`Constraint::satisfies_index_affinity`]. Rowid IN-seek is
582+
// gated above by `if let Some(index)` — rowids carry INTEGER
583+
// affinity and are coerced separately by `SeekRowid`/the
584+
// IN-list ephemeral, so they don't need this gate.
585+
let idx_aff = constrained_column.affinity_with_strict(rhs_table.table.is_strict());
586+
if !constraint.satisfies_index_affinity(idx_aff) {
587+
continue;
588+
}
580589
}
581590

582591
let rows_per_seek = if (index_info.unique && index_info.column_count == 1)
@@ -1402,18 +1411,16 @@ fn find_best_access_method_for_subquery(
14021411
.iter()
14031412
.enumerate()
14041413
.filter(|(_, c)| {
1405-
c.usable
1406-
&& c.table_col_pos.is_some()
1407-
&& matches!(
1408-
c.operator.as_ast_operator(),
1409-
Some(
1410-
ast::Operator::Equals
1411-
| ast::Operator::Greater
1412-
| ast::Operator::GreaterEquals
1413-
| ast::Operator::Less
1414-
| ast::Operator::LessEquals
1415-
)
1414+
matches!(
1415+
c.operator.as_ast_operator(),
1416+
Some(
1417+
ast::Operator::Equals
1418+
| ast::Operator::Greater
1419+
| ast::Operator::GreaterEquals
1420+
| ast::Operator::Less
1421+
| ast::Operator::LessEquals
14161422
)
1423+
) && c.can_drive_index_seek(&subquery.columns, false)
14171424
})
14181425
.collect();
14191426

core/translate/optimizer/constraints.rs

Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ use crate::{
33
schema::{Column, Index, Schema},
44
translate::{
55
collate::get_collseq_from_expr,
6-
expr::{as_binary_components, comparison_affinity, walk_expr_mut, WalkControl},
6+
expr::{
7+
as_binary_components, comparison_affinity, get_expr_affinity, unwrap_parens,
8+
walk_expr_mut, WalkControl,
9+
},
710
expression_index::normalize_expr_for_index_matching,
811
plan::{JoinOrderMember, JoinedTable, NonFromClauseSubquery, TableReferences, WhereTerm},
912
planner::{break_predicate_at_and_boundaries, table_mask_from_expr, TableMask, ROWID_STRS},
@@ -70,6 +73,18 @@ pub struct Constraint {
7073
/// Whether this constraint references the implicit rowid (tables without an INTEGER PRIMARY KEY alias).
7174
/// When true and `table_col_pos` is None, this constraint targets the rowid pseudo-column.
7275
pub is_rowid: bool,
76+
/// The constraint's resolved comparison affinity, as defined by SQLite's
77+
/// `comparisonAffinity` in `expr.c`. Cached at construction time so the
78+
/// `sqlite3IndexAffinityOk` check can run without re-resolving the
79+
/// WhereTerm at every index-selection callsite.
80+
///
81+
/// `None` for forms whose comparison affinity has no single resolved value:
82+
/// FTS MATCH and virtual-table push-downs (operators outside SQLite's
83+
/// comparison set), and row-value IN (`(a,b) IN (...)`, which SQLite
84+
/// handles per-LHS-column via `sqlite3VectorFieldSubexpr` — Turso does
85+
/// not yet plumb per-column affinity into the index-selection path, so
86+
/// such constraints fall through to scans).
87+
pub comparison_affinity: Option<Affinity>,
7388
}
7489

7590
#[derive(Debug, Clone, Copy, PartialEq)]
@@ -118,8 +133,7 @@ impl Constraint {
118133
panic!("Expected a valid binary expression");
119134
};
120135
let mut affinity = Affinity::Blob;
121-
if op.as_ast_operator().is_some_and(|op| op.is_comparison()) && self.table_col_pos.is_some()
122-
{
136+
if op.as_ast_operator().is_some_and(|op| op.is_comparison()) {
123137
affinity = comparison_affinity(lhs, rhs, referenced_tables, None);
124138
}
125139

@@ -160,6 +174,35 @@ impl Constraint {
160174
rhs
161175
}
162176
}
177+
178+
/// Returns true when an index column with affinity `idx_aff` can satisfy
179+
/// this constraint per SQLite's `sqlite3IndexAffinityOk`. Constraints
180+
/// whose form has no SQLite-defined comparison affinity (FTS MATCH,
181+
/// virtual-table push-downs, row-value IN) carry `None` and bypass the
182+
/// check — those paths handle types themselves.
183+
pub fn satisfies_index_affinity(&self, idx_aff: Affinity) -> bool {
184+
match self.comparison_affinity {
185+
Some(comparison_aff) => idx_aff.index_affinity_ok(comparison_aff),
186+
None => true,
187+
}
188+
}
189+
190+
/// Whether this constraint can drive an index seek on its target column.
191+
/// Composes the `usable`/`table_col_pos` gates with the affinity check
192+
/// against the column at `table_col_pos` in `columns` (set `is_strict`
193+
/// only for STRICT tables; subqueries pass `false`).
194+
pub fn can_drive_index_seek(&self, columns: &[Column], is_strict: bool) -> bool {
195+
if !self.usable {
196+
return false;
197+
}
198+
let Some(pos) = self.table_col_pos else {
199+
return false;
200+
};
201+
let col = columns.get(pos).unwrap_or_else(|| {
202+
unreachable!("constraint table_col_pos {pos} out of bounds for {columns:?}")
203+
});
204+
self.satisfies_index_affinity(col.affinity_with_strict(is_strict))
205+
}
163206
}
164207

165208
#[derive(Debug, Clone)]
@@ -449,6 +492,13 @@ pub fn constraints_from_where_clause(
449492

450493
// Try to extract as binary expression first
451494
if let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? {
495+
// Resolve the comparison affinity once per term per SQLite's
496+
// `comparisonAffinity` (see `Constraint::comparison_affinity`)
497+
// and propagate it to every constraint derived from this term.
498+
let cmp_aff = operator
499+
.as_ast_operator()
500+
.filter(|op| op.is_comparison())
501+
.map(|_| comparison_affinity(lhs, rhs, Some(table_references), None));
452502
// If either the LHS or RHS of the constraint is a column from the table, add the constraint.
453503
match lhs {
454504
ast::Expr::Column { table, column, .. } => {
@@ -472,6 +522,7 @@ pub fn constraints_from_where_clause(
472522
),
473523
usable: true,
474524
is_rowid: false,
525+
comparison_affinity: cmp_aff,
475526
});
476527
}
477528
}
@@ -500,6 +551,7 @@ pub fn constraints_from_where_clause(
500551
),
501552
usable: true,
502553
is_rowid: true,
554+
comparison_affinity: cmp_aff,
503555
});
504556
}
505557
}
@@ -537,6 +589,7 @@ pub fn constraints_from_where_clause(
537589
selectivity,
538590
usable: true,
539591
is_rowid: false,
592+
comparison_affinity: cmp_aff,
540593
});
541594
}
542595
_ => {}
@@ -563,6 +616,7 @@ pub fn constraints_from_where_clause(
563616
),
564617
usable: true,
565618
is_rowid: false,
619+
comparison_affinity: cmp_aff,
566620
});
567621
}
568622
}
@@ -591,6 +645,7 @@ pub fn constraints_from_where_clause(
591645
),
592646
usable: true,
593647
is_rowid: true,
648+
comparison_affinity: cmp_aff,
594649
});
595650
}
596651
}
@@ -628,6 +683,7 @@ pub fn constraints_from_where_clause(
628683
selectivity,
629684
usable: true,
630685
is_rowid: false,
686+
comparison_affinity: cmp_aff,
631687
});
632688
}
633689
_ => {}
@@ -658,6 +714,9 @@ pub fn constraints_from_where_clause(
658714
.unwrap_or(params.rows_per_table_fallback as u64)
659715
as f64;
660716
let selectivity = estimate_in_selectivity(estimated_values, row_count, *not);
717+
// SQLite's `comparisonAffinity` for IN-list (`x IN (lit, ...)`)
718+
// is the LHS column's affinity; the RHS literals are not folded.
719+
let cmp_aff = Some(get_expr_affinity(lhs, Some(table_references), None));
661720

662721
match lhs.as_ref() {
663722
ast::Expr::Column { table, column, .. }
@@ -677,6 +736,7 @@ pub fn constraints_from_where_clause(
677736
selectivity,
678737
usable: false, // IN uses a separate seek path, not the range-seek model
679738
is_rowid,
739+
comparison_affinity: cmp_aff,
680740
});
681741
}
682742
ast::Expr::RowId { table, .. } if *table == table_reference.internal_id => {
@@ -693,6 +753,7 @@ pub fn constraints_from_where_clause(
693753
selectivity,
694754
usable: false,
695755
is_rowid: true,
756+
comparison_affinity: cmp_aff,
696757
});
697758
}
698759
_ => {}
@@ -704,7 +765,7 @@ pub fn constraints_from_where_clause(
704765
subquery_id,
705766
lhs: Some(lhs_expr),
706767
not_in,
707-
query_type: ast::SubqueryType::In { .. },
768+
query_type: ast::SubqueryType::In { affinity_str, .. },
708769
} = &term.expr
709770
{
710771
// Find the subquery to check if it's correlated
@@ -723,6 +784,19 @@ pub fn constraints_from_where_clause(
723784
.unwrap_or(params.rows_per_table_fallback as u64)
724785
as f64;
725786
let selectivity = estimate_in_selectivity(estimated_values, row_count, *not_in);
787+
// SQLite's `comparisonAffinity` for IN-subquery combines the
788+
// LHS column affinity with each result column via
789+
// `sqlite3CompareAffinity` — that result is already cached on
790+
// `SubqueryType::In::affinity_str`. For a single-LHS-column
791+
// IN it is the first character; row-value IN has no single
792+
// resolved affinity and is left as `None`.
793+
let is_row_value = matches!(
794+
unwrap_parens(lhs_expr.as_ref()).ok(),
795+
Some(ast::Expr::Parenthesized(exprs)) if exprs.len() != 1
796+
);
797+
let cmp_aff = (!is_row_value)
798+
.then(|| affinity_str.chars().next().map(Affinity::from_char))
799+
.flatten();
726800

727801
match lhs_expr.as_ref() {
728802
ast::Expr::Column { table, column, .. }
@@ -742,6 +816,7 @@ pub fn constraints_from_where_clause(
742816
selectivity,
743817
usable: false, // IN uses a separate seek path (consider_in_list_seek)
744818
is_rowid,
819+
comparison_affinity: cmp_aff,
745820
});
746821
}
747822
ast::Expr::RowId { table, .. } if *table == table_reference.internal_id => {
@@ -758,6 +833,7 @@ pub fn constraints_from_where_clause(
758833
selectivity,
759834
usable: false,
760835
is_rowid: true,
836+
comparison_affinity: cmp_aff,
761837
});
762838
}
763839
_ => {}
@@ -862,6 +938,12 @@ pub fn constraints_from_where_clause(
862938
{
863939
continue;
864940
}
941+
// See [`Constraint::satisfies_index_affinity`].
942+
let idx_col_aff = constrained_column
943+
.affinity_with_strict(table_reference.table.is_strict());
944+
if !constraint.satisfies_index_affinity(idx_col_aff) {
945+
continue;
946+
}
865947
}
866948
if let Some(index_candidate) = cs.candidates.iter_mut().find_map(|candidate| {
867949
if candidate.index.as_ref().is_some_and(|i| {
@@ -1731,6 +1813,7 @@ pub(crate) fn analyze_binary_term_for_index(
17311813
selectivity,
17321814
usable: true,
17331815
is_rowid,
1816+
comparison_affinity: Some(affinity),
17341817
};
17351818

17361819
Some(AnalyzedTerm {

core/translate/optimizer/mod.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2235,14 +2235,17 @@ fn optimize_table_access(
22352235
});
22362236
continue;
22372237
};
2238-
// Ephemeral indexes mirror rowid/column lookups. If the constraint targets an
2239-
// expression (table_col_pos == None) we cannot derive a seek key that matches
2240-
// the row layout, so fall back to a scan in that situation.
2238+
// Ephemeral indexes mirror rowid/column lookups; expression-index
2239+
// constraints (table_col_pos == None) fall back to a scan.
2240+
let table_columns = table_references.joined_tables()[table_idx].table.columns();
2241+
let is_strict = table_references.joined_tables()[table_idx]
2242+
.table
2243+
.is_strict();
22412244
let usable: Vec<(usize, &Constraint)> = table_constraints
22422245
.constraints
22432246
.iter()
22442247
.enumerate()
2245-
.filter(|(_, c)| c.usable && c.table_col_pos.is_some())
2248+
.filter(|(_, c)| c.can_drive_index_seek(table_columns, is_strict))
22462249
.collect();
22472250
// Find this table's position in best_join_order (which excludes build tables)
22482251
let join_order_pos = best_join_order

core/vdbe/affinity.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,31 @@ impl Affinity {
280280
}
281281
}
282282

283+
/// Can an index column with `self` affinity drive a seek for a
284+
/// comparison whose resolved affinity is `comparison_aff`?
285+
///
286+
/// Port of SQLite's `sqlite3IndexAffinityOk` (src/expr.c).
287+
/// The basic idea is: an index can be used if probing it would
288+
/// produce the same result as a scan + WHERE filter.
289+
///
290+
/// - `Blob`: e.g. `x IS NULL`. No coercion either way. Any index OK.
291+
/// - `Text`: e.g. `WHERE txt = 5` (txt TEXT). WHERE coerces `5` to
292+
/// `'5'`. Only a TEXT index's keys are stored as text, so it can
293+
/// match `'5'` directly.
294+
/// - `Numeric`: e.g. `WHERE l.txt = r.flag` (l.txt TEXT, r.flag
295+
/// INTEGER). WHERE NUMERIC-coerces both sides: `'6'` → 6, 6 = 6,
296+
/// match. A TEXT-index seek would probe stored text `'6'` with
297+
/// integer 6, but the b-tree comparator orders every integer
298+
/// below every text (INTEGER < TEXT) and finds nothing — opposite
299+
/// answer from the scan. Only a numeric-affinity index is safe.
300+
pub fn index_affinity_ok(self, comparison_aff: Affinity) -> bool {
301+
match comparison_aff {
302+
Affinity::Blob => true,
303+
Affinity::Text => matches!(self, Affinity::Text),
304+
Affinity::Numeric | Affinity::Integer | Affinity::Real => self.is_numeric(),
305+
}
306+
}
307+
283308
/// Return TRUE if the given expression is a constant which would be
284309
/// unchanged by OP_Affinity with the affinity given in the second
285310
/// argument.

0 commit comments

Comments
 (0)