Skip to content

Commit c58b2f6

Browse files
authored
Merge 'EXPLAIN QUERY PLAN: fix incorrect join order, add missing annotations, convert some snapshot tests to EQP-only' from Jussi Saurio
## EXPLAIN QUERY PLAN didn't show correct join order - EXPLAIN QUERY PLAN was showing original join order, not the actual join order after optimizer pass. Highly annoying and clankers get mega confused by it too. - Fixing the above issue revealed that left join ordering fuzz test (based on EQP output) was not accounting for LEFT->INNER optimizations, so fix that by only asserting on LEFT JOINs that survived the optimizer pass ## EXPLAIN QUERY PLAN was missing stuff that SQLite has - EXPLAIN QUERY PLAN was missing annotations for GROUP BY, DISTINCT, subqueries + it had an incorrect `USE TEMP B-TREE FOR ORDER BY` annotation because we never use temp b-tree for sorting, so instead print `USE SORTER FOR ORDER BY`. Also add turso-custom prints like `USE HASH TABLE FOR count(DISTINCT)` (sqlite doesn't have hash table) ## Add new snapshot format that only prints EQP instead of full bytecode, in cases where we don't much care about the full bytecode - Add new `snapshot-eqp` snapshot test format that just prints the EQP, not the bytecode - Convert a bunch of snapshots to use `snapshot-eqp` (for example, most TPC-H tests; in many of them we just care about the join order and maybe order by elimination) - Note: the massive diff is due to the above - main change commits are smaller Reviewed-by: Preston Thorpe <preston@turso.tech> Closes #5511
2 parents d249968 + 3e13ff5 commit c58b2f6

File tree

167 files changed

+1233
-4415
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

167 files changed

+1233
-4415
lines changed

core/translate/group_by.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@ use crate::translate::{
1717
optimizer::Optimizable,
1818
};
1919
use crate::{
20+
emit_explain,
2021
schema::PseudoCursorType,
2122
translate::collate::{get_collseq_from_expr, CollationSeq},
2223
util::exprs_are_equivalent,
2324
vdbe::{
24-
builder::{CursorType, ProgramBuilder},
25+
builder::{CursorType, ProgramBuilder, QueryMode},
2526
insn::Insn,
2627
BranchOffset,
2728
},
@@ -156,6 +157,7 @@ pub fn init_group_by<'a>(
156157
columns: column_count,
157158
order_and_collations,
158159
});
160+
emit_explain!(program, false, "USE SORTER FOR GROUP BY".to_owned());
159161
let pseudo_cursor = group_by_create_pseudo_table(program, column_count);
160162
GroupByRowSource::Sorter {
161163
pseudo_cursor,

core/translate/main_loop.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use super::{
2121
},
2222
};
2323
use crate::{
24+
emit_explain,
2425
schema::{Index, Table},
2526
translate::{
2627
collate::{get_collseq_from_expr, resolve_comparison_collseq, CollationSeq},
@@ -37,7 +38,7 @@ use crate::{
3738
affinity::{self, Affinity},
3839
builder::{
3940
CursorKey, CursorType, HashBuildSignature, MaterializedBuildInputModeTag,
40-
ProgramBuilder,
41+
ProgramBuilder, QueryMode,
4142
},
4243
insn::{to_u16, CmpInsFlags, HashBuildData, IdxInsertFlags, Insn},
4344
BranchOffset, CursorID,
@@ -147,6 +148,11 @@ pub fn init_loop(
147148
label_on_conflict: program.allocate_label(),
148149
}),
149150
};
151+
emit_explain!(
152+
program,
153+
false,
154+
format!("USE HASH TABLE FOR {}(DISTINCT)", agg.func)
155+
);
150156
}
151157
// Include hash-join build tables so their cursors are opened for hash build.
152158
let mut required_tables: HashSet<usize> = join_order

core/translate/order_by.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,11 @@ pub fn emit_order_by(
188188
+ if has_sequence { 1 } else { 0 }
189189
+ remappings.iter().filter(|r| !r.deduplicated).count();
190190

191-
// TODO: we need to know how many indices used for sorting
192-
// to emit correct explain output.
193-
emit_explain!(program, false, "USE TEMP B-TREE FOR ORDER BY".to_owned());
191+
if use_heap_sort {
192+
emit_explain!(program, false, "USE TEMP B-TREE FOR ORDER BY".to_owned());
193+
} else {
194+
emit_explain!(program, false, "USE SORTER FOR ORDER BY".to_owned());
195+
}
194196

195197
let cursor_id = if !use_heap_sort {
196198
let pseudo_cursor = program.alloc_cursor_id(CursorType::Pseudo(PseudoCursorType {

core/translate/subquery.rs

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -852,21 +852,24 @@ pub fn emit_from_clause_subqueries(
852852
}
853853
}
854854

855-
// Include hash-join build tables so EXPLAIN reflects all tables that feed the loop.
856-
let mut required_tables: HashSet<usize> = join_order
855+
// Build the iteration order: join_order first (execution order), then any
856+
// hash-join build tables that aren't already in the join order.
857+
let mut visit_order: Vec<usize> = join_order
857858
.iter()
858859
.map(|member| member.original_idx)
859860
.collect();
861+
let visit_set: HashSet<usize> = visit_order.iter().copied().collect();
860862
for table in tables.joined_tables().iter() {
861863
if let Operation::HashJoin(hash_join_op) = &table.op {
862-
required_tables.insert(hash_join_op.build_table_idx);
864+
let build_idx = hash_join_op.build_table_idx;
865+
if !visit_set.contains(&build_idx) {
866+
visit_order.push(build_idx);
867+
}
863868
}
864869
}
865870

866-
for (table_index, table_reference) in tables.joined_tables_mut().iter_mut().enumerate() {
867-
if !required_tables.contains(&table_index) {
868-
continue;
869-
}
871+
for table_index in visit_order {
872+
let table_reference = &mut tables.joined_tables_mut()[table_index];
870873
emit_explain!(
871874
program,
872875
true,
@@ -1530,6 +1533,29 @@ pub fn emit_non_from_clause_subquery(
15301533
is_correlated: bool,
15311534
) -> Result<()> {
15321535
program.nested(|program| {
1536+
let subquery_id = program.next_subquery_eqp_id();
1537+
let correlated_prefix = if is_correlated { "CORRELATED " } else { "" };
1538+
match query_type {
1539+
SubqueryType::Exists { .. } => {
1540+
// EXISTS subqueries don't get a separate EQP annotation in SQLite;
1541+
// instead the SEARCH/SCAN line gets an "EXISTS" suffix handled elsewhere.
1542+
}
1543+
SubqueryType::In { .. } => {
1544+
emit_explain!(
1545+
program,
1546+
true,
1547+
format!("{correlated_prefix}LIST SUBQUERY {subquery_id}")
1548+
);
1549+
}
1550+
SubqueryType::RowValue { .. } => {
1551+
emit_explain!(
1552+
program,
1553+
true,
1554+
format!("{correlated_prefix}SCALAR SUBQUERY {subquery_id}")
1555+
);
1556+
}
1557+
}
1558+
15331559
let label_skip_after_first_run = if !is_correlated {
15341560
let label = program.allocate_label();
15351561
program.emit_insn(Insn::Once {
@@ -1586,6 +1612,10 @@ pub fn emit_non_from_clause_subquery(
15861612
});
15871613
}
15881614
}
1615+
// Pop the parent explain for LIST/SCALAR SUBQUERY annotations.
1616+
if !matches!(query_type, SubqueryType::Exists { .. }) {
1617+
program.pop_current_parent_explain();
1618+
}
15891619
if let Some(label) = label_skip_after_first_run {
15901620
program.preassign_label_to_next_insn(label);
15911621
}

core/vdbe/builder.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ pub struct ProgramBuilder {
185185
/// references in non-recursive CTEs and to prevent fallthrough to schema
186186
/// resolution for same-named tables/views.
187187
ctes_being_defined: Vec<String>,
188+
/// Counter for subquery numbering in EXPLAIN QUERY PLAN output.
189+
next_subquery_eqp_id: usize,
188190
}
189191

190192
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
@@ -413,9 +415,16 @@ impl ProgramBuilder {
413415
materialized_ctes: HashMap::default(),
414416
cte_reference_counts: HashMap::default(),
415417
ctes_being_defined: Vec::new(),
418+
next_subquery_eqp_id: 1,
416419
}
417420
}
418421

422+
pub fn next_subquery_eqp_id(&mut self) -> usize {
423+
let id = self.next_subquery_eqp_id;
424+
self.next_subquery_eqp_id += 1;
425+
id
426+
}
427+
419428
pub fn alloc_hash_table_id(&mut self) -> usize {
420429
let id = self.next_hash_table_id;
421430
self.next_hash_table_id = self

testing/runner/src/parser/ast.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ pub struct SnapshotCase {
152152
pub name_span: Range<usize>,
153153
/// SQL to execute (EXPLAIN will be prepended)
154154
pub sql: String,
155+
/// If true, only run EXPLAIN QUERY PLAN (no bytecode).
156+
/// Set by the `snapshot-eqp` directive.
157+
pub eqp_only: bool,
155158
/// Common modifiers (setups, skip, backend, requires)
156159
pub modifiers: CaseModifiers,
157160
}

testing/runner/src/parser/lexer.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ pub enum Token {
142142
#[token("snapshot")]
143143
Snapshot,
144144

145+
/// `snapshot-eqp` keyword (EXPLAIN QUERY PLAN only, no bytecode)
146+
#[token("snapshot-eqp")]
147+
SnapshotEqp,
148+
145149
/// `expect` keyword
146150
#[token("expect")]
147151
Expect,
@@ -233,6 +237,7 @@ impl fmt::Display for Token {
233237
Token::Setup => write!(f, "setup"),
234238
Token::Test => write!(f, "test"),
235239
Token::Snapshot => write!(f, "snapshot"),
240+
Token::SnapshotEqp => write!(f, "snapshot-eqp"),
236241
Token::Expect => write!(f, "expect"),
237242
Token::Error => write!(f, "error"),
238243
Token::Pattern => write!(f, "pattern"),

testing/runner/src/parser/mod.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ impl Parser {
7777
| Token::AtRequires
7878
| Token::AtBackend
7979
| Token::Test
80-
| Token::Snapshot,
80+
| Token::Snapshot
81+
| Token::SnapshotEqp,
8182
) => {
8283
// Could be test or snapshot with decorators, peek ahead
8384
let item = self.parse_test_or_snapshot()?;
@@ -258,8 +259,9 @@ impl Parser {
258259

259260
// Now check if it's a test or snapshot
260261
match self.peek() {
261-
Some(Token::Snapshot) => {
262-
self.expect_token(Token::Snapshot)?;
262+
Some(Token::Snapshot | Token::SnapshotEqp) => {
263+
let eqp_only = matches!(self.peek(), Some(Token::SnapshotEqp));
264+
self.advance();
263265
let (name, name_span) = self.expect_identifier_with_span()?;
264266
let sql = self.expect_block_content()?.trim().to_string();
265267

@@ -269,6 +271,7 @@ impl Parser {
269271
name,
270272
name_span,
271273
sql,
274+
eqp_only,
272275
modifiers: CaseModifiers {
273276
setups: test_setups,
274277
skip,

testing/runner/src/runner/mod.rs

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -154,37 +154,40 @@ impl Runnable for SnapshotCase {
154154
}
155155

156156
fn queries_to_execute(&self) -> Vec<String> {
157-
// Run both EXPLAIN QUERY PLAN and EXPLAIN
158-
vec![
159-
format!("EXPLAIN QUERY PLAN {}", self.sql),
160-
format!("EXPLAIN {}", self.sql),
161-
]
157+
if self.eqp_only {
158+
vec![format!("EXPLAIN QUERY PLAN {}", self.sql)]
159+
} else {
160+
vec![
161+
format!("EXPLAIN QUERY PLAN {}", self.sql),
162+
format!("EXPLAIN {}", self.sql),
163+
]
164+
}
162165
}
163166

164167
async fn evaluate_results(
165168
&self,
166169
results: Vec<QueryResult>,
167170
options: &RunOptions,
168171
) -> TestOutcome {
169-
// results[0] = EXPLAIN QUERY PLAN
170-
// results[1] = EXPLAIN
171-
let eqp_result = results.first().expect("should have two query results");
172-
let explain_result = results.get(1).expect("should have two query results");
172+
let eqp_result = results.first().expect("should have EQP result");
173173

174-
// Check for errors in query execution
175174
if let Some(err) = &eqp_result.error {
176175
return TestOutcome::Error {
177176
message: format!("EXPLAIN QUERY PLAN failed: {err}"),
178177
};
179178
}
180-
if let Some(err) = &explain_result.error {
181-
return TestOutcome::Error {
182-
message: format!("EXPLAIN failed: {err}"),
183-
};
184-
}
185179

186-
// Format both outputs
187-
let actual_output = format_snapshot_content(&eqp_result.rows, &explain_result.rows);
180+
let actual_output = if self.eqp_only {
181+
format_eqp_snapshot_content(&eqp_result.rows)
182+
} else {
183+
let explain_result = results.get(1).expect("should have EXPLAIN result");
184+
if let Some(err) = &explain_result.error {
185+
return TestOutcome::Error {
186+
message: format!("EXPLAIN failed: {err}"),
187+
};
188+
}
189+
format_snapshot_content(&eqp_result.rows, &explain_result.rows)
190+
};
188191

189192
// Build snapshot info with metadata
190193
let db_location_str = options.db_config.location.to_string();
@@ -223,6 +226,16 @@ impl Runnable for SnapshotCase {
223226
}
224227
}
225228

229+
/// Format EQP-only snapshot content (no bytecode).
230+
fn format_eqp_snapshot_content(eqp_rows: &[Vec<String>]) -> String {
231+
use crate::snapshot::format_explain_query_plan_output;
232+
233+
let mut output = String::new();
234+
output.push_str("QUERY PLAN\n");
235+
output.push_str(&format_explain_query_plan_output(eqp_rows));
236+
output
237+
}
238+
226239
/// Format the combined snapshot content with both EXPLAIN QUERY PLAN and EXPLAIN output.
227240
fn format_snapshot_content(eqp_rows: &[Vec<String>], explain_rows: &[Vec<String>]) -> String {
228241
use crate::snapshot::{format_explain_output, format_explain_query_plan_output};
Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
@database :memory:
22
@skip-file-if mvcc "mvcc has slightly different cursor ids, so skipping it for now"
33

4-
setup orderby_temp_btree {
4+
setup orderby_plan {
55
CREATE TABLE t(a);
66
}
77

8-
@setup orderby_temp_btree
9-
snapshot orderby_a_uses_temp_btree {
8+
@setup orderby_plan
9+
snapshot orderby_a_uses_sorter {
1010
SELECT * FROM t ORDER BY a;
1111
}
1212

13-
@setup orderby_temp_btree
14-
snapshot orderby_a_desc_uses_temp_btree {
13+
@setup orderby_plan
14+
snapshot orderby_a_desc_uses_sorter {
1515
SELECT * FROM t ORDER BY a DESC;
1616
}
1717

18-
@setup orderby_temp_btree
19-
snapshot orderby_rowid_no_temp_btree {
18+
@setup orderby_plan
19+
snapshot orderby_rowid_no_sorter {
2020
SELECT * FROM t ORDER BY rowid;
2121
}
2222

23-
@setup orderby_temp_btree
24-
snapshot orderby_rowid_desc_no_temp_btree {
23+
@setup orderby_plan
24+
snapshot orderby_rowid_desc_no_sorter {
2525
SELECT * FROM t ORDER BY rowid DESC;
2626
}

0 commit comments

Comments
 (0)