Skip to content

Commit 071637f

Browse files
committed
Move join fuzzing out to its own mod in tests/fuzz
1 parent 982c0e8 commit 071637f

File tree

3 files changed

+517
-567
lines changed

3 files changed

+517
-567
lines changed

tests/fuzz/helpers.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,43 @@ pub fn assert_differential(
4040
);
4141
}
4242

43+
/// Runs `query` against both engines and panics unless they return the same
/// rows, ignoring row order.
///
/// The comparison is a multiset comparison: every row must occur the same
/// number of times in both result sets. A plain "each row is contained in the
/// other side" check is not enough, because with equal row counts
/// `[A, A, B]` and `[A, B, B]` would pass it even though the results differ.
///
/// # Parameters
/// - `limbo_conn`: connection the query is executed on via `limbo_exec_rows`.
/// - `sqlite_conn`: connection the query is executed on via `sqlite_exec_rows`.
/// - `query`: SQL text executed on both connections.
/// - `context`: extra text appended to the panic message to help reproduce a failure.
///
/// # Panics
/// Panics if the row counts differ or if the rows differ as multisets.
pub fn assert_differential_no_ordering(
    limbo_conn: &Arc<turso_core::Connection>,
    sqlite_conn: &rusqlite::Connection,
    query: &str,
    context: &str,
) {
    let limbo_results = limbo_exec_rows(limbo_conn, query);
    let sqlite_results = sqlite_exec_rows(sqlite_conn, query);

    if limbo_results.len() != sqlite_results.len() {
        panic!(
            "Row count mismatch for query: {}\nLimbo: {} rows, SQLite: {} rows\nLimbo: {:?}\nSQLite: {:?}\n{}",
            query,
            limbo_results.len(),
            sqlite_results.len(),
            limbo_results,
            sqlite_results,
            context,
        );
    }

    // The row values are only compared for equality here (no ordering or
    // hashing), so pair rows up by linear search: each SQLite row may satisfy
    // at most one Limbo row. Because the lengths are already known to be
    // equal, matching every Limbo row proves the two multisets are identical.
    let mut unmatched_sqlite: Vec<_> = sqlite_results.iter().collect();
    let same_rows = limbo_results.iter().all(|limbo_row| {
        let found = unmatched_sqlite
            .iter()
            .position(|sqlite_row| *sqlite_row == limbo_row);
        match found {
            Some(idx) => {
                // Order of the remaining candidates is irrelevant, so the
                // O(1) removal is fine.
                unmatched_sqlite.swap_remove(idx);
                true
            }
            None => false,
        }
    });

    if !same_rows {
        panic!(
            "Results mismatch for query: {query}\nLimbo: {limbo_results:?}\nSQLite: {sqlite_results:?}\n{context}",
        );
    }
}
79+
4380
/// Assert that both engines either succeeded or both failed. Panics on mismatch.
4481
pub fn assert_outcome_parity<T: std::fmt::Debug, U: std::fmt::Debug>(
4582
sqlite_res: &Result<T, impl std::fmt::Debug>,

tests/fuzz/join.rs

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
#[cfg(test)]
mod join_fuzz_tests {
    use crate::helpers;
    use core_tester::common::{limbo_exec_rows, sqlite_exec_rows, TempDatabase};
    use rand::Rng;
    use rand_chacha::ChaCha8Rng;
    use rusqlite::params;

    /// Non-primary-key columns that every base table exposes.
    const DATA_COLS: [&str; 4] = ["a", "b", "c", "d"];

    /// One entry of a generated FROM clause.
    struct TableRef {
        /// SQL text placed in the FROM/JOIN position (a table name or a subquery with alias).
        from_expr: String,
        /// Name used to qualify columns of this entry.
        alias: String,
        /// Columns of this entry that may appear in ON and WHERE predicates.
        cols: Vec<&'static str>,
        /// Whether this entry is a derived table (subquery) rather than a base table.
        derived: bool,
    }

    /// Draws a column value: NULL with a 25% chance, otherwise an integer in `-10..=20`.
    fn gen_nullable_value(rng: &mut ChaCha8Rng) -> Option<i32> {
        if rng.random_range(0..4) == 0 {
            None
        } else {
            Some(rng.random_range(-10..=20))
        }
    }

    /// Renders an optional integer as a SQL literal (`NULL` when absent).
    fn sql_literal(value: Option<i32>) -> String {
        value.map_or_else(|| "NULL".to_string(), |x| x.to_string())
    }

    /// Generates a derived table (FROM-clause subquery) over `table`, named `alias`.
    ///
    /// Returns the SQL text and the columns that can be used in predicates
    /// against the derived table.
    fn gen_derived_table(
        rng: &mut ChaCha8Rng,
        table: &str,
        alias: &str,
    ) -> (String, Vec<&'static str>) {
        match rng.random_range(0..4) {
            // Simple passthrough: (SELECT * FROM t) AS alias
            0 => (
                format!("(SELECT * FROM {table}) AS {alias}"),
                DATA_COLS.to_vec(),
            ),
            // All data columns plus an expression column:
            // (SELECT a, b, c, d, c + d AS cd FROM t) AS alias
            1 => (
                format!("(SELECT a, b, c, d, c + d AS cd FROM {table}) AS {alias}"),
                DATA_COLS.to_vec(),
            ),
            // Aggregate grouped by `a`; only `a` survives as a plain column.
            2 => (
                format!("(SELECT a, sum(b) AS sum_b, max(c) AS max_c, count(*) AS cnt FROM {table} GROUP BY a) AS {alias}"),
                vec!["a"],
            ),
            // With filter: (SELECT * FROM t WHERE a IS NOT NULL) AS alias
            3 => (
                format!("(SELECT * FROM {table} WHERE a IS NOT NULL) AS {alias}"),
                DATA_COLS.to_vec(),
            ),
            _ => unreachable!(),
        }
    }

    /// Generates one random join query over a prefix (at least two entries) of `tables`.
    ///
    /// The query selects one column per table reference and orders by exactly
    /// those columns, so the `LIMIT`ed output is the same sequence of values on
    /// any engine that evaluates it correctly.
    fn gen_join_query(rng: &mut ChaCha8Rng, tables: &[&str]) -> String {
        let num_tables = rng.random_range(2..=tables.len());
        let used_tables = &tables[..num_tables];

        // Decide which tables to wrap in derived tables (30% chance each).
        // All flags are drawn before any derived table is generated.
        let use_derived: Vec<bool> = (0..num_tables).map(|_| rng.random_bool(0.3)).collect();

        let table_refs: Vec<TableRef> = used_tables
            .iter()
            .zip(&use_derived)
            .map(|(&tname, &derived)| {
                if derived {
                    let alias = format!("sub_{tname}");
                    let (from_expr, cols) = gen_derived_table(rng, tname, &alias);
                    TableRef {
                        from_expr,
                        alias,
                        cols,
                        derived,
                    }
                } else {
                    TableRef {
                        from_expr: tname.to_string(),
                        alias: tname.to_string(),
                        cols: DATA_COLS.to_vec(),
                        derived,
                    }
                }
            })
            .collect();

        // Derived tables may not expose `id` (the aggregate form does not), so
        // they contribute `a`; base tables contribute their primary key.
        let select_cols: Vec<String> = table_refs
            .iter()
            .map(|t| {
                if t.derived {
                    format!("{}.a", t.alias)
                } else {
                    format!("{}.id", t.alias)
                }
            })
            .collect();
        let select_clause = select_cols.join(", ");

        // Chain the joins: each table reference is joined to its predecessor.
        let mut from_clause = format!("FROM {}", table_refs[0].from_expr);
        for pair in table_refs.windows(2) {
            let (left, right) = (&pair[0], &pair[1]);

            let join_type = if rng.random_bool(0.5) {
                "JOIN"
            } else {
                "LEFT JOIN"
            };

            // Only columns available on both sides can be equated.
            let common_cols: Vec<&str> = left
                .cols
                .iter()
                .copied()
                .filter(|c| right.cols.contains(c))
                .collect();
            // Every table reference exposes `a`, so fall back to it.
            let join_cols = if common_cols.is_empty() {
                vec!["a"]
            } else {
                common_cols
            };

            let num_preds = rng.random_range(1..=join_cols.len().min(3));
            let mut preds: Vec<String> = (0..num_preds)
                .map(|_| {
                    let col = join_cols[rng.random_range(0..join_cols.len())];
                    format!("{}.{col} = {}.{col}", left.alias, right.alias)
                })
                .collect();
            // The same column may be drawn more than once; keep each predicate once.
            preds.sort();
            preds.dedup();

            from_clause.push_str(&format!(
                " {join_type} {} ON {}",
                right.from_expr,
                preds.join(" AND ")
            ));
        }

        // WHERE clause: 0..=2 predicates on columns available in a random table ref.
        let mut where_parts = Vec::new();
        let num_where = rng.random_range(0..=2);
        for _ in 0..num_where {
            let target = &table_refs[rng.random_range(0..num_tables)];
            if target.cols.is_empty() {
                continue;
            }
            let alias = &target.alias;
            let col = target.cols[rng.random_range(0..target.cols.len())];
            let cond = match rng.random_range(0..4) {
                0 => {
                    let val = rng.random_range(-10..=20);
                    format!("{alias}.{col} = {val}")
                }
                1 => {
                    let val = rng.random_range(-10..=20);
                    format!("{alias}.{col} <> {val}")
                }
                2 => format!("{alias}.{col} IS NULL"),
                3 => format!("{alias}.{col} IS NOT NULL"),
                _ => unreachable!(),
            };
            where_parts.push(cond);
        }
        let where_clause = if where_parts.is_empty() {
            String::new()
        } else {
            format!("WHERE {}", where_parts.join(" AND "))
        };

        let order_clause = format!("ORDER BY {}", select_cols.join(", "));
        let limit = 50;
        format!("SELECT {select_clause} {from_clause} {where_clause} {order_clause} LIMIT {limit}")
    }

    /// Differential join fuzzer: populates four tables with random data on both
    /// engines, then runs `iterations` random join queries and panics on the
    /// first query whose results differ.
    ///
    /// - `db`: database the test harness provides; used to derive the builder
    ///   for the two databases under test.
    /// - `add_indexes`: when true, an index is created on every data column.
    /// - `iterations`: number of random queries to run.
    /// - `rows`: number of rows inserted into each table.
    fn join_fuzz_inner(db: TempDatabase, add_indexes: bool, iterations: usize, rows: i64) {
        // Build the name with `format!`: a plain string literal would pass the
        // text "{add_indexes}" through verbatim.
        let test_name = format!("join_fuzz_inner (add_indexes={add_indexes})");
        let (mut rng, seed) = helpers::init_fuzz_test(&test_name);

        let builder = helpers::builder_from_db(&db);
        let limbo_db = builder.clone().build();
        let sqlite_db = builder.clone().build();
        let limbo_conn = limbo_db.connect_limbo();
        let sqlite_conn = rusqlite::Connection::open(sqlite_db.path.clone()).unwrap();

        let schema = r#"
            CREATE TABLE t1(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
            CREATE TABLE t2(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
            CREATE TABLE t3(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
            CREATE TABLE t4(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);"#;

        // Declared outside the `if` so the failure report below can print it.
        let index_ddl = r#"
            CREATE INDEX t1_a_idx ON t1(a);
            CREATE INDEX t1_b_idx ON t1(b);
            CREATE INDEX t1_c_idx ON t1(c);
            CREATE INDEX t1_d_idx ON t1(d);

            CREATE INDEX t2_a_idx ON t2(a);
            CREATE INDEX t2_b_idx ON t2(b);
            CREATE INDEX t2_c_idx ON t2(c);
            CREATE INDEX t2_d_idx ON t2(d);

            CREATE INDEX t3_a_idx ON t3(a);
            CREATE INDEX t3_b_idx ON t3(b);
            CREATE INDEX t3_c_idx ON t3(c);
            CREATE INDEX t3_d_idx ON t3(d);

            CREATE INDEX t4_a_idx ON t4(a);
            CREATE INDEX t4_b_idx ON t4(b);
            CREATE INDEX t4_c_idx ON t4(c);
            CREATE INDEX t4_d_idx ON t4(d);
        "#;

        sqlite_conn.execute_batch(schema).unwrap();
        limbo_conn.prepare_execute_batch(schema).unwrap();

        if add_indexes {
            sqlite_conn.execute_batch(index_ddl).unwrap();
            limbo_conn.prepare_execute_batch(index_ddl).unwrap();
        }

        let tables = ["t1", "t2", "t3", "t4"];

        // Every executed INSERT is recorded so a failure can print a full reproduction.
        let mut all_inserts: Vec<String> = Vec::new();
        for (t_idx, tname) in tables.iter().enumerate() {
            for i in 0..rows {
                // Offset ids per table so each table gets its own id range.
                let id = i + 1 + (t_idx as i64) * 10_000;

                let a = gen_nullable_value(&mut rng);
                let b = gen_nullable_value(&mut rng);
                let c = gen_nullable_value(&mut rng);
                let d = gen_nullable_value(&mut rng);

                let stmt = format!(
                    "INSERT INTO {tname}(id,a,b,c,d) VALUES ({id}, {a}, {b}, {c}, {d})",
                    a = sql_literal(a),
                    b = sql_literal(b),
                    c = sql_literal(c),
                    d = sql_literal(d),
                );

                sqlite_conn.execute(&stmt, params![]).unwrap();
                limbo_conn.execute(&stmt).unwrap();
                all_inserts.push(stmt);
            }
        }

        // Report progress roughly every 1% of the run.
        let progress_step = (iterations / 100).max(1);
        for iter in 0..iterations {
            if iter % progress_step == 0 {
                println!(
                    "join_fuzz_inner(add_indexes={}) iter {}/{}",
                    add_indexes,
                    iter + 1,
                    iterations
                );
            }

            let query = gen_join_query(&mut rng, &tables);

            // Print some sample queries to verify derived table generation
            if iter < 10 {
                println!("query[{iter}]: {query}");
            }

            let sqlite_rows = sqlite_exec_rows(&sqlite_conn, &query);
            let limbo_rows = limbo_exec_rows(&limbo_conn, &query);
            if sqlite_rows != limbo_rows {
                // Print DDL and DML for reproduction
                eprintln!("\n=== REPRODUCTION DDL/DML ===");
                eprintln!("{schema}");
                if add_indexes {
                    eprintln!("{index_ddl}");
                }
                for ins in &all_inserts {
                    eprintln!("{ins};");
                }
                eprintln!("\n=== FAILING QUERY ===");
                eprintln!("{query}");
                eprintln!("=== END ===\n");

                panic!(
                    "JOIN FUZZ MISMATCH (add_indexes={})\nseed: {}\niteration: {}\nquery: {}\n\
                     sqlite ({} rows): {:?}\nlimbo ({} rows): {:?}\nsqlite path: {:?}\nlimbo path: {:?}",
                    add_indexes,
                    seed,
                    iter,
                    query,
                    sqlite_rows.len(),
                    sqlite_rows,
                    limbo_rows.len(),
                    limbo_rows,
                    sqlite_db.path,
                    limbo_db.path,
                );
            }
        }
    }

    /// Join fuzzing where the join keys have no indexes.
    #[turso_macros::test(mvcc)]
    pub fn join_fuzz_unindexed_keys(db: TempDatabase) {
        join_fuzz_inner(db, false, 2000, 200);
    }

    /// Join fuzzing where every data column has an index.
    #[turso_macros::test(mvcc)]
    pub fn join_fuzz_indexed_keys(db: TempDatabase) {
        join_fuzz_inner(db, true, 2000, 200);
    }
}

0 commit comments

Comments
 (0)