|
#[cfg(test)]
mod join_fuzz_tests {
    use crate::helpers;
    use core_tester::common::{limbo_exec_rows, sqlite_exec_rows, TempDatabase};
    use rand::Rng;
    use rand_chacha::ChaCha8Rng;
    use rusqlite::params;

    /// Non-primary-key columns present in every base table.
    const DATA_COLS: [&str; 4] = ["a", "b", "c", "d"];

    /// DDL for the four identically shaped base tables.
    const SCHEMA: &str = r#"
        CREATE TABLE t1(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
        CREATE TABLE t2(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
        CREATE TABLE t3(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);
        CREATE TABLE t4(id INTEGER PRIMARY KEY, a INT, b INT, c INT, d INT);"#;

    /// DDL for one single-column index per data column of every base table.
    /// Only executed when the test runs with `add_indexes == true`.
    const INDEX_DDL: &str = r#"
        CREATE INDEX t1_a_idx ON t1(a);
        CREATE INDEX t1_b_idx ON t1(b);
        CREATE INDEX t1_c_idx ON t1(c);
        CREATE INDEX t1_d_idx ON t1(d);

        CREATE INDEX t2_a_idx ON t2(a);
        CREATE INDEX t2_b_idx ON t2(b);
        CREATE INDEX t2_c_idx ON t2(c);
        CREATE INDEX t2_d_idx ON t2(d);

        CREATE INDEX t3_a_idx ON t3(a);
        CREATE INDEX t3_b_idx ON t3(b);
        CREATE INDEX t3_c_idx ON t3(c);
        CREATE INDEX t3_d_idx ON t3(d);

        CREATE INDEX t4_a_idx ON t4(a);
        CREATE INDEX t4_b_idx ON t4(b);
        CREATE INDEX t4_c_idx ON t4(c);
        CREATE INDEX t4_d_idx ON t4(d);
    "#;

    /// Draws one column value: `None` (SQL NULL) with a 1-in-4 chance,
    /// otherwise an integer in `-10..=20`.
    fn random_nullable_value(rng: &mut ChaCha8Rng) -> Option<i32> {
        if rng.random_range(0..4) == 0 {
            None
        } else {
            Some(rng.random_range(-10..=20))
        }
    }

    /// Renders an optional integer as a SQL literal (`NULL` for `None`).
    fn sql_literal(value: Option<i32>) -> String {
        value.map_or_else(|| "NULL".to_string(), |v| v.to_string())
    }

    /// Builds a random derived table (FROM-clause subquery) over `table`, named `alias`.
    ///
    /// Returns the SQL fragment together with the columns of the derived table
    /// that may be referenced in join and WHERE predicates.
    fn gen_derived_table(
        rng: &mut ChaCha8Rng,
        table: &str,
        alias: &str,
    ) -> (String, Vec<&'static str>) {
        let kind = rng.random_range(0..4);
        match kind {
            // Simple passthrough.
            0 => (
                format!("(SELECT * FROM {table}) AS {alias}"),
                DATA_COLS.to_vec(),
            ),
            // Explicit column list plus a computed expression (no `id` column).
            1 => (
                format!("(SELECT a, b, c, d, c + d AS cd FROM {table}) AS {alias}"),
                DATA_COLS.to_vec(),
            ),
            // Aggregate grouped by `a`; only `a` survives as a plain column.
            2 => (
                format!("(SELECT a, sum(b) AS sum_b, max(c) AS max_c, count(*) AS cnt FROM {table} GROUP BY a) AS {alias}"),
                vec!["a"],
            ),
            // Passthrough with a filter.
            3 => (
                format!("(SELECT * FROM {table} WHERE a IS NOT NULL) AS {alias}"),
                DATA_COLS.to_vec(),
            ),
            _ => unreachable!(),
        }
    }

    /// Differential join fuzzer: runs randomly generated multi-table join queries
    /// against both SQLite (via rusqlite) and Limbo and panics on the first
    /// result mismatch.
    ///
    /// * `db` - template database used to derive the two fresh databases under test.
    /// * `add_indexes` - when true, creates an index on every data column first.
    /// * `iterations` - number of random queries to generate and compare.
    /// * `rows` - number of rows inserted into each of the four tables.
    ///
    /// # Panics
    ///
    /// Panics if any setup statement fails or if the two engines return different
    /// rows for a query. On mismatch the DDL/DML needed to reproduce the failure
    /// (including the index DDL when indexes were created) is written to stderr.
    fn join_fuzz_inner(db: TempDatabase, add_indexes: bool, iterations: usize, rows: i64) {
        // The name must go through `format!`; a bare string literal would pass the
        // text "{add_indexes}" verbatim instead of the flag's value.
        let (mut rng, seed) = helpers::init_fuzz_test(&format!(
            "join_fuzz_inner (add_indexes={add_indexes})"
        ));

        let builder = helpers::builder_from_db(&db);
        let limbo_db = builder.clone().build();
        let sqlite_db = builder.clone().build();
        let limbo_conn = limbo_db.connect_limbo();
        let sqlite_conn = rusqlite::Connection::open(sqlite_db.path.clone()).unwrap();

        sqlite_conn.execute_batch(SCHEMA).unwrap();
        limbo_conn.prepare_execute_batch(SCHEMA).unwrap();

        if add_indexes {
            sqlite_conn.execute_batch(INDEX_DDL).unwrap();
            limbo_conn.prepare_execute_batch(INDEX_DDL).unwrap();
        }

        // Populate both databases with identical random rows, remembering every
        // statement so a failing run can be reproduced.
        let tables = ["t1", "t2", "t3", "t4"];
        let mut all_inserts: Vec<String> = Vec::new();
        for (t_idx, tname) in tables.iter().enumerate() {
            for i in 0..rows {
                // Each table gets its own id range, offset by 10_000 per table.
                let id = i + 1 + (t_idx as i64) * 10_000;

                let a = sql_literal(random_nullable_value(&mut rng));
                let b = sql_literal(random_nullable_value(&mut rng));
                let c = sql_literal(random_nullable_value(&mut rng));
                let d = sql_literal(random_nullable_value(&mut rng));

                let stmt = format!(
                    "INSERT INTO {tname}(id,a,b,c,d) VALUES ({id}, {a}, {b}, {c}, {d})"
                );

                sqlite_conn.execute(&stmt, params![]).unwrap();
                limbo_conn.execute(&stmt).unwrap();
                all_inserts.push(stmt);
            }
        }

        // Report progress roughly every 1% of the run.
        let progress_step = (iterations / 100).max(1);

        for iter in 0..iterations {
            if iter % progress_step == 0 {
                println!(
                    "join_fuzz_inner(add_indexes={}) iter {}/{}",
                    add_indexes,
                    iter + 1,
                    iterations
                );
            }

            let num_tables = rng.random_range(2..=4);
            let used_tables = &tables[..num_tables];

            // Decide which tables to wrap in derived tables (30% chance each).
            let use_derived: Vec<bool> = (0..num_tables).map(|_| rng.random_bool(0.3)).collect();

            // One (from_expr, alias, usable_cols) entry per table, direct or derived.
            let table_refs: Vec<(String, String, Vec<&'static str>)> = used_tables
                .iter()
                .zip(&use_derived)
                .map(|(&tname, &derived)| {
                    if derived {
                        let alias = format!("sub_{tname}");
                        let (expr, cols) = gen_derived_table(&mut rng, tname, &alias);
                        (expr, alias, cols)
                    } else {
                        (tname.to_string(), tname.to_string(), DATA_COLS.to_vec())
                    }
                })
                .collect();

            // Derived tables do not always expose `id` (explicit-column and
            // aggregate forms drop it), so they contribute `a`; base tables
            // contribute `id`.
            let select_cols: Vec<String> = table_refs
                .iter()
                .zip(&use_derived)
                .map(|((_, alias, _), &derived)| {
                    if derived {
                        format!("{alias}.a")
                    } else {
                        format!("{alias}.id")
                    }
                })
                .collect();
            let select_clause = select_cols.join(", ");

            // Chain the tables left to right, each joined to its predecessor.
            let mut from_clause = format!("FROM {}", table_refs[0].0);
            for pair in table_refs.windows(2) {
                let (_, left_alias, left_cols) = &pair[0];
                let (right_expr, right_alias, right_cols) = &pair[1];

                let join_type = if rng.random_bool(0.5) {
                    "JOIN"
                } else {
                    "LEFT JOIN"
                };

                // Columns usable on both sides of this join.
                let common_cols: Vec<&str> = left_cols
                    .iter()
                    .filter(|c| right_cols.contains(c))
                    .copied()
                    .collect();

                // Every table ref exposes `a`, so this fallback is purely defensive.
                let join_cols = if common_cols.is_empty() {
                    vec!["a"]
                } else {
                    common_cols
                };

                let num_preds = rng.random_range(1..=join_cols.len().min(3));
                let mut preds = Vec::new();
                for _ in 0..num_preds {
                    let col = join_cols[rng.random_range(0..join_cols.len())];
                    preds.push(format!("{left_alias}.{col} = {right_alias}.{col}"));
                }
                // The same column may be drawn twice; keep each predicate once.
                preds.sort();
                preds.dedup();

                let on_clause = preds.join(" AND ");
                from_clause.push_str(&format!(" {join_type} {right_expr} ON {on_clause}"));
            }

            // WHERE clause: 0..=2 predicates on columns available in a random table ref.
            let mut where_parts = Vec::new();
            let num_where = rng.random_range(0..=2);
            for _ in 0..num_where {
                let idx = rng.random_range(0..num_tables);
                let (_, alias, cols) = &table_refs[idx];
                if cols.is_empty() {
                    continue;
                }
                let col = cols[rng.random_range(0..cols.len())];
                let kind = rng.random_range(0..4);
                let cond = match kind {
                    0 => {
                        let val = rng.random_range(-10..=20);
                        format!("{alias}.{col} = {val}")
                    }
                    1 => {
                        let val = rng.random_range(-10..=20);
                        format!("{alias}.{col} <> {val}")
                    }
                    2 => format!("{alias}.{col} IS NULL"),
                    3 => format!("{alias}.{col} IS NOT NULL"),
                    _ => unreachable!(),
                };
                where_parts.push(cond);
            }
            let where_clause = if where_parts.is_empty() {
                String::new()
            } else {
                format!("WHERE {}", where_parts.join(" AND "))
            };

            // Ordering by exactly the selected columns means rows that tie in the
            // ordering are identical in the output, so LIMIT yields the same rows
            // regardless of how an engine breaks ties.
            let order_clause = format!("ORDER BY {}", select_cols.join(", "));
            let limit = 50;
            let query = format!(
                "SELECT {select_clause} {from_clause} {where_clause} {order_clause} LIMIT {limit}",
            );

            // Print some sample queries to verify derived table generation.
            if iter < 10 {
                println!("query[{iter}]: {query}");
            }

            let sqlite_rows = sqlite_exec_rows(&sqlite_conn, &query);
            let limbo_rows = limbo_exec_rows(&limbo_conn, &query);
            if sqlite_rows != limbo_rows {
                // Print DDL and DML for reproduction.
                eprintln!("\n=== REPRODUCTION DDL/DML ===");
                eprintln!("{SCHEMA}");
                if add_indexes {
                    // Without the indexes the indexed variant cannot be reproduced.
                    eprintln!("{INDEX_DDL}");
                }
                for ins in &all_inserts {
                    eprintln!("{ins};");
                }
                eprintln!("\n=== FAILING QUERY ===");
                eprintln!("{query}");
                eprintln!("=== END ===\n");

                panic!(
                    "JOIN FUZZ MISMATCH (add_indexes={})\nseed: {}\niteration: {}\nquery: {}\n\
                     sqlite ({} rows): {:?}\nlimbo ({} rows): {:?}\nsqlite path: {:?}\nlimbo path: {:?}",
                    add_indexes,
                    seed,
                    iter,
                    query,
                    sqlite_rows.len(),
                    sqlite_rows,
                    limbo_rows.len(),
                    limbo_rows,
                    sqlite_db.path,
                    limbo_db.path,
                );
            }
        }
    }

    /// Fuzzes joins over tables that have no secondary indexes.
    #[turso_macros::test(mvcc)]
    pub fn join_fuzz_unindexed_keys(db: TempDatabase) {
        join_fuzz_inner(db, false, 2000, 200);
    }

    /// Fuzzes joins over tables with an index on every data column.
    #[turso_macros::test(mvcc)]
    pub fn join_fuzz_indexed_keys(db: TempDatabase) {
        join_fuzz_inner(db, true, 2000, 200);
    }
}
0 commit comments