Skip to content

Commit 8df86df

Browse files
committed
Fix duckdb stackoverflow for deletes
1 parent b798c39 commit 8df86df

2 files changed

Lines changed: 180 additions & 2 deletions

File tree

core/src/sql/sql_provider_datafusion/expr.rs

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::sync::Arc;
22

33
use bigdecimal::{num_bigint::BigInt, BigDecimal};
44
use datafusion::{
5-
logical_expr::{Cast, Expr},
5+
logical_expr::{Cast, Expr, Operator},
66
scalar::ScalarValue,
77
sql::unparser::dialect::{
88
DefaultDialect, Dialect, DuckDBDialect, MySqlDialect, PostgreSqlDialect, SqliteDialect,
@@ -64,7 +64,12 @@ pub fn to_sql_with_engine(expr: &Expr, engine: Option<Engine>) -> Result<String>
6464
}
6565
}
6666

67-
Ok(format!("{} {} {}", left, binary_expr.op, right))
67+
match binary_expr.op {
68+
Operator::And | Operator::Or => {
69+
Ok(format!("({}) {} ({})", left, binary_expr.op, right))
70+
}
71+
_ => Ok(format!("{} {} {}", left, binary_expr.op, right)),
72+
}
6873
}
6974
Expr::Column(name) => match engine {
7075
Some(Engine::Spark | Engine::ODBC) => Ok(format!("{name}")),
@@ -536,6 +541,74 @@ mod tests {
536541
Ok(())
537542
}
538543

544+
fn binary(left: Expr, op: Operator, right: Expr) -> Expr {
545+
Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr {
546+
left: Box::new(left),
547+
op,
548+
right: Box::new(right),
549+
})
550+
}
551+
552+
fn int(v: i32) -> Expr {
553+
Expr::Literal(ScalarValue::Int32(Some(v)), None)
554+
}
555+
556+
#[test]
557+
fn test_and_or_binary_exprs_are_parenthesized() -> Result<()> {
558+
// AND operands are wrapped so the tree shape is preserved in SQL
559+
let and_expr = binary(col("a"), Operator::And, col("b"));
560+
assert_eq!(to_sql(&and_expr)?, "(\"a\") AND (\"b\")");
561+
562+
// OR operands are wrapped for the same reason
563+
let or_expr = binary(col("a"), Operator::Or, col("b"));
564+
assert_eq!(to_sql(&or_expr)?, "(\"a\") OR (\"b\")");
565+
566+
Ok(())
567+
}
568+
569+
#[test]
570+
fn test_non_boolean_binary_exprs_not_parenthesized() -> Result<()> {
571+
// Comparison and arithmetic operators must NOT add extra parens
572+
let eq_expr = binary(col("k1"), Operator::Eq, int(1));
573+
assert_eq!(to_sql(&eq_expr)?, "\"k1\" = 1");
574+
575+
let lt_expr = binary(col("v"), Operator::Lt, int(42));
576+
assert_eq!(to_sql(&lt_expr)?, "\"v\" < 42");
577+
578+
let plus_expr = binary(col("x"), Operator::Plus, int(5));
579+
assert_eq!(to_sql(&plus_expr)?, "\"x\" + 5");
580+
581+
Ok(())
582+
}
583+
584+
#[test]
585+
fn test_composite_key_or_chain_parenthesized() -> Result<()> {
586+
// Simulates a two-row composite-key DELETE:
587+
// (k1 = 1 AND k2 = 2) OR (k1 = 2 AND k2 = 4)
588+
//
589+
// Without parens this serialises as a flat chain that DuckDB's optimizer
590+
// reconstructs as a left-recursive tree of depth N, causing a stack overflow
591+
// for large N. With parens the structure is explicit.
592+
let row1 = binary(
593+
binary(col("k1"), Operator::Eq, int(1)),
594+
Operator::And,
595+
binary(col("k2"), Operator::Eq, int(2)),
596+
);
597+
let row2 = binary(
598+
binary(col("k1"), Operator::Eq, int(2)),
599+
Operator::And,
600+
binary(col("k2"), Operator::Eq, int(4)),
601+
);
602+
let or_chain = binary(row1, Operator::Or, row2);
603+
604+
assert_eq!(
605+
to_sql(&or_chain)?,
606+
"((\"k1\" = 1) AND (\"k2\" = 2)) OR ((\"k1\" = 2) AND (\"k2\" = 4))"
607+
);
608+
609+
Ok(())
610+
}
611+
539612
#[test]
540613
fn test_expr_timestamp_scalar_value_to_sql() -> Result<()> {
541614
let expr = Expr::Literal(

core/tests/duckdb/mod.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,108 @@ mod multipart_table_reference {
359359
assert_eq!(vals.values().to_vec(), vec![10, 20]);
360360
}
361361
}
362+
363+
/// Tests that `to_sql_with_engine` emits a parenthesized WHERE clause for composite-key
364+
/// deletes, preventing a stack overflow in DuckDB's optimizer on the 512 KiB worker thread.
365+
///
366+
/// Without the fix, `to_sql_with_engine` emits a flat unparenthesized OR chain:
367+
/// `"k1" = 1 AND "k2" = 2 OR "k1" = 2 AND "k2" = 4 OR … OR "k1" = N AND "k2" = 2N`
368+
///
369+
/// DuckDB's parser reconstructs this as a left-recursive tree of depth N.
370+
/// ExpressionRewriter / DistributivityRule recurse N levels deep → stack overflow.
371+
///
372+
/// With the fix, AND/OR operands are wrapped in parentheses, so the depth stays O(log N).
373+
mod composite_delete_stack_overflow {
374+
use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
375+
use datafusion::prelude::col;
376+
use datafusion::scalar::ScalarValue;
377+
use datafusion_table_providers::sql::sql_provider_datafusion::expr::{
378+
to_sql_with_engine, Engine,
379+
};
380+
use duckdb::Connection;
381+
382+
const N: usize = 100;
383+
384+
fn setup(conn: &Connection) {
385+
conn.execute_batch("CREATE TABLE test (k1 INTEGER, k2 INTEGER, val INTEGER);")
386+
.unwrap();
387+
conn.execute_batch(&format!(
388+
"INSERT INTO test SELECT i, i*2, i FROM generate_series(1, {N}) t(i);"
389+
))
390+
.unwrap();
391+
}
392+
393+
fn binary(left: Expr, op: Operator, right: Expr) -> Expr {
394+
Expr::BinaryExpr(BinaryExpr {
395+
left: Box::new(left),
396+
op,
397+
right: Box::new(right),
398+
})
399+
}
400+
401+
fn int32(v: i32) -> Expr {
402+
Expr::Literal(ScalarValue::Int32(Some(v)), None)
403+
}
404+
405+
fn reduce_or(conds: &[Expr]) -> Expr {
406+
if conds.len() == 1 {
407+
return conds[0].clone();
408+
}
409+
let mid = conds.len() / 2;
410+
binary(
411+
reduce_or(&conds[..mid]),
412+
Operator::Or,
413+
reduce_or(&conds[mid..]),
414+
)
415+
}
416+
417+
/// Verifies that `to_sql_with_engine` emits a parenthesized WHERE clause that DuckDB
418+
/// can execute on a 512 KiB worker thread stack.
419+
///
420+
/// Failure modes:
421+
/// - Without the parenthesization fix: the assertion on SQL structure fails
422+
/// immediately (clean test failure, no process crash).
423+
/// - With correct SQL but a DuckDB regression: `execute_batch` panics inside the
424+
/// spawned thread, which surfaces as a `join` error.
425+
#[test]
426+
fn composite_delete_via_to_sql_with_engine() {
427+
let conds: Vec<Expr> = (1..=N)
428+
.map(|i| {
429+
binary(
430+
binary(col("k1"), Operator::Eq, int32(i as i32)),
431+
Operator::And,
432+
binary(col("k2"), Operator::Eq, int32((i * 2) as i32)),
433+
)
434+
})
435+
.collect();
436+
437+
let where_clause = to_sql_with_engine(&reduce_or(&conds), Some(Engine::DuckDB))
438+
.expect("to_sql_with_engine failed");
439+
440+
// Without the fix: `"k1" = 1 AND "k2" = 2 OR …` — no parens, fails here.
441+
// With the fix: `(("k1" = 1) AND ("k2" = 2)) OR …`
442+
assert!(
443+
where_clause.starts_with('('),
444+
"WHERE clause must be parenthesized to avoid DuckDB stack overflow:\n{where_clause}"
445+
);
446+
447+
let sql = format!("DELETE FROM test WHERE {where_clause}");
448+
449+
let handle = std::thread::Builder::new()
450+
.stack_size(512 * 1024) // 512 KiB — matches production refresh-worker stack
451+
.name("refresh-worker".to_string())
452+
.spawn(move || {
453+
let conn = Connection::open_in_memory().unwrap();
454+
setup(&conn);
455+
conn.execute_batch(&sql).unwrap();
456+
457+
let remaining: i64 = conn
458+
.query_row("SELECT COUNT(*) FROM test", [], |r| r.get(0))
459+
.unwrap();
460+
assert_eq!(remaining, 0, "all {N} rows should be deleted");
461+
})
462+
.unwrap();
463+
464+
handle.join().unwrap();
465+
}
466+
}

0 commit comments

Comments
 (0)