Skip to content

Commit 0658305

Browse files
authored
Merge pull request #654 from datafusion-contrib/viktor/duckdb-deletes-stack-overflow
Fix duckdb stackoverflow for deletes
2 parents 466acff + 15ae26b commit 0658305

2 files changed

Lines changed: 180 additions & 2 deletions

File tree

core/src/sql/sql_provider_datafusion/expr.rs

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::sync::Arc;
22

33
use bigdecimal::{num_bigint::BigInt, BigDecimal};
44
use datafusion::{
5-
logical_expr::{Cast, Expr},
5+
logical_expr::{Cast, Expr, Operator},
66
scalar::ScalarValue,
77
sql::unparser::dialect::{
88
DefaultDialect, Dialect, DuckDBDialect, MySqlDialect, PostgreSqlDialect, SqliteDialect,
@@ -64,7 +64,12 @@ pub fn to_sql_with_engine(expr: &Expr, engine: Option<Engine>) -> Result<String>
6464
}
6565
}
6666

67-
Ok(format!("{} {} {}", left, binary_expr.op, right))
67+
match binary_expr.op {
68+
Operator::And | Operator::Or => {
69+
Ok(format!("({}) {} ({})", left, binary_expr.op, right))
70+
}
71+
_ => Ok(format!("{} {} {}", left, binary_expr.op, right)),
72+
}
6873
}
6974
Expr::Column(name) => match engine {
7075
Some(Engine::Spark | Engine::ODBC) => Ok(format!("{name}")),
@@ -536,6 +541,74 @@ mod tests {
536541
Ok(())
537542
}
538543

544+
fn binary(left: Expr, op: Operator, right: Expr) -> Expr {
545+
Expr::BinaryExpr(datafusion::logical_expr::BinaryExpr {
546+
left: Box::new(left),
547+
op,
548+
right: Box::new(right),
549+
})
550+
}
551+
552+
fn int(v: i32) -> Expr {
553+
Expr::Literal(ScalarValue::Int32(Some(v)), None)
554+
}
555+
556+
#[test]
557+
fn test_and_or_binary_exprs_are_parenthesized() -> Result<()> {
558+
// AND operands are wrapped so the tree shape is preserved in SQL
559+
let and_expr = binary(col("a"), Operator::And, col("b"));
560+
assert_eq!(to_sql(&and_expr)?, "(\"a\") AND (\"b\")");
561+
562+
// OR operands are wrapped for the same reason
563+
let or_expr = binary(col("a"), Operator::Or, col("b"));
564+
assert_eq!(to_sql(&or_expr)?, "(\"a\") OR (\"b\")");
565+
566+
Ok(())
567+
}
568+
569+
#[test]
570+
fn test_non_boolean_binary_exprs_not_parenthesized() -> Result<()> {
571+
// Comparison and arithmetic operators must NOT add extra parens
572+
let eq_expr = binary(col("k1"), Operator::Eq, int(1));
573+
assert_eq!(to_sql(&eq_expr)?, "\"k1\" = 1");
574+
575+
let lt_expr = binary(col("v"), Operator::Lt, int(42));
576+
assert_eq!(to_sql(&lt_expr)?, "\"v\" < 42");
577+
578+
let plus_expr = binary(col("x"), Operator::Plus, int(5));
579+
assert_eq!(to_sql(&plus_expr)?, "\"x\" + 5");
580+
581+
Ok(())
582+
}
583+
584+
#[test]
585+
fn test_composite_key_or_chain_parenthesized() -> Result<()> {
586+
// Simulates a two-row composite-key DELETE:
587+
// (k1 = 1 AND k2 = 2) OR (k1 = 2 AND k2 = 4)
588+
//
589+
// Without parens this serialises as a flat chain that DuckDB's optimizer
590+
// reconstructs as a left-recursive tree of depth N, causing a stack overflow
591+
// for large N. With parens the structure is explicit.
592+
let row1 = binary(
593+
binary(col("k1"), Operator::Eq, int(1)),
594+
Operator::And,
595+
binary(col("k2"), Operator::Eq, int(2)),
596+
);
597+
let row2 = binary(
598+
binary(col("k1"), Operator::Eq, int(2)),
599+
Operator::And,
600+
binary(col("k2"), Operator::Eq, int(4)),
601+
);
602+
let or_chain = binary(row1, Operator::Or, row2);
603+
604+
assert_eq!(
605+
to_sql(&or_chain)?,
606+
"((\"k1\" = 1) AND (\"k2\" = 2)) OR ((\"k1\" = 2) AND (\"k2\" = 4))"
607+
);
608+
609+
Ok(())
610+
}
611+
539612
#[test]
540613
fn test_expr_timestamp_scalar_value_to_sql() -> Result<()> {
541614
let expr = Expr::Literal(

core/tests/duckdb/mod.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,3 +357,108 @@ mod multipart_table_reference {
357357
assert_eq!(vals.values().to_vec(), vec![10, 20]);
358358
}
359359
}
360+
361+
/// Tests that `to_sql_with_engine` emits a parenthesized WHERE clause for composite-key
362+
/// deletes, preventing a stack overflow in DuckDB's optimizer on the 512 KiB worker thread.
363+
///
364+
/// Without the fix, `to_sql_with_engine` emits a flat unparenthesized OR chain:
365+
/// `"k1" = 1 AND "k2" = 2 OR "k1" = 2 AND "k2" = 4 OR … OR "k1" = N AND "k2" = 2N`
366+
///
367+
/// DuckDB's parser reconstructs this as a left-recursive tree of depth N.
368+
/// ExpressionRewriter / DistributivityRule recurse N levels deep → stack overflow.
369+
///
370+
/// With the fix, AND/OR operands are wrapped in parentheses, so the depth stays O(log N).
371+
mod composite_delete_stack_overflow {
372+
use datafusion::logical_expr::{BinaryExpr, Expr, Operator};
373+
use datafusion::prelude::col;
374+
use datafusion::scalar::ScalarValue;
375+
use datafusion_table_providers::sql::sql_provider_datafusion::expr::{
376+
to_sql_with_engine, Engine,
377+
};
378+
use duckdb::Connection;
379+
380+
const N: usize = 100;
381+
382+
fn setup(conn: &Connection) {
383+
conn.execute_batch("CREATE TABLE test (k1 INTEGER, k2 INTEGER, val INTEGER);")
384+
.unwrap();
385+
conn.execute_batch(&format!(
386+
"INSERT INTO test SELECT i, i*2, i FROM generate_series(1, {N}) t(i);"
387+
))
388+
.unwrap();
389+
}
390+
391+
fn binary(left: Expr, op: Operator, right: Expr) -> Expr {
392+
Expr::BinaryExpr(BinaryExpr {
393+
left: Box::new(left),
394+
op,
395+
right: Box::new(right),
396+
})
397+
}
398+
399+
fn int32(v: i32) -> Expr {
400+
Expr::Literal(ScalarValue::Int32(Some(v)), None)
401+
}
402+
403+
fn reduce_or(conds: &[Expr]) -> Expr {
404+
if conds.len() == 1 {
405+
return conds[0].clone();
406+
}
407+
let mid = conds.len() / 2;
408+
binary(
409+
reduce_or(&conds[..mid]),
410+
Operator::Or,
411+
reduce_or(&conds[mid..]),
412+
)
413+
}
414+
415+
/// Verifies that `to_sql_with_engine` emits a parenthesized WHERE clause that DuckDB
416+
/// can execute on a 512 KiB worker thread stack.
417+
///
418+
/// Failure modes:
419+
/// - Without the parenthesization fix: the assertion on SQL structure fails
420+
/// immediately (clean test failure, no process crash).
421+
/// - With correct SQL but a DuckDB regression: `execute_batch` panics inside the
422+
/// spawned thread, which surfaces as a `join` error.
423+
#[test]
424+
fn composite_delete_via_to_sql_with_engine() {
425+
let conds: Vec<Expr> = (1..=N)
426+
.map(|i| {
427+
binary(
428+
binary(col("k1"), Operator::Eq, int32(i as i32)),
429+
Operator::And,
430+
binary(col("k2"), Operator::Eq, int32((i * 2) as i32)),
431+
)
432+
})
433+
.collect();
434+
435+
let where_clause = to_sql_with_engine(&reduce_or(&conds), Some(Engine::DuckDB))
436+
.expect("to_sql_with_engine failed");
437+
438+
// Without the fix: `"k1" = 1 AND "k2" = 2 OR …` — no parens, fails here.
439+
// With the fix: `(("k1" = 1) AND ("k2" = 2)) OR …`
440+
assert!(
441+
where_clause.starts_with('('),
442+
"WHERE clause must be parenthesized to avoid DuckDB stack overflow:\n{where_clause}"
443+
);
444+
445+
let sql = format!("DELETE FROM test WHERE {where_clause}");
446+
447+
let handle = std::thread::Builder::new()
448+
.stack_size(512 * 1024) // 512 KiB — matches production refresh-worker stack
449+
.name("refresh-worker".to_string())
450+
.spawn(move || {
451+
let conn = Connection::open_in_memory().unwrap();
452+
setup(&conn);
453+
conn.execute_batch(&sql).unwrap();
454+
455+
let remaining: i64 = conn
456+
.query_row("SELECT COUNT(*) FROM test", [], |r| r.get(0))
457+
.unwrap();
458+
assert_eq!(remaining, 0, "all {N} rows should be deleted");
459+
})
460+
.unwrap();
461+
462+
handle.join().unwrap();
463+
}
464+
}

0 commit comments

Comments
 (0)