Skip to content

Commit b73bf89

Browse files
max-sixtyclaude
andauthored
fix: Keep Computes with Aggregate when Filter follows (#5130) (#5639)
Co-authored-by: Claude <[email protected]>
1 parent b260e67 commit b73bf89

File tree

3 files changed

+71
-19
lines changed

3 files changed

+71
-19
lines changed

prqlc/prqlc/src/sql/pq/anchor.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,17 @@ fn is_split_required(transform: &SqlTransform, following: &mut HashSet<String>)
349349
contains_any(following, ["From", "Join", "Aggregate", "Compute"])
350350
}
351351
Super(Filter(_)) => contains_any(following, ["From", "Join"]),
352-
Super(Compute(_)) => contains_any(following, ["From", "Join", /* "Aggregate" */ "Filter"]),
352+
Super(Compute(_)) => {
353+
// Don't split between Compute and Filter when there's an Aggregate in following.
354+
// Filter after Aggregate becomes HAVING in the same SELECT, so all preceding
355+
// Computes (including those with aggregation functions like SUM) must stay
356+
// with the Aggregate to get proper GROUP BY.
357+
if following.contains("Aggregate") {
358+
contains_any(following, ["From", "Join"])
359+
} else {
360+
contains_any(following, ["From", "Join", /* "Aggregate" */ "Filter"])
361+
}
362+
}
353363

354364
// Sort will be pushed down the CTEs, so there is no point in splitting for it.
355365
// Super(Sort(_)) => contains_any(following, ["From", "Join", "Compute", "Aggregate"]),

prqlc/prqlc/tests/integration/sql.rs

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6151,7 +6151,7 @@ fn test_append_select_multiple() {
61516151
)
61526152
sort { +invoice_id, +total }
61536153
select { total, invoice_id }
6154-
"###).unwrap(), @r"
6154+
"###).unwrap(), @"
61556155
WITH table_3 AS (
61566156
SELECT
61576157
*
@@ -6273,7 +6273,10 @@ fn test_append_with_cte() {
62736273

62746274
#[test]
62756275
fn test_distinct_on_sort_on_compute() {
6276-
// Test for handling distinct on with sorting on computed columns
6276+
// Test for handling distinct on with sorting on computed columns.
6277+
// Note: table_0 includes billing_city and _expr_1 even though they're unused
6278+
// downstream. This is a side effect of the fix for #5130 which keeps Computes
6279+
// together with Aggregates when Filter follows.
62776280
assert_snapshot!(compile(r###"
62786281
from invoices
62796282
derive code = case [customer_id < 10 => billing_postal_code, true => null]
@@ -6283,7 +6286,7 @@ fn test_distinct_on_sort_on_compute() {
62836286
)
62846287
filter (customer_id | in [4])
62856288
group {billing_country} (aggregate {total = math.round 2 (sum total)})
6286-
"###).unwrap(), @r"
6289+
"###).unwrap(), @"
62876290
WITH table_1 AS (
62886291
SELECT
62896292
billing_country,
@@ -6309,7 +6312,9 @@ fn test_distinct_on_sort_on_compute() {
63096312
billing_country
63106313
ORDER BY
63116314
_expr_1 DESC
6312-
) AS _expr_0
6315+
) AS _expr_0,
6316+
billing_city,
6317+
_expr_1
63136318
FROM
63146319
table_1
63156320
)
@@ -6917,3 +6922,50 @@ fn test_sort_take_before_aggregate() {
69176922
total_sum DESC
69186923
"#);
69196924
}
6925+
6926+
#[test]
6927+
fn test_aggregate_with_operations_and_filter() {
6928+
// Issue #5130: Filtering on combined aggregates was generating invalid SQL.
6929+
// The CTE was missing GROUP BY when aggregate expressions had operations.
6930+
// Previously, this produced a CTE with SUM() but no GROUP BY clause.
6931+
assert_snapshot!(compile(r###"
6932+
from invoices
6933+
group billing_city (
6934+
aggregate{
6935+
sum_c = (sum customer_id) + (sum customer_id)
6936+
}
6937+
)
6938+
filter sum_c > 0
6939+
"###).unwrap(), @"
6940+
SELECT
6941+
billing_city,
6942+
COALESCE(SUM(customer_id), 0) + COALESCE(SUM(customer_id), 0) AS sum_c
6943+
FROM
6944+
invoices
6945+
GROUP BY
6946+
billing_city
6947+
HAVING
6948+
COALESCE(SUM(customer_id), 0) + COALESCE(SUM(customer_id), 0) > 0
6949+
");
6950+
6951+
// Also test with scalar operations
6952+
assert_snapshot!(compile(r###"
6953+
from invoices
6954+
group billing_city (
6955+
aggregate{
6956+
sum_c = (sum customer_id) * 2
6957+
}
6958+
)
6959+
filter sum_c > 0
6960+
"###).unwrap(), @"
6961+
SELECT
6962+
billing_city,
6963+
COALESCE(SUM(customer_id), 0) * 2 AS sum_c
6964+
FROM
6965+
invoices
6966+
GROUP BY
6967+
billing_city
6968+
HAVING
6969+
COALESCE(SUM(customer_id), 0) * 2 > 0
6970+
");
6971+
}

web/book/tests/documentation/snapshots/documentation__book__README__prql-language-book__1.snap

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,17 @@
22
source: web/book/tests/documentation/book.rs
33
expression: "from employees\nfilter start_date > @2021-01-01 # Clear date syntax\nderive { # `derive` adds columns / variables\n gross_salary = salary + (tax ?? 0), # Terse coalesce\n gross_cost = gross_salary + benefits, # Variables can use other variables\n}\nfilter gross_cost > 0\ngroup {title, country} ( # `group` runs a pipeline over each group\n aggregate { # `aggregate` reduces each group to a value\n average gross_salary,\n sum_gross_cost = sum gross_cost, # `=` sets a column name\n }\n)\nfilter sum_gross_cost > 100_000 # `filter` replaces both of SQL's `WHERE` & `HAVING`\nderive id = f\"{title}_{country}\" # F-strings like Python\nderive country_code = s\"LEFT(country, 2)\" # S-strings permit SQL as an escape hatch\nsort {sum_gross_cost, -country} # `-country` means descending order\ntake 1..20 # Range expressions (also valid as `take 20`)\n"
44
---
5-
WITH table_1 AS (
5+
WITH table_0 AS (
66
SELECT
77
title,
88
country,
9-
salary + COALESCE(tax, 0) + benefits AS _expr_1,
10-
salary + COALESCE(tax, 0) AS _expr_2
9+
AVG(salary + COALESCE(tax, 0)) AS _expr_0,
10+
COALESCE(SUM(salary + COALESCE(tax, 0) + benefits), 0) AS sum_gross_cost
1111
FROM
1212
employees
1313
WHERE
1414
start_date > DATE '2021-01-01'
15-
),
16-
table_0 AS (
17-
SELECT
18-
title,
19-
country,
20-
AVG(_expr_2) AS _expr_0,
21-
COALESCE(SUM(_expr_1), 0) AS sum_gross_cost
22-
FROM
23-
table_1
24-
WHERE
25-
_expr_1 > 0
15+
AND salary + COALESCE(tax, 0) + benefits > 0
2616
GROUP BY
2717
title,
2818
country

0 commit comments

Comments
 (0)