Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 2661385

Browse files
Use -Array variants of aggregates in schema_array_transformer (v2) (#1226)
Review fixes for: #1152 `schema_array_transformer` transforms the SQL query for `Array` columns. Before this change, if an aggregation was performed on an `Array` column, e.g. `sum(myArrayColumn)`, the transformer would change it into `sum(arrayJoin(myArrayColumn))`. However, using the `arrayJoin` function has problems: `arrayJoin` modifies the result set of the SQL query by introducing additional rows. If there are many `arrayJoin`s, a Cartesian product of many rows will be performed: this causes query slowdown and makes the result invalid (we don't actually want to do a Cartesian product!). Solve the problem by using `-Array` variants of aggregates (e.g. `sumArray` instead of `sum(arrayJoin())`), which do not inflate the number of result rows. Note that this PR does NOT get rid of `arrayJoin()` fully in all cases. There are panels that actually need it, such as "Top products this week" in the eCommerce dashboard, where we `GROUP BY` an array column. <img width="1350" alt="Screenshot 2025-01-07 at 11 20 42" src="https://github.com/user-attachments/assets/214890d5-c04a-4a6a-a683-5bffaf944d80" /> This remaining case should use the `ARRAY JOIN` operator, but that is out of scope for this PR. Closes #1152 --------- Co-authored-by: Piotr Grabowski <[email protected]>
1 parent 9bf027a commit 2661385

File tree

3 files changed

+108
-29
lines changed

3 files changed

+108
-29
lines changed

quesma/quesma/schema_array_transformer.go

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,52 @@ import (
1616
//
1717
//
1818

19+
// functionWithCombinator is an aggregate function name split into a base name
// and the combinator suffixes this transformer recognises. It lets a caller
// parse a name, switch a combinator on (typically isArray) and render the
// name again with String.
type functionWithCombinator struct {
	baseFunctionName string // name left after the recognised suffixes are removed
	isArray          bool   // render the "Array" suffix; never set by parsing
	isIf             bool   // "If" suffix
	isOrNull         bool   // "OrNull" suffix
	isState          bool   // "State" suffix
	isMerge          bool   // "Merge" suffix
}

// String renders the function name as the base name followed by the enabled
// combinator suffixes in a fixed order: Array, If, OrNull, State, Merge.
func (f functionWithCombinator) String() string {
	suffixes := []struct {
		enabled bool
		text    string
	}{
		{f.isArray, "Array"},
		{f.isIf, "If"},
		{f.isOrNull, "OrNull"},
		{f.isState, "State"},
		{f.isMerge, "Merge"},
	}

	var b strings.Builder
	b.WriteString(f.baseFunctionName)
	for _, s := range suffixes {
		if s.enabled {
			b.WriteString(s.text)
		}
	}
	return b.String()
}

// parseFunctionWithCombinator splits funcName into its base name and the
// State, Merge, If and OrNull combinator suffixes found at its end.
//
// Suffixes are recognised in any order: the stripping pass is repeated until
// no further suffix matches. A single pass would leave, for example, the "If"
// of "sumIfOrNull" inside the base name, and enabling isArray would then
// render "sumIfArrayOrNull" instead of "sumArrayIfOrNull" (ClickHouse's
// combinator documentation says Array must come before If).
//
// Each suffix is stripped at most once, and the "Array" suffix is not parsed:
// isArray is always false in the returned value.
func parseFunctionWithCombinator(funcName string) (result functionWithCombinator) {
	result.baseFunctionName = funcName

	// The order matches the original single pass, so every name that pass
	// already parsed completely yields exactly the same result.
	combinators := []struct {
		suffix string
		flag   *bool
	}{
		{"State", &result.isState},
		{"Merge", &result.isMerge},
		{"If", &result.isIf},
		{"OrNull", &result.isOrNull},
	}

	// Terminates after at most len(combinators) strips because a flag that is
	// already set is never processed again.
	for stripped := true; stripped; {
		stripped = false
		for _, c := range combinators {
			if *c.flag || !strings.HasSuffix(result.baseFunctionName, c.suffix) {
				continue
			}
			result.baseFunctionName = strings.TrimSuffix(result.baseFunctionName, c.suffix)
			*c.flag = true
			stripped = true
		}
	}

	return result
}
64+
1965
// arrayTypeResolver wraps the schema of the queried index. It is passed to
// NewArrayTypeVisitor, whose visitors call its dbColumnType method (defined
// outside this excerpt) and test the returned type string for an "Array"
// prefix.
type arrayTypeResolver struct {
2066
indexSchema schema.Schema
2167
}
@@ -73,6 +119,7 @@ func NewArrayTypeVisitor(resolver arrayTypeResolver) model.ExprVisitor {
73119

74120
}
75121

122+
var childGotArrayFunc bool
76123
visitor.OverrideVisitFunction = func(b *model.BaseExprVisitor, e model.FunctionExpr) interface{} {
77124

78125
if len(e.Args) > 0 {
@@ -81,23 +128,38 @@ func NewArrayTypeVisitor(resolver arrayTypeResolver) model.ExprVisitor {
81128
if ok {
82129
dbType := resolver.dbColumnType(column.ColumnName)
83130
if strings.HasPrefix(dbType, "Array") {
84-
if strings.HasPrefix(e.Name, "sum") {
85-
// here we apply -Array combinator to the sum function
86-
// https://clickhouse.com/docs/en/sql-reference/aggregate-functions/combinators#-array
87-
//
88-
// TODO this can be rewritten to transform all aggregate functions as well
89-
//
90-
e.Name = strings.ReplaceAll(e.Name, "sum", "sumArray")
91-
} else {
92-
logger.Error().Msgf("Unhandled array function %s, column %v (%v)", e.Name, column.ColumnName, dbType)
93-
}
131+
funcParsed := parseFunctionWithCombinator(e.Name)
132+
funcParsed.isArray = true
133+
childGotArrayFunc = true
134+
e.Name = funcParsed.String()
94135
}
136+
} else {
137+
e.Args = b.VisitChildren(e.Args)
95138
}
96139
}
97140

141+
return model.NewFunction(e.Name, e.Args...)
142+
}
143+
144+
visitor.OverrideVisitWindowFunction = func(b *model.BaseExprVisitor, e model.WindowFunction) interface{} {
145+
childGotArrayFunc = false
98146
args := b.VisitChildren(e.Args)
99-
return model.NewFunction(e.Name, args...)
147+
if childGotArrayFunc {
148+
funcParsed := parseFunctionWithCombinator(e.Name)
149+
funcParsed.isArray = true
150+
e.Name = funcParsed.String()
151+
}
152+
return model.NewWindowFunction(e.Name, args, e.PartitionBy, e.OrderBy)
153+
}
154+
155+
visitor.OverrideVisitColumnRef = func(b *model.BaseExprVisitor, e model.ColumnRef) interface{} {
156+
dbType := resolver.dbColumnType(e.ColumnName)
157+
if strings.HasPrefix(dbType, "Array") {
158+
logger.Error().Msgf("Unhandled array column ref %v (%v)", e.ColumnName, dbType)
159+
}
160+
return e
100161
}
162+
101163
return visitor
102164
}
103165

@@ -148,23 +210,6 @@ func checkIfGroupingByArrayColumn(selectCommand model.SelectCommand, resolver ar
148210
return &e
149211
}
150212

151-
visitor.OverrideVisitFunction = func(b *model.BaseExprVisitor, e model.FunctionExpr) interface{} {
152-
153-
if strings.HasPrefix(e.Name, "sum") || strings.HasPrefix(e.Name, "count") {
154-
155-
if len(e.Args) > 0 {
156-
arg := e.Args[0]
157-
158-
if isArrayColumn(arg) {
159-
found = true
160-
}
161-
162-
}
163-
164-
}
165-
return e
166-
}
167-
168213
selectCommand.Accept(visitor)
169214

170215
return found

quesma/quesma/schema_transformer.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,10 @@ func columnsToAliasedColumns(columns []model.Expr) []model.Expr {
877877
aliasedColumns[i] = model.NewAliasedExpr(column, fmt.Sprintf("column_%d", i))
878878
continue
879879
}
880+
if _, ok := column.(model.WindowFunction); ok {
881+
aliasedColumns[i] = model.NewAliasedExpr(column, fmt.Sprintf("column_%d", i))
882+
continue
883+
}
880884

881885
aliasedColumns[i] = model.NewAliasedExpr(column, fmt.Sprintf("column_%d", i))
882886
logger.Error().Msgf("Quesma internal error - unreachable code: unsupported column type %T", column)

quesma/quesma/schema_transformer_test.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,37 @@ func Test_arrayType(t *testing.T) {
490490
FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
491491
Columns: []model.Expr{
492492
model.NewColumnRef("order_date"),
493-
model.NewAliasedExpr(model.NewFunction("sumOrNull", model.NewFunction("arrayJoin", model.NewColumnRef("products_quantity"))), "column_1"),
493+
model.NewAliasedExpr(model.NewFunction("sumArrayOrNull", model.NewColumnRef("products_quantity")), "column_1"),
494+
},
495+
GroupBy: []model.Expr{model.NewColumnRef("order_date")},
496+
},
497+
},
498+
},
499+
500+
{
501+
name: "arrayReducePancake",
502+
//SELECT "order_date", avgOrNullMerge(avgOrNullState("products::quantity")) OVER (), sumOrNull("products::quantity") FROM "kibana_sample_data_ecommerce" GROUP BY "order_date"
503+
query: &model.Query{
504+
TableName: "kibana_sample_data_ecommerce",
505+
SelectCommand: model.SelectCommand{
506+
FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
507+
Columns: []model.Expr{
508+
model.NewColumnRef("order_date"),
509+
model.NewWindowFunction("avgOrNullMerge", []model.Expr{model.NewFunction("avgOrNullState", model.NewColumnRef("products.quantity"))}, []model.Expr{}, []model.OrderByExpr{}),
510+
model.NewFunction("sumOrNull", model.NewColumnRef("products.quantity")),
511+
},
512+
GroupBy: []model.Expr{model.NewColumnRef("order_date")},
513+
},
514+
},
515+
//SELECT "order_date", avgArrayOrNullMerge(avgArrayOrNullState("products::quantity")) OVER (), sumArrayOrNull("products::quantity") FROM "kibana_sample_data_ecommerce" GROUP BY "order_date"
516+
expected: &model.Query{
517+
TableName: "kibana_sample_data_ecommerce",
518+
SelectCommand: model.SelectCommand{
519+
FromClause: model.NewTableRef("kibana_sample_data_ecommerce"),
520+
Columns: []model.Expr{
521+
model.NewColumnRef("order_date"),
522+
model.NewAliasedExpr(model.NewWindowFunction("avgArrayOrNullMerge", []model.Expr{model.NewFunction("avgArrayOrNullState", model.NewColumnRef("products_quantity"))}, []model.Expr{}, []model.OrderByExpr{}), "column_1"),
523+
model.NewAliasedExpr(model.NewFunction("sumArrayOrNull", model.NewColumnRef("products_quantity")), "column_2"),
494524
},
495525
GroupBy: []model.Expr{model.NewColumnRef("order_date")},
496526
},

0 commit comments

Comments
 (0)