Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 5ed84b9

Browse files
authored
Report errors in queries better #2 (in parsing bucket aggregations) (#1006)
**old comment (motivation):** Our error messages for errors encountered while parsing a query request aren't as good as Elastic's; I'll try to improve them here. (This is not unimportant: for an invalid query request, Elastic always fails with an error response, whereas we often don't and instead e.g. return empty results, which might trick the user into thinking the query went fine.) I think I'll split it into a few PRs. This might be a good starting point for another one. (update: actually, a similar small improvement was already merged, but for pipeline aggregations) **new comment (what's been done):** There's really not much going on here. The crux of this PR is to change our very ugly 400-line `func (cw *ClickhouseQueryTranslator) pancakeTryBucketAggregation` function into something a bit nicer, and also to report errors in more places, to be consistent with Elastic's behaviour. There are also quite a few style improvements that I caught along the way, but only trivial/very local ones, e.g. extracting code that was copy/pasted 5 times into 1 function, consistent naming across parsers of different aggregations, etc. (not much more).
1 parent 8ac3b1f commit 5ed84b9

File tree

11 files changed

+803
-540
lines changed

11 files changed

+803
-540
lines changed

quesma/model/bucket_aggregations/dateRange.go

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,23 @@ const UnboundedInterval = "*"
1616
// 1) in Clickhouse's proper format, e.g. toStartOfDay(subDate(now(), INTERVAL 3 week))
1717
// 2) * (UnboundedInterval), which means no bound
1818
type DateTimeInterval struct {
19-
Begin string
20-
End string
19+
begin string
20+
end string
2121
}
2222

2323
func NewDateTimeInterval(begin, end string) DateTimeInterval {
2424
return DateTimeInterval{
25-
Begin: begin,
26-
End: end,
25+
begin: begin,
26+
end: end,
2727
}
2828
}
2929

3030
// BeginTimestampToSQL returns SQL select for the begin timestamp, and a boolean indicating if the select is needed
3131
// We query Clickhouse for this timestamp, as it's defined in Clickhouse's format, e.g. now()-1d.
3232
// It's only 1 more field to our SELECT query, so it shouldn't be a performance issue.
3333
func (interval DateTimeInterval) BeginTimestampToSQL() (sqlSelect model.Expr, selectNeeded bool) {
34-
if interval.Begin != UnboundedInterval {
35-
return model.NewFunction("toInt64", model.NewFunction("toUnixTimestamp", model.NewLiteral(interval.Begin))), true
34+
if interval.begin != UnboundedInterval {
35+
return model.NewFunction("toInt64", model.NewFunction("toUnixTimestamp", model.NewLiteral(interval.begin))), true
3636
}
3737
return nil, false
3838
}
@@ -41,21 +41,21 @@ func (interval DateTimeInterval) BeginTimestampToSQL() (sqlSelect model.Expr, se
4141
// We query Clickhouse for this timestamp, as it's defined in Clickhouse's format, e.g. now()-1d.
4242
// It's only 1 more field to our SELECT query, so it isn't a performance issue.
4343
func (interval DateTimeInterval) EndTimestampToSQL() (sqlSelect model.Expr, selectNeeded bool) {
44-
if interval.End != UnboundedInterval {
45-
return model.NewFunction("toInt64", model.NewFunction("toUnixTimestamp", model.NewLiteral(interval.End))), true
44+
if interval.end != UnboundedInterval {
45+
return model.NewFunction("toInt64", model.NewFunction("toUnixTimestamp", model.NewLiteral(interval.end))), true
4646
}
4747
return nil, false
4848
}
4949

50-
func (interval DateTimeInterval) ToWhereClause(fieldName string) model.Expr {
50+
func (interval DateTimeInterval) ToWhereClause(field model.Expr) model.Expr {
5151
begin, isBegin := interval.BeginTimestampToSQL()
5252
end, isEnd := interval.EndTimestampToSQL()
5353

5454
if isBegin {
55-
begin = model.NewInfixExpr(model.NewColumnRef(fieldName), ">=", begin)
55+
begin = model.NewInfixExpr(field, ">=", begin)
5656
}
5757
if isEnd {
58-
end = model.NewInfixExpr(model.NewColumnRef(fieldName), "<", end)
58+
end = model.NewInfixExpr(field, "<", end)
5959
}
6060

6161
if isBegin && isEnd {
@@ -65,20 +65,20 @@ func (interval DateTimeInterval) ToWhereClause(fieldName string) model.Expr {
6565
} else if isEnd {
6666
return end
6767
} else {
68-
return model.NewLiteral("TRUE")
68+
return model.TrueExpr
6969
}
7070
}
7171

7272
type DateRange struct {
7373
ctx context.Context
74-
FieldName string
75-
Format string
76-
Intervals []DateTimeInterval
77-
SelectColumnsNr int // how many columns we add to the query because of date_range aggregation, e.g. SELECT x,y,z -> 3
74+
field model.Expr
75+
format string
76+
intervals []DateTimeInterval
77+
selectColumnsNr int // how many columns we add to the query because of date_range aggregation, e.g. SELECT x,y,z -> 3
7878
}
7979

80-
func NewDateRange(ctx context.Context, fieldName string, format string, intervals []DateTimeInterval, selectColumnsNr int) DateRange {
81-
return DateRange{ctx: ctx, FieldName: fieldName, Format: format, Intervals: intervals, SelectColumnsNr: selectColumnsNr}
80+
func NewDateRange(ctx context.Context, field model.Expr, format string, intervals []DateTimeInterval, selectColumnsNr int) DateRange {
81+
return DateRange{ctx: ctx, field: field, format: format, intervals: intervals, selectColumnsNr: selectColumnsNr}
8282
}
8383

8484
func (query DateRange) AggregationType() model.AggregationType {
@@ -92,15 +92,15 @@ func (query DateRange) TranslateSqlResponseToJson(rows []model.QueryResultRow) m
9292
}
9393

9494
response := make([]model.JsonMap, 0)
95-
startIteration := len(rows[0].Cols) - 1 - query.SelectColumnsNr
95+
startIteration := len(rows[0].Cols) - 1 - query.selectColumnsNr
9696
if startIteration < 0 || startIteration >= len(rows[0].Cols) {
9797
logger.ErrorWithCtx(query.ctx).Msgf(
9898
"unexpected column nr in aggregation response, startIteration: %d, len(rows[0].Cols): %d",
9999
startIteration, len(rows[0].Cols),
100100
)
101101
return nil
102102
}
103-
for intervalIdx, columnIdx := 0, startIteration; intervalIdx < len(query.Intervals); intervalIdx++ {
103+
for intervalIdx, columnIdx := 0, startIteration; intervalIdx < len(query.intervals); intervalIdx++ {
104104
responseForInterval, nextColumnIdx := query.responseForInterval(&rows[0], intervalIdx, columnIdx)
105105
response = append(response, responseForInterval)
106106
columnIdx = nextColumnIdx
@@ -111,7 +111,7 @@ func (query DateRange) TranslateSqlResponseToJson(rows []model.QueryResultRow) m
111111
}
112112

113113
func (query DateRange) String() string {
114-
return "date_range, intervals: " + fmt.Sprintf("%v", query.Intervals)
114+
return "date_range, intervals: " + fmt.Sprintf("%v", query.intervals)
115115
}
116116

117117
func (query DateRange) responseForInterval(row *model.QueryResultRow, intervalIdx, columnIdx int) (
@@ -123,7 +123,7 @@ func (query DateRange) responseForInterval(row *model.QueryResultRow, intervalId
123123

124124
var from, to int64
125125
var fromString, toString string
126-
if query.Intervals[intervalIdx].Begin == UnboundedInterval {
126+
if query.intervals[intervalIdx].begin == UnboundedInterval {
127127
fromString = UnboundedInterval
128128
} else {
129129
if columnIdx >= len(row.Cols) {
@@ -137,7 +137,7 @@ func (query DateRange) responseForInterval(row *model.QueryResultRow, intervalId
137137
columnIdx++
138138
}
139139

140-
if query.Intervals[intervalIdx].End == UnboundedInterval {
140+
if query.intervals[intervalIdx].end == UnboundedInterval {
141141
toString = UnboundedInterval
142142
} else {
143143
if columnIdx >= len(row.Cols) {
@@ -173,16 +173,16 @@ func (query DateRange) DoesNotHaveGroupBy() bool {
173173
}
174174

175175
func (query DateRange) CombinatorGroups() (result []CombinatorGroup) {
176-
for intervalIdx, interval := range query.Intervals {
176+
for intervalIdx, interval := range query.intervals {
177177
prefix := fmt.Sprintf("range_%d__", intervalIdx)
178-
if len(query.Intervals) == 1 {
178+
if len(query.intervals) == 1 {
179179
prefix = ""
180180
}
181181
result = append(result, CombinatorGroup{
182182
idx: intervalIdx,
183183
Prefix: prefix,
184184
Key: prefix, // TODO: we need translate date to real time
185-
WhereClause: interval.ToWhereClause(query.FieldName),
185+
WhereClause: interval.ToWhereClause(query.field),
186186
})
187187
}
188188
return
@@ -199,23 +199,23 @@ func (query DateRange) CombinatorTranslateSqlResponseToJson(subGroup CombinatorG
199199
}
200200

201201
// TODO: we need translate relative to real time
202-
interval := query.Intervals[subGroup.idx]
203-
if interval.Begin != UnboundedInterval {
204-
response["from"] = interval.Begin
205-
response["from_as_string"] = interval.Begin
202+
interval := query.intervals[subGroup.idx]
203+
if interval.begin != UnboundedInterval {
204+
response["from"] = interval.begin
205+
response["from_as_string"] = interval.begin
206206
}
207-
if interval.End != UnboundedInterval {
208-
response["to"] = interval.End
209-
response["to_as_string"] = interval.End
207+
if interval.end != UnboundedInterval {
208+
response["to"] = interval.end
209+
response["to_as_string"] = interval.end
210210
}
211211

212212
return response
213213
}
214214

215215
func (query DateRange) CombinatorSplit() []model.QueryType {
216-
result := make([]model.QueryType, 0, len(query.Intervals))
217-
for _, interval := range query.Intervals {
218-
result = append(result, NewDateRange(query.ctx, query.FieldName, query.Format, []DateTimeInterval{interval}, query.SelectColumnsNr))
216+
result := make([]model.QueryType, 0, len(query.intervals))
217+
for _, interval := range query.intervals {
218+
result = append(result, NewDateRange(query.ctx, query.field, query.format, []DateTimeInterval{interval}, query.selectColumnsNr))
219219
}
220220
return result
221221
}

quesma/model/bucket_aggregations/range.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ import (
1212
"strings"
1313
)
1414

15-
const keyedDefaultValue = false
16-
1715
var IntervalInfiniteRange = math.NaN()
1816

1917
type Interval struct {
@@ -91,10 +89,6 @@ func NewRange(ctx context.Context, expr model.Expr, intervals []Interval, keyed
9189
return Range{ctx, expr, intervals, keyed}
9290
}
9391

94-
func NewRangeWithDefaultKeyed(ctx context.Context, expr model.Expr, intervals []Interval) Range {
95-
return Range{ctx, expr, intervals, keyedDefaultValue}
96-
}
97-
9892
func (query Range) AggregationType() model.AggregationType {
9993
return model.BucketAggregation
10094
}

quesma/queryparser/aggregation_date_range_parser.go

Lines changed: 33 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -3,80 +3,53 @@
33
package queryparser
44

55
import (
6-
"quesma/logger"
6+
"fmt"
77
"quesma/model/bucket_aggregations"
88
"unicode"
99
)
1010

11-
func (cw *ClickhouseQueryTranslator) parseDateRangeAggregation(dateRange QueryMap) (bucket_aggregations.DateRange, error) {
12-
var err error
13-
var fieldName, format string
14-
15-
if field, exists := dateRange["field"]; exists {
16-
if fieldNameRaw, ok := field.(string); ok {
17-
fieldName = cw.ResolveField(cw.Ctx, fieldNameRaw)
18-
} else {
19-
logger.WarnWithCtx(cw.Ctx).Msgf("field specified for date range aggregation is not a string. Using empty. Querymap: %v", dateRange)
20-
}
21-
} else {
22-
logger.WarnWithCtx(cw.Ctx).Msgf("no field specified for date range aggregation. Using empty. Querymap: %v", dateRange)
23-
}
24-
var ranges []any
25-
var ok bool
26-
if formatRaw, exists := dateRange["format"]; exists {
27-
if formatParsed, ok := formatRaw.(string); ok {
28-
format = formatParsed
29-
} else {
30-
logger.WarnWithCtx(cw.Ctx).Msgf("format specified for date range aggregation is not a string. Using empty. Querymap: %v", dateRange)
31-
}
11+
func (cw *ClickhouseQueryTranslator) parseDateRangeAggregation(aggregation *pancakeAggregationTreeNode, params QueryMap) (err error) {
12+
field := cw.parseFieldField(params, "date_range")
13+
if field == nil {
14+
return fmt.Errorf("no field specified for date range aggregation, params: %v", params)
3215
}
33-
if rangesRaw, exists := dateRange["ranges"]; exists {
34-
if ranges, ok = rangesRaw.([]any); !ok {
35-
logger.WarnWithCtx(cw.Ctx).Msgf("ranges specified for date range aggregation is not an array. Using empty. Querymap: %v", dateRange)
36-
}
37-
} else {
38-
logger.WarnWithCtx(cw.Ctx).Msgf("no ranges specified for date range aggregation. Using empty. Querymap: %v", dateRange)
16+
format := cw.parseStringField(params, "format", "")
17+
ranges, err := cw.parseArrayField(params, "ranges")
18+
if err != nil {
19+
return err
3920
}
21+
4022
intervals := make([]bucket_aggregations.DateTimeInterval, 0, len(ranges))
4123
selectColumnsNr := len(ranges) // we query Clickhouse for every unbounded part of interval (begin and end)
42-
for _, Range := range ranges {
43-
rangeMap := Range.(QueryMap)
44-
var intervalBegin, intervalEnd string
45-
from, exists := rangeMap["from"]
46-
if exists {
47-
if fromRaw, ok := from.(string); ok {
48-
intervalBegin, err = cw.parseDateTimeInClickhouseMathLanguage(fromRaw)
49-
if err != nil {
50-
return bucket_aggregations.DateRange{}, err
51-
}
52-
selectColumnsNr++
53-
} else {
54-
logger.WarnWithCtx(cw.Ctx).Msgf("from specified for date range aggregation is not a string. Querymap: %v "+
55-
"Using default (unbounded).", dateRange)
56-
intervalBegin = bucket_aggregations.UnboundedInterval
24+
for _, rangeRaw := range ranges {
25+
rangeMap, ok := rangeRaw.(QueryMap)
26+
if !ok {
27+
return fmt.Errorf("range is not a map, but %T, range: %v", rangeRaw, rangeRaw)
28+
}
29+
30+
const defaultIntervalBound = bucket_aggregations.UnboundedInterval
31+
intervalBegin := defaultIntervalBound
32+
if from := cw.parseStringField(rangeMap, "from", defaultIntervalBound); from != defaultIntervalBound {
33+
intervalBegin, err = cw.parseDateTimeInClickhouseMathLanguage(from)
34+
if err != nil {
35+
return err
5736
}
58-
} else {
59-
intervalBegin = bucket_aggregations.UnboundedInterval
37+
selectColumnsNr++
6038
}
61-
to, exists := rangeMap["to"]
62-
if exists {
63-
if toRaw, ok := to.(string); ok {
64-
intervalEnd, err = cw.parseDateTimeInClickhouseMathLanguage(toRaw)
65-
if err != nil {
66-
return bucket_aggregations.DateRange{}, err
67-
}
68-
selectColumnsNr++
69-
} else {
70-
logger.WarnWithCtx(cw.Ctx).Msgf("To specified for date range aggregation is not a string. Querymap: %v "+
71-
"Using default (unbounded).", dateRange)
72-
intervalEnd = bucket_aggregations.UnboundedInterval
39+
40+
intervalEnd := bucket_aggregations.UnboundedInterval
41+
if to := cw.parseStringField(rangeMap, "to", defaultIntervalBound); to != defaultIntervalBound {
42+
intervalEnd, err = cw.parseDateTimeInClickhouseMathLanguage(to)
43+
if err != nil {
44+
return err
7345
}
74-
} else {
75-
intervalEnd = bucket_aggregations.UnboundedInterval
46+
selectColumnsNr++
7647
}
7748
intervals = append(intervals, bucket_aggregations.NewDateTimeInterval(intervalBegin, intervalEnd))
7849
}
79-
return bucket_aggregations.NewDateRange(cw.Ctx, fieldName, format, intervals, selectColumnsNr), nil
50+
51+
aggregation.queryType = bucket_aggregations.NewDateRange(cw.Ctx, field, format, intervals, selectColumnsNr)
52+
return nil
8053
}
8154

8255
// parseDateTimeInClickhouseMathLanguage parses dateTime from Clickhouse's format

quesma/queryparser/aggregation_parser.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
package queryparser
44

55
import (
6+
"fmt"
67
"quesma/clickhouse"
78
"quesma/logger"
89
"quesma/model"
@@ -175,7 +176,11 @@ func (cw *ClickhouseQueryTranslator) parseTopHits(queryMap QueryMap) (parsedTopH
175176
const defaultSize = 1
176177
size := cw.parseSize(params, defaultSize)
177178

178-
orderBy := cw.parseOrder(params, queryMap, []model.Expr{})
179+
orderBy, err := cw.parseOrder(params, []model.Expr{})
180+
if err != nil {
181+
logger.WarnWithCtx(cw.Ctx).Msgf("error parsing order in top_hits: %v", err)
182+
return
183+
}
179184
if len(orderBy) == 1 && orderBy[0].IsCountDesc() { // we don't need count DESC
180185
orderBy = []model.OrderByExpr{}
181186
}
@@ -287,6 +292,16 @@ func (cw *ClickhouseQueryTranslator) parseStringField(queryMap QueryMap, fieldNa
287292
return defaultValue
288293
}
289294

295+
func (cw *ClickhouseQueryTranslator) parseArrayField(queryMap QueryMap, fieldName string) ([]any, error) {
296+
if valueRaw, exists := queryMap[fieldName]; exists {
297+
if asArray, ok := valueRaw.([]any); ok {
298+
return asArray, nil
299+
}
300+
return nil, fmt.Errorf("%s is not an array, but %T, value: %v", fieldName, valueRaw, valueRaw)
301+
}
302+
return nil, fmt.Errorf("array field '%s' not found in aggregation queryMap: %v", fieldName, queryMap)
303+
}
304+
290305
// parseFieldFieldMaybeScript is basically almost a copy of parseFieldField above, but it also handles a basic script, if "field" is missing.
291306
func (cw *ClickhouseQueryTranslator) parseFieldFieldMaybeScript(shouldBeMap any, aggregationType string) (field model.Expr, isFromScript bool) {
292307
Map, ok := shouldBeMap.(QueryMap)

0 commit comments

Comments
 (0)