Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit db1feea

Browse files
authored
Missing bucket results for date_histogram aggregation (#1113)
#1044
1 parent 526e5c1 commit db1feea

File tree

8 files changed

+208
-39
lines changed

8 files changed

+208
-39
lines changed

quesma/model/bucket_aggregations/date_histogram.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717
type DateHistogramIntervalType bool
1818

1919
const (
20-
DefaultMinDocCount = -1
2120
DateHistogramFixedInterval DateHistogramIntervalType = true
2221
DateHistogramCalendarInterval DateHistogramIntervalType = false
2322
defaultDateTimeType = clickhouse.DateTime64

quesma/model/bucket_aggregations/histogram.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,14 @@ func (query *HistogramRowsTransformer) Transform(ctx context.Context, rowsFromDB
108108
return postprocessedRows
109109
}
110110

111-
// we're sure key is float64
111+
// key is expected to be float64; any other value (in rare cases nil) yields (-1, false)
112112
func (query *HistogramRowsTransformer) getKeyFloat64(row model.QueryResultRow) (float64, bool) {
113-
return row.Cols[len(row.Cols)-2].Value.(float64), true
113+
switch val := row.Cols[len(row.Cols)-2].Value.(type) {
114+
case float64:
115+
return val, true
116+
default:
117+
return -1, false
118+
}
114119
}
115120

116121
// we don't know the type

quesma/queryparser/aggregation_parser.go

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"quesma/clickhouse"
88
"quesma/logger"
99
"quesma/model"
10-
"quesma/model/bucket_aggregations"
1110
"regexp"
1211
"slices"
1312
"strconv"
@@ -366,22 +365,6 @@ func (cw *ClickhouseQueryTranslator) parseFieldFromScriptField(queryMap QueryMap
366365
return
367366
}
368367

369-
func (cw *ClickhouseQueryTranslator) parseMinDocCount(queryMap QueryMap) int {
370-
if minDocCountRaw, exists := queryMap["min_doc_count"]; exists {
371-
if minDocCount, ok := minDocCountRaw.(float64); ok {
372-
asInt := int(minDocCount)
373-
if asInt != 0 && asInt != 1 {
374-
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not 0 or 1, but %d. Not really supported", asInt)
375-
}
376-
return asInt
377-
} else {
378-
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not a number, but %T, value: %v. Using default value: %d",
379-
minDocCountRaw, minDocCountRaw, bucket_aggregations.DefaultMinDocCount)
380-
}
381-
}
382-
return bucket_aggregations.DefaultMinDocCount
383-
}
384-
385368
// quoteArray returns a new array with the same elements, but quoted
386369
func quoteArray(array []string) []string {
387370
quotedArray := make([]string, 0, len(array))

quesma/queryparser/pancake_aggregation_parser_buckets.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,23 @@ func (cw *ClickhouseQueryTranslator) parseOrder(params QueryMap, fieldExpression
470470
return fullOrderBy, nil
471471
}
472472

473+
func (cw *ClickhouseQueryTranslator) parseMinDocCount(queryMap QueryMap) int {
474+
const defaultMinDocCount = 0
475+
if minDocCountRaw, exists := queryMap["min_doc_count"]; exists {
476+
if minDocCount, ok := minDocCountRaw.(float64); ok {
477+
asInt := int(minDocCount)
478+
if asInt != 0 && asInt != 1 {
479+
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not 0 or 1, but %d. Not really supported", asInt)
480+
}
481+
return asInt
482+
} else {
483+
logger.WarnWithCtx(cw.Ctx).Msgf("min_doc_count is not a number, but %T, value: %v. Using default value: %d",
484+
minDocCountRaw, minDocCountRaw, defaultMinDocCount)
485+
}
486+
}
487+
return defaultMinDocCount
488+
}
489+
473490
// addMissingParameterIfPresent parses 'missing' parameter from 'params'.
474491
func (cw *ClickhouseQueryTranslator) addMissingParameterIfPresent(field model.Expr, params QueryMap) (updatedField model.Expr, didWeAddMissing bool) {
475492
if params["missing"] == nil {

quesma/testdata/aggregation_requests.go

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -329,10 +329,6 @@ var AggregationTests = []AggregationTestCase{
329329
"aggs": {
330330
"1": {
331331
"date_histogram": {
332-
"extended_bounds": {
333-
"max": 1707486436029,
334-
"min": 1706881636029
335-
},
336332
"field": "timestamp",
337333
"fixed_interval": "3h",
338334
"time_zone": "Europe/Warsaw"
@@ -2631,7 +2627,8 @@ var AggregationTests = []AggregationTestCase{
26312627
"series": {
26322628
"date_histogram": {
26332629
"field": "@timestamp",
2634-
"fixed_interval": "60s"
2630+
"fixed_interval": "60s",
2631+
"min_doc_count": 12
26352632
}
26362633
}
26372634
}
@@ -2869,10 +2866,6 @@ var AggregationTests = []AggregationTestCase{
28692866
},
28702867
"date_histogram": {
28712868
"calendar_interval": "1d",
2872-
"extended_bounds": {
2873-
"max": 1708969256351,
2874-
"min": 1708364456351
2875-
},
28762869
"field": "order_date"
28772870
}
28782871
}
@@ -3106,7 +3099,7 @@ var AggregationTests = []AggregationTestCase{
31063099
"fixed_interval": "12h",
31073100
"extended_bounds": {
31083101
"min": 1708627654149,
3109-
"max": 1709232454149
3102+
"max": 1708782454149
31103103
}
31113104
},
31123105
"aggs": {
@@ -3215,6 +3208,26 @@ var AggregationTests = []AggregationTestCase{
32153208
"doc_count": 83,
32163209
"key": 1708689600000,
32173210
"key_as_string": "2024-02-23T12:00:00.000"
3211+
},
3212+
{
3213+
"1-bucket": {
3214+
"1-metric": {
3215+
"value": null
3216+
}
3217+
},
3218+
"doc_count": 0,
3219+
"key": 1708732800000,
3220+
"key_as_string": "2024-02-24T00:00:00.000"
3221+
},
3222+
{
3223+
"1-bucket": {
3224+
"1-metric": {
3225+
"value": null
3226+
}
3227+
},
3228+
"doc_count": 0,
3229+
"key": 1708776000000,
3230+
"key_as_string": "2024-02-24T12:00:00.000"
32183231
}
32193232
]
32203233
}

quesma/testdata/aggregation_requests_2.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3377,6 +3377,11 @@ var AggregationTests2 = []AggregationTestCase{
33773377
"key": 1706021640000,
33783378
"key_as_string": "2024-01-23T14:54:00.000"
33793379
},
3380+
{
3381+
"doc_count": 0,
3382+
"key": 1706021670000,
3383+
"key_as_string": "2024-01-23T14:54:30.000"
3384+
},
33803385
{
33813386
"doc_count": 17,
33823387
"key": 1706021700000,
@@ -3632,11 +3637,21 @@ var AggregationTests2 = []AggregationTestCase{
36323637
"sum_other_doc_count": 1917
36333638
}
36343639
},
3640+
{
3641+
"doc_count": 0,
3642+
"key": 1706021670000,
3643+
"key_as_string": "2024-01-23T14:54:30.000"
3644+
},
36353645
{
36363646
"doc_count": 17,
36373647
"key": 1706021700000,
36383648
"key_as_string": "2024-01-23T14:55:00.000"
36393649
},
3650+
{
3651+
"doc_count": 0,
3652+
"key": 1706021730000,
3653+
"key_as_string": "2024-01-23T14:55:30.000"
3654+
},
36403655
{
36413656
"doc_count": 15,
36423657
"key": 1706021760000,

quesma/testdata/dates.go

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ var AggregationTestsWithDates = []AggregationTestCase{
323323
("@timestamp", 'Europe/Warsaw'))*1000) / 10000) AS "aggr__timeseries__key_0"
324324
ORDER BY "aggr__timeseries__key_0" ASC`,
325325
},
326-
{ // [1]
326+
{ // [2]
327327
TestName: "extended_bounds post keys (timezone calculations most tricky to get right)",
328328
QueryRequestJson: `
329329
{
@@ -504,4 +504,134 @@ var AggregationTestsWithDates = []AggregationTestCase{
504504
("@timestamp", 'Europe/Warsaw'))*1000) / 10000) AS "aggr__timeseries__key_0"
505505
ORDER BY "aggr__timeseries__key_0" ASC`,
506506
},
507+
{ // [3]
508+
TestName: "empty results, we still should add empty buckets, because of the extended_bounds and min_doc_count defaulting to 0",
509+
QueryRequestJson: `
510+
{
511+
"_source": {
512+
"excludes": []
513+
},
514+
"aggs": {
515+
"0": {
516+
"aggs": {
517+
"1": {
518+
"sum": {
519+
"field": "body_bytes_sent"
520+
}
521+
}
522+
},
523+
"date_histogram": {
524+
"calendar_interval": "1d",
525+
"extended_bounds": {
526+
"min": 1732327903466,
527+
"max": 1732713503466
528+
},
529+
"field": "@timestamp",
530+
"time_zone": "Europe/Warsaw"
531+
}
532+
}
533+
},
534+
"query": {
535+
"bool": {
536+
"filter": [
537+
{
538+
"range": {
539+
"@timestamp": {
540+
"format": "strict_date_optional_time",
541+
"gte": "2009-11-27T13:18:23.466Z",
542+
"lte": "2024-11-27T13:18:23.466Z"
543+
}
544+
}
545+
}
546+
],
547+
"must": [],
548+
"must_not": [],
549+
"should": []
550+
}
551+
},
552+
"runtime_mappings": {},
553+
"script_fields": {},
554+
"size": 0,
555+
"stored_fields": [
556+
"*"
557+
],
558+
"track_total_hits": true
559+
}`,
560+
ExpectedResponse: `
561+
{
562+
"completion_time_in_millis": 1707486436398,
563+
"expiration_time_in_millis": 1707486496397,
564+
"is_partial": false,
565+
"is_running": false,
566+
"response": {
567+
"_shards": {
568+
"failed": 0,
569+
"skipped": 0,
570+
"successful": 1,
571+
"total": 1
572+
},
573+
"aggregations": {
574+
"0": {
575+
"buckets": [
576+
{
577+
"doc_count": 0,
578+
"key": 1732402800000,
579+
"key_as_string": "2024-11-23T23:00:00.000",
580+
"1": {
581+
"value": null
582+
}
583+
},
584+
{
585+
"doc_count": 0,
586+
"key": 1732489200000,
587+
"key_as_string": "2024-11-24T23:00:00.000",
588+
"1": {
589+
"value": null
590+
}
591+
},
592+
{
593+
"doc_count": 0,
594+
"key": 1732575600000,
595+
"key_as_string": "2024-11-25T23:00:00.000",
596+
"1": {
597+
"value": null
598+
}
599+
},
600+
{
601+
"doc_count": 0,
602+
"key": 1732662000000,
603+
"key_as_string": "2024-11-26T23:00:00.000",
604+
"1": {
605+
"value": null
606+
}
607+
}
608+
]
609+
}
610+
},
611+
"hits": {
612+
"hits": [],
613+
"max_score": null,
614+
"total": {
615+
"relation": "eq",
616+
"value": 2200
617+
}
618+
},
619+
"timed_out": false,
620+
"took": 1
621+
},
622+
"start_time_in_millis": 1707486436397
623+
}`,
624+
ExpectedPancakeResults: []model.QueryResultRow{},
625+
ExpectedPancakeSQL: `
626+
SELECT toInt64((toUnixTimestamp64Milli("@timestamp")+timeZoneOffset(toTimezone(
627+
"@timestamp", 'Europe/Warsaw'))*1000) / 86400000) AS "aggr__0__key_0",
628+
count(*) AS "aggr__0__count",
629+
sumOrNull("body_bytes_sent") AS "metric__0__1_col_0"
630+
FROM __quesma_table_name
631+
WHERE ("@timestamp">=fromUnixTimestamp64Milli(1259327903466) AND "@timestamp"<=
632+
fromUnixTimestamp64Milli(1732713503466))
633+
GROUP BY toInt64((toUnixTimestamp64Milli("@timestamp")+timeZoneOffset(toTimezone
634+
("@timestamp", 'Europe/Warsaw'))*1000) / 86400000) AS "aggr__0__key_0"
635+
ORDER BY "aggr__0__key_0" ASC`,
636+
},
507637
}

quesma/testdata/kibana-visualize/aggregation_requests.go

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,6 @@ var AggregationTests = []testdata.AggregationTestCase{
4646
}
4747
},
4848
"date_histogram": {
49-
"extended_bounds": {
50-
"max": 1716812096627,
51-
"min": 1716811196627
52-
},
5349
"field": "@timestamp",
5450
"fixed_interval": "30s",
5551
"time_zone": "Europe/Warsaw"
@@ -141,6 +137,15 @@ var AggregationTests = []testdata.AggregationTestCase{
141137
"key": 1716827010000,
142138
"key_as_string": "2024-05-27T16:23:30.000"
143139
},
140+
{
141+
"doc_count": 0,
142+
"key": 1716827040000,
143+
"key_as_string": "2024-05-27T16:24:00.000",
144+
"1": {
145+
"buckets": [],
146+
"sum_other_doc_count": 0
147+
}
148+
},
144149
{
145150
"1": {
146151
"buckets": [
@@ -267,10 +272,6 @@ var AggregationTests = []testdata.AggregationTestCase{
267272
"aggs": {
268273
"1": {
269274
"date_histogram": {
270-
"extended_bounds": {
271-
"max": 1716812073493,
272-
"min": 1716811173493
273-
},
274275
"field": "@timestamp",
275276
"fixed_interval": "30s"
276277
}
@@ -345,6 +346,11 @@ var AggregationTests = []testdata.AggregationTestCase{
345346
"key": 1716834450000,
346347
"key_as_string": "2024-05-27T18:27:30.000"
347348
},
349+
{
350+
"doc_count": 0,
351+
"key": 1716834480000,
352+
"key_as_string": "2024-05-27T18:28:00.000"
353+
},
348354
{
349355
"doc_count": 2,
350356
"key": 1716834510000,
@@ -454,6 +460,7 @@ var AggregationTests = []testdata.AggregationTestCase{
454460
"max": 1716834478178,
455461
"min": 1716833578178
456462
},
463+
"min_doc_count": 1,
457464
"field": "@timestamp",
458465
"fixed_interval": "30s"
459466
}

0 commit comments

Comments
 (0)