This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 89892c2

date_histogram: fix adding empty in-between rows (#1158)
We had a problem when adding empty rows for `>= month` `calendar_interval`: we always added a fixed amount of time, which obviously isn't right for `month/quarter/year` intervals. This PR fixes that, but only for `in-between` rows. I also tried `pre` and `post` rows, but that wasn't as trivial, because our `date_histogram` is the most complex aggregation, I think, and its code is a bit of a mess. I'll finish that in the next PR.
1 parent bdef811 commit 89892c2
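
To illustrate the bug class the commit message describes, here is a minimal standalone Go sketch (not Quesma code): stepping by a fixed 30-day duration drifts off calendar month starts, while calendar arithmetic via `time.AddDate` does not.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	start := time.Date(2015, time.January, 1, 0, 0, 0, 0, time.UTC)

	fixed := start    // stepped by a constant 30-day duration
	calendar := start // stepped by calendar months
	for i := 0; i < 3; i++ {
		fixed = fixed.Add(30 * 24 * time.Hour)
		calendar = calendar.AddDate(0, 1, 0)
		fmt.Printf("fixed: %s  calendar: %s\n",
			fixed.Format("2006-01-02"), calendar.Format("2006-01-02"))
	}
	// Output:
	// fixed: 2015-01-31  calendar: 2015-02-01
	// fixed: 2015-03-02  calendar: 2015-03-01
	// fixed: 2015-04-01  calendar: 2015-04-01
}
```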

File tree

3 files changed: +230 −1


quesma/model/bucket_aggregations/date_histogram.go

Lines changed: 32 additions & 1 deletion

```diff
@@ -197,12 +197,16 @@ func (query *DateHistogram) generateSQLForCalendarInterval() model.Expr {
 		query.intervalType = DateHistogramFixedInterval
 		return query.generateSQLForFixedInterval()
 	case "week", "1w":
+		query.interval = "1w"
 		return exprForBiggerIntervals("toStartOfWeek")
 	case "month", "1M":
+		query.interval = "1M"
 		return exprForBiggerIntervals("toStartOfMonth")
 	case "quarter", "1q":
+		query.interval = "1q"
 		return exprForBiggerIntervals("toStartOfQuarter")
 	case "year", "1y":
+		query.interval = "1y"
 		return exprForBiggerIntervals("toStartOfYear")
 	}
 
@@ -315,7 +319,7 @@ func (qt *DateHistogramRowsTransformer) Transform(ctx context.Context, rowsFromDB
 	// ugly, but works, will do for now
 	doWeDivide := (currentKey/qt.getKey(rowsFromDB[i])) >= 100 || (float64(currentKey)/float64(qt.getKey(rowsFromDB[i]))) <= 0.01
 
-	for midKey := lastKey + qt.differenceBetweenTwoNextKeys; midKey < currentKey && emptyRowsAdded < maxEmptyBucketsAdded; midKey += qt.differenceBetweenTwoNextKeys {
+	for midKey := qt.nextKey(lastKey); midKey < currentKey && emptyRowsAdded < maxEmptyBucketsAdded; midKey = qt.nextKey(midKey) {
 		midRow := rowsFromDB[i-1].Copy()
 		divideBy := int64(1)
 		if doWeDivide {
@@ -398,3 +402,30 @@ func (qt *DateHistogramRowsTransformer) Transform(ctx context.Context, rowsFromDB
 func (qt *DateHistogramRowsTransformer) getKey(row model.QueryResultRow) int64 {
 	return row.Cols[len(row.Cols)-2].Value.(int64)
 }
+
+func (qt *DateHistogramRowsTransformer) nextKey(key int64) int64 {
+	if qt.dateHistogram.intervalType == DateHistogramFixedInterval {
+		return key + qt.differenceBetweenTwoNextKeys
+	}
+	if qt.dateHistogram.interval != "1M" && qt.dateHistogram.interval != "1q" && qt.dateHistogram.interval != "1y" {
+		// intervals < month are the same as fixed_interval here
+		return key + qt.differenceBetweenTwoNextKeys
+	}
+
+	addNMonths := func(key int64, N int) int64 {
+		ts := time.UnixMilli(key).UTC()
+		// adding 2 days below isn't exactly necessary, it's only a quick way to make sure we're in the same month, even for weird timezones
+		return ts.AddDate(0, N, 2).UnixMilli() - ts.AddDate(0, 0, 2).UnixMilli()
+	}
+	var monthsNr int
+	switch qt.dateHistogram.interval {
+	case "1M":
+		monthsNr = 1
+	case "1q":
+		monthsNr = 3
+	case "1y":
+		monthsNr = 12
+	}
+	deltaInMs := addNMonths(key, monthsNr)
+	return key + deltaInMs
+}
```
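
The core of the fix is `nextKey`: instead of adding a constant millisecond delta, it computes the real length of the next month/quarter/year starting from the current key. Here is a minimal standalone sketch of the same idea, assuming epoch-millisecond keys aligned to period starts as the code above produces; `nextMonthKey` is an illustrative name, not the Quesma API.

```go
package main

import (
	"fmt"
	"time"
)

// nextMonthKey mimics the approach in the diff: given a bucket key in epoch
// milliseconds, compute the delta to the key n months later using calendar
// arithmetic rather than a fixed duration.
func nextMonthKey(key int64, n int) int64 {
	ts := time.UnixMilli(key).UTC()
	// Difference between "n months later" and "now", both shifted by 2 days,
	// mirroring the trick in the diff to stay inside the intended month.
	delta := ts.AddDate(0, n, 2).UnixMilli() - ts.AddDate(0, 0, 2).UnixMilli()
	return key + delta
}

func main() {
	// 2015-01-01T00:00:00Z, the first bucket key from the regression tests below.
	key := int64(1420070400000)
	for i := 0; i < 6; i++ {
		// Prints exact month starts: 2015-01-01, 2015-02-01, ..., 2015-06-01,
		// despite January (31 days) and February (28 days) having different lengths.
		fmt.Println(time.UnixMilli(key).UTC().Format("2006-01-02"))
		key = nextMonthKey(key, 1)
	}
}
```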

quesma/testdata/dates.go

Lines changed: 194 additions & 0 deletions

```diff
@@ -634,4 +634,198 @@ var AggregationTestsWithDates = []AggregationTestCase{
 			("@timestamp", 'Europe/Warsaw'))*1000) / 86400000) AS "aggr__0__key_0"
 			ORDER BY "aggr__0__key_0" ASC`,
 	},
+	{ // [4]
+		TestName: "date_histogram add in-between rows, calendar_interval: >= month (regression test)",
+		QueryRequestJson: `
+		{
+			"size": 0,
+			"aggs": {
+				"sales_per_month": {
+					"date_histogram": {
+						"field": "date",
+						"calendar_interval": "month"
+					}
+				}
+			}
+		}`,
+		ExpectedResponse: `
+		{
+			"aggregations": {
+				"sales_per_month": {
+					"buckets": [
+						{
+							"key_as_string": "2015-01-01T00:00:00.000",
+							"key": 1420070400000,
+							"doc_count": 3
+						},
+						{
+							"key_as_string": "2015-02-01T00:00:00.000",
+							"key": 1422748800000,
+							"doc_count": 0
+						},
+						{
+							"key_as_string": "2015-03-01T00:00:00.000",
+							"key": 1425168000000,
+							"doc_count": 0
+						},
+						{
+							"key_as_string": "2015-04-01T00:00:00.000",
+							"key": 1427846400000,
+							"doc_count": 0
+						},
+						{
+							"key_as_string": "2015-05-01T00:00:00.000",
+							"key": 1430438400000,
+							"doc_count": 0
+						},
+						{
+							"key_as_string": "2015-06-01T00:00:00.000",
+							"key": 1433116800000,
+							"doc_count": 0
+						},
+						{
+							"key_as_string": "2015-07-01T00:00:00.000",
+							"key": 1435708800000,
+							"doc_count": 2
+						}
+					]
+				}
+			}
+		}`,
+		ExpectedPancakeResults: []model.QueryResultRow{
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__sales_per_month__key_0", int64(1420070400000)),
+				model.NewQueryResultCol("aggr__sales_per_month__count", int64(3)),
+			}},
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__sales_per_month__key_0", int64(1422748800000)),
+				model.NewQueryResultCol("aggr__sales_per_month__count", int64(0)),
+			}},
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__sales_per_month__key_0", int64(1435708800000)),
+				model.NewQueryResultCol("aggr__sales_per_month__count", int64(2)),
+			}},
+		},
+		ExpectedPancakeSQL: `
+		SELECT toInt64(toUnixTimestamp(toStartOfMonth(toTimezone("date", 'UTC'))))*1000
+		  AS "aggr__sales_per_month__key_0", count(*) AS "aggr__sales_per_month__count"
+		FROM __quesma_table_name
+		GROUP BY toInt64(toUnixTimestamp(toStartOfMonth(toTimezone("date", 'UTC'))))*
+		  1000 AS "aggr__sales_per_month__key_0"
+		ORDER BY "aggr__sales_per_month__key_0" ASC`,
+	},
+	{ // [5]
+		TestName: "date_histogram add in-between rows, calendar_interval: >= month (regression test)",
+		QueryRequestJson: `
+		{
+			"size": 0,
+			"aggs": {
+				"sales_per_quarter": {
+					"date_histogram": {
+						"field": "date",
+						"calendar_interval": "quarter"
+					}
+				}
+			}
+		}`,
+		ExpectedResponse: `
+		{
+			"aggregations": {
+				"sales_per_quarter": {
+					"buckets": [
+						{
+							"key_as_string": "2015-01-01T00:00:00.000",
+							"key": 1420070400000,
+							"doc_count": 3
+						},
+						{
+							"key_as_string": "2015-04-01T00:00:00.000",
+							"key": 1427846400000,
+							"doc_count": 0
+						},
+						{
+							"key_as_string": "2015-07-01T00:00:00.000",
+							"key": 1435708800000,
+							"doc_count": 2
+						}
+					]
+				}
+			}
+		}`,
+		ExpectedPancakeResults: []model.QueryResultRow{
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__sales_per_quarter__key_0", int64(1420070400000)),
+				model.NewQueryResultCol("aggr__sales_per_quarter__count", int64(3)),
+			}},
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__sales_per_quarter__key_0", int64(1435708800000)),
+				model.NewQueryResultCol("aggr__sales_per_quarter__count", int64(2)),
+			}},
+		},
+		ExpectedPancakeSQL: `
+		SELECT toInt64(toUnixTimestamp(toStartOfQuarter(toTimezone("date", 'UTC'))))*
+		  1000 AS "aggr__sales_per_quarter__key_0",
+		  count(*) AS "aggr__sales_per_quarter__count"
+		FROM __quesma_table_name
+		GROUP BY toInt64(toUnixTimestamp(toStartOfQuarter(toTimezone("date", 'UTC'))))*
+		  1000 AS "aggr__sales_per_quarter__key_0"
+		ORDER BY "aggr__sales_per_quarter__key_0" ASC`,
+	},
+	{ // [6]
+		TestName: "date_histogram add in-between rows, calendar_interval: >= month (regression test)",
+		QueryRequestJson: `
+		{
+			"size": 0,
+			"aggs": {
+				"sales_per_year": {
+					"date_histogram": {
+						"field": "date",
+						"calendar_interval": "year"
+					}
+				}
+			}
+		}`,
+		ExpectedResponse: `
+		{
+			"aggregations": {
+				"sales_per_year": {
+					"buckets": [
+						{
+							"key_as_string": "2015-01-01T00:00:00.000",
+							"key": 1420070400000,
+							"doc_count": 3
+						},
+						{
+							"key_as_string": "2016-01-01T00:00:00.000",
+							"key": 1451606400000,
+							"doc_count": 0
+						},
+						{
+							"key_as_string": "2017-01-01T00:00:00.000",
+							"key": 1483228800000,
+							"doc_count": 2
+						}
+					]
+				}
+			}
+		}`,
+		ExpectedPancakeResults: []model.QueryResultRow{
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__sales_per_year__key_0", int64(1420070400000)),
+				model.NewQueryResultCol("aggr__sales_per_year__count", int64(3)),
+			}},
+			{Cols: []model.QueryResultCol{
+				model.NewQueryResultCol("aggr__sales_per_year__key_0", int64(1483228800000)),
+				model.NewQueryResultCol("aggr__sales_per_year__count", int64(2)),
+			}},
+		},
+		ExpectedPancakeSQL: `
+		SELECT toInt64(toUnixTimestamp(toStartOfYear(toTimezone("date", 'UTC'))))*1000
+		  AS "aggr__sales_per_year__key_0",
+		  count(*) AS "aggr__sales_per_year__count"
+		FROM __quesma_table_name
+		GROUP BY toInt64(toUnixTimestamp(toStartOfYear(toTimezone("date", 'UTC'))))*1000
+		  AS "aggr__sales_per_year__key_0"
+		ORDER BY "aggr__sales_per_year__key_0" ASC`,
+	},
 }
```
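
The expected bucket keys in these tests are epoch milliseconds. As a quick sanity check (a standalone snippet, not part of the test suite), they decode to exact UTC period starts:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// A sample of bucket keys from the expected responses above (epoch ms).
	keys := []int64{1420070400000, 1422748800000, 1427846400000, 1435708800000, 1451606400000, 1483228800000}
	for _, k := range keys {
		// Each key lands exactly on a UTC month/quarter/year start.
		fmt.Println(k, "->", time.UnixMilli(k).UTC().Format(time.RFC3339))
	}
	// Output:
	// 1420070400000 -> 2015-01-01T00:00:00Z
	// 1422748800000 -> 2015-02-01T00:00:00Z
	// 1427846400000 -> 2015-04-01T00:00:00Z
	// 1435708800000 -> 2015-07-01T00:00:00Z
	// 1451606400000 -> 2016-01-01T00:00:00Z
	// 1483228800000 -> 2017-01-01T00:00:00Z
}
```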

quesma/util/intervals.go

Lines changed: 4 additions & 0 deletions

```diff
@@ -22,6 +22,8 @@ func ParseInterval(fixedInterval string) (time.Duration, error) {
 		return time.Hour * 24 * 7, nil
 	case "month":
 		return time.Hour * 24 * 30, nil
+	case "quarter":
+		return time.Hour * 24 * 30 * 3, nil
 	case "year":
 		return time.Hour * 24 * 365, nil
 	}
@@ -33,6 +35,8 @@ func ParseInterval(fixedInterval string) (time.Duration, error) {
 		unit = 7 * 24 * time.Hour
 	case strings.HasSuffix(fixedInterval, "M"):
 		unit = 30 * 24 * time.Hour
+	case strings.HasSuffix(fixedInterval, "q"):
+		unit = 3 * 30 * 24 * time.Hour
 	case strings.HasSuffix(fixedInterval, "y"):
 		unit = 365 * 24 * time.Hour
 	default:
```
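
Note that `ParseInterval` keeps fixed approximations: the new `quarter` case is 90 days, consistent with the existing 30-day `month` and 365-day `year` cases, while calendar-exact stepping for in-between rows happens in `nextKey`. A standalone sketch (illustrative, not Quesma code) of why the fixed value alone couldn't replace calendar arithmetic:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// The fixed approximation ParseInterval returns for "quarter":
	quarter := time.Hour * 24 * 30 * 3
	fmt.Println(quarter) // 2160h0m0s, i.e. 90 days

	// A real calendar quarter varies: Q3 2015 (Jul+Aug+Sep) is 92 days.
	q3Start := time.Date(2015, time.July, 1, 0, 0, 0, 0, time.UTC)
	q4Start := q3Start.AddDate(0, 3, 0)
	fmt.Println(q4Start.Sub(q3Start)) // 2208h0m0s, i.e. 92 days
}
```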
