
Commit 6b71cac

[sample_logs] Treat leaf filter as metric, not bucket aggregation (#1317)
Uncomments 2 tests and fixes the last 2 TODO requests from the `sample_logs` dashboard. A leaf filter is basically a metric, and a very simple one at that: its response always looks like `{"doc_count": 12}`. It seems reasonable to me to treat it as a metric. It works, but at first I wanted to refactor the code a bit, as the solution felt hackish. Edit: I changed my mind. If we release tomorrow (Wednesday) morning, why not ship with a few more panels on the sample dashboards working? The code doesn't look that bad to me and we can polish it afterwards. This is the version I showed today: almost everything works on all samples.
1 parent e9d2db5 commit 6b71cac
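
To make the idea concrete, here is a minimal sketch (hypothetical, simplified types; the real branch lives in createLayer in pancake_transformer.go and uses bucket_aggregations.FilterAgg and model.NewFunction("countIf", ...)): a filter aggregation with no sub-aggregations only ever produces one number, so it can be compiled into a single countIf(...) column named with the metric__..._col_0 convention.

	package main

	import "fmt"

	// leafFilter stands in for bucket_aggregations.FilterAgg: a filter
	// aggregation that carries only a WHERE clause and has no children.
	type leafFilter struct {
		name        string // aggregation path, e.g. "0__1-bucket"
		whereClause string // e.g. `"FlightDelay"==true`
	}

	// column compiles the leaf filter into its single countIf(...) metric column.
	func (f leafFilter) column() string {
		return fmt.Sprintf(`countIf(%s) AS "metric__%s_col_0"`, f.whereClause, f.name)
	}

	func main() {
		f := leafFilter{name: "0__1-bucket", whereClause: `"FlightDelay"==true`}
		fmt.Println(f.column())
		// countIf("FlightDelay"==true) AS "metric__0__1-bucket_col_0"
	}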

File tree

6 files changed: +162 -59 lines changed

platform/parsers/elastic_query_dsl/pancake_sql_query_generation_test.go

Lines changed: 0 additions & 7 deletions
@@ -52,13 +52,6 @@ func TestPancakeQueryGeneration(t *testing.T) {
 		if test.TestName == "TODO Top products this/last week(file:kibana-sample-data-ecommerce,nr:9)" {
 			t.Skip("works IRL, need to update test's schema. It's already WIP https://github.com/QuesmaOrg/quesma/pull/1255. Let's wait for merge.")
 		}
-		// sample_logs
-		if test.TestName == "Table gz, css, zip, etc.(file:kibana-sample-data-logs,nr:6)" {
-			t.Skip()
-		}
-		if test.TestName == "Errors by host(file:kibana-sample-data-logs,nr:7)" {
-			t.Skip()
-		}
 
 		if filters(test.TestName) {
 			t.Skip("Fix filters")

platform/parsers/elastic_query_dsl/pancake_transformer.go

Lines changed: 29 additions & 4 deletions
@@ -157,19 +157,32 @@ func (a *pancakeTransformer) createLayer(previousAggrNames []string, childAggreg
 		return childAggregations[i].name < childAggregations[j].name
 	})
 
+	metrics := make([]*pancakeModelMetricAggregation, 0)
+
 	for _, childAgg := range childAggregations {
 		if childAgg.queryType == nil {
 			return nil, fmt.Errorf("query type is nil in createLayer")
 		}
 		switch childAgg.queryType.AggregationType() {
 		case model.MetricsAggregation:
-			metrics, err := a.metricAggregationTreeNodeToModel(previousAggrNames, childAgg)
+			metric, err := a.metricAggregationTreeNodeToModel(previousAggrNames, childAgg)
 			if err != nil {
 				return nil, err
 			}
-			result[0].layer.currentMetricAggregations = append(result[0].layer.currentMetricAggregations, metrics)
+			metrics = append(metrics, metric)
 
 		case model.BucketAggregation:
+			filter, isFilter := childAgg.queryType.(bucket_aggregations.FilterAgg)
+			if isFilter && len(childAgg.children) == 0 {
+				childAgg.selectedColumns = append(childAgg.selectedColumns, model.NewFunction("countIf", filter.WhereClause))
+				metric, err := a.metricAggregationTreeNodeToModel(previousAggrNames, childAgg)
+				if err != nil {
+					return nil, err
+				}
+				metrics = append(metrics, metric)
+				break
+			}
+
 			bucket, err := a.bucketAggregationToLayer(previousAggrNames, childAgg)
 			if err != nil {
 				return nil, err
@@ -202,6 +215,9 @@ func (a *pancakeTransformer) createLayer(previousAggrNames []string, childAggreg
 				childAgg.name, childAgg.queryType.AggregationType().String())
 		}
 	}
+	for _, resultLayer := range result {
+		resultLayer.layer.currentMetricAggregations = metrics
+	}
 	return result, nil
 }
 
@@ -255,7 +271,7 @@ func (a *pancakeTransformer) connectPipelineAggregations(layers []*pancakeModelL
 		}
 
 		parentBucketLayerIdx := i + layerIdx
-		if parentBucketLayerIdx > 0 {
+		if parentBucketLayerIdx > 0 && layers[parentBucketLayerIdx-1].nextBucketAggregation != nil {
 			pipeline.queryType.SetParentBucketAggregation(layers[parentBucketLayerIdx-1].nextBucketAggregation.queryType)
 		}
 		parentBucketLayer.childrenPipelineAggregations = append(parentBucketLayer.childrenPipelineAggregations, pipeline)
@@ -276,7 +292,16 @@ func (a *pancakeTransformer) findParentBucketLayer(layers []*pancakeModelLayer,
 	layer := layers[0]
 	for i, aggrName := range pipeline.GetPathToParent() {
 		layer = layers[i]
-		if layer.nextBucketAggregation == nil || layer.nextBucketAggregation.name != aggrName {
+		asBucket := layer.nextBucketAggregation != nil && layer.nextBucketAggregation.name == aggrName
+		asMetric := false
+		for _, metric := range layer.currentMetricAggregations {
+			if metric.name == aggrName {
+				asMetric = true
+				break
+			}
+		}
+
+		if !asBucket && !asMetric {
 			return nil, -1, fmt.Errorf("could not find parent bucket layer")
 		}
 	}

platform/testdata/aggregation_requests.go

Lines changed: 101 additions & 19 deletions
@@ -288,29 +288,29 @@ var AggregationTests = []AggregationTestCase{
 				model.NewQueryResultCol("aggr__0__parent_count", 46),
 				model.NewQueryResultCol("aggr__0__key_0", "Abu Dhabi"),
 				model.NewQueryResultCol("aggr__0__count", uint64(23)),
-				model.NewQueryResultCol("aggr__0__1-bucket__count", 7),
+				model.NewQueryResultCol("metric__0__1-bucket_col_0", 7),
 				model.NewQueryResultCol("metric__0__3-bucket_col_0", 3),
 			}},
 			{Cols: []model.QueryResultCol{
 				model.NewQueryResultCol("aggr__0__parent_count", 46),
 				model.NewQueryResultCol("aggr__0__key_0", "Adelaide"),
 				model.NewQueryResultCol("aggr__0__count", uint64(20)),
-				model.NewQueryResultCol("aggr__0__1-bucket__count", 3),
+				model.NewQueryResultCol("metric__0__1-bucket_col_0", 3),
 				model.NewQueryResultCol("metric__0__3-bucket_col_0", 2),
 			}},
 			{Cols: []model.QueryResultCol{
 				model.NewQueryResultCol("aggr__0__parent_count", 46),
 				model.NewQueryResultCol("aggr__0__key_0", "Albuquerque"),
 				model.NewQueryResultCol("aggr__0__count", uint64(3)),
-				model.NewQueryResultCol("aggr__0__1-bucket__count", 0),
+				model.NewQueryResultCol("metric__0__1-bucket_col_0", 0),
 				model.NewQueryResultCol("metric__0__3-bucket_col_0", 2),
 			}},
 		},
 		ExpectedPancakeSQL: `
 			SELECT sum(count(*)) OVER () AS "aggr__0__parent_count",
 			  "OriginCityName" AS "aggr__0__key_0", count(*) AS "aggr__0__count",
-			  countIf("Cancelled"==true) AS "metric__0__3-bucket_col_0",
-			  countIf("FlightDelay"==true) AS "aggr__0__1-bucket__count"
+			  countIf("FlightDelay"==true) AS "metric__0__1-bucket_col_0",
+			  countIf("Cancelled"==true) AS "metric__0__3-bucket_col_0"
 			FROM ` + TableName + `
 			WHERE ("timestamp">=fromUnixTimestamp64Milli(1706881636029) AND "timestamp"<=fromUnixTimestamp64Milli(1707486436029))
 			GROUP BY "OriginCityName" AS "aggr__0__key_0"
@@ -857,11 +857,11 @@ var AggregationTests = []AggregationTestCase{
 		}`,
 		ExpectedPancakeResults: []model.QueryResultRow{
 			{Cols: []model.QueryResultCol{
-				model.NewQueryResultCol("aggr__0-bucket__count", uint64(553)),
+				model.NewQueryResultCol("metric__0-bucket_col_0", uint64(553)),
 			}},
 		},
 		ExpectedPancakeSQL: `
-			SELECT countIf("FlightDelay"==true) AS "aggr__0-bucket__count"
+			SELECT countIf("FlightDelay"==true) AS "metric__0-bucket_col_0"
 			FROM ` + TableName + `
 			WHERE ("timestamp">=fromUnixTimestamp64Milli(1706881636029) AND "timestamp"<=fromUnixTimestamp64Milli(1707486436029))`,
 	},
@@ -3918,7 +3918,7 @@ var AggregationTests = []AggregationTestCase{
 			}},
 		},
 		ExpectedAdditionalPancakeResults: [][]model.QueryResultRow{
-			{
+			{ // not all columns, but it doesn't matter, we don't need them in this testcase, it seems
 				{Cols: []model.QueryResultCol{
 					model.NewQueryResultCol("aggr__sample__count", int64(1634)),
 					model.NewQueryResultCol("aggr__sample__bytes_gauge_top__parent_count", int64(1634)),
@@ -4036,22 +4036,104 @@ var AggregationTests = []AggregationTestCase{
 			FROM (
 			  SELECT "bytes_gauge"
 			  FROM __quesma_table_name
-			  WHERE ("timestamp">=fromUnixTimestamp64Milli(1709932426749) AND "timestamp"<=fromUnixTimestamp64Milli(1711228426749))
+			  WHERE ("timestamp">=fromUnixTimestamp64Milli(1709932426749) AND "timestamp"<=
+			    fromUnixTimestamp64Milli(1711228426749))
 			  LIMIT 20000)`,
-		ExpectedAdditionalPancakeSQLs: []string{
-			`SELECT sum(count(*)) OVER () AS "aggr__sample__count",
+		ExpectedAdditionalPancakeSQLs: []string{`
+			SELECT sum(count(*)) OVER () AS "aggr__sample__count",
+			  quantilesMerge(0.050000)(quantilesState(0.050000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_0",
+			  quantilesMerge(0.100000)(quantilesState(0.100000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_1",
+			  quantilesMerge(0.150000)(quantilesState(0.150000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_2",
+			  quantilesMerge(0.200000)(quantilesState(0.200000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_3",
+			  quantilesMerge(0.250000)(quantilesState(0.250000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_4",
+			  quantilesMerge(0.300000)(quantilesState(0.300000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_5",
+			  quantilesMerge(0.350000)(quantilesState(0.350000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_6",
+			  quantilesMerge(0.400000)(quantilesState(0.400000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_7",
+			  quantilesMerge(0.450000)(quantilesState(0.450000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_8",
+			  quantilesMerge(0.500000)(quantilesState(0.500000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_9",
+			  quantilesMerge(0.550000)(quantilesState(0.550000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_10",
+			  quantilesMerge(0.600000)(quantilesState(0.600000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_11",
+			  quantilesMerge(0.650000)(quantilesState(0.650000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_12",
+			  quantilesMerge(0.700000)(quantilesState(0.700000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_13",
+			  quantilesMerge(0.750000)(quantilesState(0.750000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_14",
+			  quantilesMerge(0.800000)(quantilesState(0.800000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_15",
+			  quantilesMerge(0.850000)(quantilesState(0.850000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_16",
+			  quantilesMerge(0.900000)(quantilesState(0.900000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_17",
+			  quantilesMerge(0.950000)(quantilesState(0.950000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_18",
+			  quantilesMerge(0.999999)(quantilesState(0.999999)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_col_19",
+			  quantilesMerge(0.050000)(quantilesState(0.050000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_0",
+			  quantilesMerge(0.100000)(quantilesState(0.100000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_1",
+			  quantilesMerge(0.150000)(quantilesState(0.150000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_2",
+			  quantilesMerge(0.200000)(quantilesState(0.200000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_3",
+			  quantilesMerge(0.250000)(quantilesState(0.250000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_4",
+			  quantilesMerge(0.300000)(quantilesState(0.300000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_5",
+			  quantilesMerge(0.350000)(quantilesState(0.350000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_6",
+			  quantilesMerge(0.400000)(quantilesState(0.400000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_7",
+			  quantilesMerge(0.450000)(quantilesState(0.450000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_8",
+			  quantilesMerge(0.500000)(quantilesState(0.500000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_9",
+			  quantilesMerge(0.550000)(quantilesState(0.550000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_10",
+			  quantilesMerge(0.600000)(quantilesState(0.600000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_11",
+			  quantilesMerge(0.650000)(quantilesState(0.650000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_12",
+			  quantilesMerge(0.700000)(quantilesState(0.700000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_13",
+			  quantilesMerge(0.750000)(quantilesState(0.750000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_14",
+			  quantilesMerge(0.800000)(quantilesState(0.800000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_15",
+			  quantilesMerge(0.850000)(quantilesState(0.850000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_16",
+			  quantilesMerge(0.900000)(quantilesState(0.900000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_17",
+			  quantilesMerge(0.950000)(quantilesState(0.950000)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_18",
+			  quantilesMerge(0.999999)(quantilesState(0.999999)("bytes_gauge")) OVER () AS
+			  "metric__sample__bytes_gauge_percentiles_keyed_true_col_19",
 			  sum(count(*)) OVER () AS "aggr__sample__bytes_gauge_top__parent_count",
 			  "bytes_gauge" AS "aggr__sample__bytes_gauge_top__key_0",
 			  count(*) AS "aggr__sample__bytes_gauge_top__count"
 			FROM (
 			  SELECT "bytes_gauge"
 			  FROM __quesma_table_name
-			  WHERE ("timestamp">=fromUnixTimestamp64Milli(1709932426749) AND "timestamp"<=fromUnixTimestamp64Milli(1711228426749))
+			  WHERE ("timestamp">=fromUnixTimestamp64Milli(1709932426749) AND "timestamp"<=
+			    fromUnixTimestamp64Milli(1711228426749))
 			  LIMIT 20000)
 			GROUP BY "bytes_gauge" AS "aggr__sample__bytes_gauge_top__key_0"
 			ORDER BY "aggr__sample__bytes_gauge_top__count" DESC,
-			"aggr__sample__bytes_gauge_top__key_0" ASC
-			LIMIT 11`,
+			  "aggr__sample__bytes_gauge_top__key_0" ASC
+			LIMIT 11`,
 		},
 	},
 	{ // [21]
@@ -5233,29 +5315,29 @@ var AggregationTests = []AggregationTestCase{
 				model.NewQueryResultCol("aggr__0__parent_count", uint64(14)),
 				model.NewQueryResultCol("aggr__0__key_0", "Albuquerque"),
 				model.NewQueryResultCol("aggr__0__count", uint64(4)),
-				model.NewQueryResultCol("aggr__0__1-bucket__count", uint64(1)),
+				model.NewQueryResultCol("metric__0__1-bucket_col_0", uint64(1)),
 				model.NewQueryResultCol("metric__0__3-bucket_col_0", uint64(2)),
 			}},
 			{Cols: []model.QueryResultCol{
 				model.NewQueryResultCol("aggr__0__parent_count", uint64(14)),
 				model.NewQueryResultCol("aggr__0__key_0", "Atlanta"),
 				model.NewQueryResultCol("aggr__0__count", uint64(5)),
-				model.NewQueryResultCol("aggr__0__1-bucket__count", uint64(0)),
+				model.NewQueryResultCol("metric__0__1-bucket_col_0", uint64(0)),
 				model.NewQueryResultCol("metric__0__3-bucket_col_0", uint64(0)),
 			}},
 			{Cols: []model.QueryResultCol{
 				model.NewQueryResultCol("aggr__0__parent_count", uint64(14)),
 				model.NewQueryResultCol("aggr__0__key_0", "Baltimore"),
 				model.NewQueryResultCol("aggr__0__count", uint64(5)),
-				model.NewQueryResultCol("aggr__0__1-bucket__count", uint64(2)),
+				model.NewQueryResultCol("metric__0__1-bucket_col_0", uint64(2)),
 				model.NewQueryResultCol("metric__0__3-bucket_col_0", uint64(0)),
 			}},
 		},
 		ExpectedPancakeSQL: `
 			SELECT sum(count(*)) OVER () AS "aggr__0__parent_count",
 			  "OriginCityName" AS "aggr__0__key_0", count(*) AS "aggr__0__count",
-			  countIf("Cancelled"==true) AS "metric__0__3-bucket_col_0",
-			  countIf("FlightDelay"==true) AS "aggr__0__1-bucket__count"
+			  countIf("FlightDelay"==true) AS "metric__0__1-bucket_col_0",
+			  countIf("Cancelled"==true) AS "metric__0__3-bucket_col_0"
 			FROM ` + TableName + `
 			GROUP BY "OriginCityName" AS "aggr__0__key_0"
 			ORDER BY "aggr__0__key_0" ASC
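
The renamed columns above (aggr__...__count becoming metric__..._col_0) carry everything a leaf filter needs to answer with. As a hedged illustration (an assumed sketch, not the actual response renderer), one countIf value is enough to reconstruct the whole Elasticsearch reply for such an aggregation:

	package main

	import (
		"encoding/json"
		"fmt"
	)

	// renderLeafFilter sketches how the single countIf column maps back to
	// the Elasticsearch response shape {"doc_count": N} quoted in the
	// commit message.
	func renderLeafFilter(count uint64) ([]byte, error) {
		return json.Marshal(map[string]uint64{"doc_count": count})
	}

	func main() {
		out, _ := renderLeafFilter(553) // value of metric__0-bucket_col_0 above
		fmt.Println(string(out))        // {"doc_count":553}
	}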

platform/testdata/clients/clover.go

Lines changed: 7 additions & 6 deletions
@@ -340,14 +340,15 @@ var CloverTests = []testdata.AggregationTestCase{
 		ExpectedPancakeResults: []model.QueryResultRow{
 			{Cols: []model.QueryResultCol{
 				model.NewQueryResultCol("aggr__timeseries__count", int64(202)),
+				model.NewQueryResultCol("metric__timeseries__a2-denominator_col_0", int64(202)),
 				model.NewQueryResultCol("metric__timeseries__a2-numerator_col_0", int64(202)),
-				model.NewQueryResultCol("aggr__timeseries__a2-denominator__count", int64(202)),
 			}},
 		},
 		ExpectedPancakeSQL: `
 			SELECT count(*) AS "aggr__timeseries__count",
-			  countIf(NOT ("table.flower" ILIKE '%clover%')) AS "metric__timeseries__a2-numerator_col_0",
-			  countIf(true) AS "aggr__timeseries__a2-denominator__count"
+			  countIf(true) AS "metric__timeseries__a2-denominator_col_0",
+			  countIf(NOT ("table.flower" ILIKE '%clover%')) AS
+			  "metric__timeseries__a2-numerator_col_0"
 			FROM __quesma_table_name
 			WHERE ("@timestamp">=fromUnixTimestamp64Milli(1728640683723) AND "@timestamp"<=
 			  fromUnixTimestamp64Milli(1728641583723))`,
@@ -637,15 +638,15 @@ var CloverTests = []testdata.AggregationTestCase{
 		ExpectedPancakeResults: []model.QueryResultRow{
 			{Cols: []model.QueryResultCol{
 				model.NewQueryResultCol("aggr__timeseries__count", int64(1000)),
+				model.NewQueryResultCol("metric__timeseries__f2-denominator_col_0", int64(1000)),
 				model.NewQueryResultCol("metric__timeseries__f2-numerator_col_0", int64(178)),
-				model.NewQueryResultCol("aggr__timeseries__f2-denominator__count", int64(1000)),
 			}},
 		},
 		ExpectedPancakeSQL: `
 			SELECT count(*) AS "aggr__timeseries__count",
+			  countIf(true) AS "metric__timeseries__f2-denominator_col_0",
 			  countIf(NOT ("a.b_str" IS NOT NULL)) AS
-			  "metric__timeseries__f2-numerator_col_0",
-			  countIf(true) AS "aggr__timeseries__f2-denominator__count"
+			  "metric__timeseries__f2-numerator_col_0"
 			FROM __quesma_table_name
 			WHERE ("@timestamp">=fromUnixTimestamp64Milli(1721399904783) AND "@timestamp"<=
 			  fromUnixTimestamp64Milli(1730475504783))`,
