Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit e3ca75a

Browse files
trzysiek and jakozaur authored
Order by: some_other_aggregation enhancements (#788)
A couple of small enhancements here: * During Kibana QA, I found out that it really happens a lot that in `order by: some_other_aggregation`, this aggregation is not a direct child, but some further descendant in the aggregation tree, so added support for it (before `order by: "agg1"` worked, now `order by: "agg1>agg2>agg3"` also works (`agg2` is a child of `agg1`, `agg3` of `agg2`, etc.)) * Before we had support for a) `order by: "2"` (where `2` is a metric aggr with single value like e.g. `avg`) b) `order by: "2.10"` (where `2` is a `percentile[s|_ranks]` aggr, and `10` is a percentile) Here I also added support for `2.count`, `2.std_deviation`, and other stats from `stats` or `extended_stats` aggregations. * Before we could only order by 1 expression, 2 or more weren't supported. Fixed that. Sorry for quite a big PR, but it turned out to be like that out of necessity. E.g. I implemented the last point only because without it, proper tests for previous points would need to be much larger 😆 --------- Co-authored-by: Jacek Migdal <[email protected]>
1 parent e2fe800 commit e3ca75a

13 files changed

+1070
-361
lines changed

quesma/model/metrics_aggregations/extended_stats.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,26 @@ func (query ExtendedStats) getValue(row model.QueryResultRow, functionName strin
112112
}
113113
return row.Cols[column].Value
114114
}
115+
116+
func (query ExtendedStats) ColumnIdx(name string) int {
117+
nameToColumnIdx := map[string]int{
118+
"count": 0,
119+
"min": 1,
120+
"max": 2,
121+
"avg": 3,
122+
"sum": 4,
123+
"sum_of_squares": 5,
124+
"variance": 6,
125+
"variance_population": 6,
126+
"variance_sampling": 7,
127+
"std_deviation": 8,
128+
"std_deviation_population": 8,
129+
"std_deviation_sampling": 9,
130+
}
131+
132+
if columnIdx, ok := nameToColumnIdx[name]; ok {
133+
return columnIdx
134+
}
135+
logger.ErrorWithCtx(query.ctx).Msgf("extended_stats column %s not found", name)
136+
return -1
137+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Copyright Quesma, licensed under the Elastic License 2.0.
2+
// SPDX-License-Identifier: Elastic-2.0
3+
package metrics_aggregations
4+
5+
// MultipleMetricColumnsInterface is an interface for metrics aggregations
// that have multiple columns in the response.
// It allows to get the index of the column by its name, e.g.
// "count", or "standard_deviation" for extended_stats, or "50" for quantile.
type MultipleMetricColumnsInterface interface {
	// ColumnIdx returns the index of the column with the given name.
	// Implementations in this package return -1 (and log an error) when the
	// name is not found.
	ColumnIdx(name string) int
}

quesma/model/metrics_aggregations/quantile.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,14 @@ func (query Quantile) createPercentileNameToReturn(percentileName string) string
164164
}
165165
return percentileName
166166
}
167+
168+
func (query Quantile) ColumnIdx(name string) int {
169+
for i, percentileName := range query.percentileNames {
170+
if percentileName == name {
171+
return i
172+
}
173+
}
174+
175+
logger.ErrorWithCtx(query.ctx).Msgf("quantile column %s not found", name)
176+
return -1
177+
}

quesma/model/metrics_aggregations/stats.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,14 @@ func (query Stats) TranslateSqlResponseToJson(rows []model.QueryResultRow) model
4343
// String returns the name of this metrics aggregation.
func (query Stats) String() string {
	return "stats"
}
46+
47+
func (query Stats) ColumnIdx(name string) int {
48+
for i, column := range statsColumnsInOrder {
49+
if column == name {
50+
return i
51+
}
52+
}
53+
54+
logger.ErrorWithCtx(query.ctx).Msgf("stats column %s not found", name)
55+
return -1
56+
}

quesma/queryparser/pancake_aggregation_parser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func (cw *ClickhouseQueryTranslator) PancakeParseAggregationJson(body types.JSON
7979
// Phase 3: Generate SQL queries from pancake model
8080
aggregationQueries := make([]*model.Query, 0)
8181
for _, pancakeQuery := range pancakeQueries {
82-
generator := &pancakeSqlQueryGenerator{}
82+
generator := newPancakeSqlQueryGeneratorr(cw.Ctx)
8383
dbQuery, err := generator.generateQuery(pancakeQuery)
8484
if err != nil {
8585
return nil, err

quesma/queryparser/pancake_aggregation_parser_buckets.go

Lines changed: 42 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"quesma/logger"
1111
"quesma/model"
1212
"quesma/model/bucket_aggregations"
13-
"quesma/util"
1413
"sort"
1514
"strconv"
1615
"strings"
@@ -320,78 +319,6 @@ func (cw *ClickhouseQueryTranslator) pancakeTryBucketAggregation(aggregation *pa
320319
return
321320
}
322321

323-
func (cw *ClickhouseQueryTranslator) pancakeFindMetricAggregation(queryMap QueryMap, aggregationName string) model.Expr {
324-
notFoundValue := model.NewLiteral("")
325-
326-
aggsRaw, exists := queryMap["aggs"]
327-
if !exists {
328-
logger.WarnWithCtx(cw.Ctx).Msgf("no aggs in queryMap, queryMap: %+v", queryMap)
329-
return notFoundValue
330-
}
331-
aggs, ok := aggsRaw.(QueryMap)
332-
if !ok {
333-
logger.WarnWithCtx(cw.Ctx).Msgf("aggs is not a map, but %T, value: %v. Skipping", aggsRaw, aggsRaw)
334-
return notFoundValue
335-
}
336-
337-
var percentileNameWeLookFor string
338-
weTrySplitByDot := false
339-
340-
// We try 2 things here:
341-
// First (always): maybe there exists an aggregation with exactly this name
342-
// Second (if aggregation_name == X.Y): maybe it's aggregationName.some_value, e.g. "2.75", when "2" aggregation is a percentile, and 75 is its value
343-
aggregationNamesToTry := []string{aggregationName}
344-
splitByDot := strings.Split(aggregationName, ".")
345-
if len(splitByDot) == 2 {
346-
weTrySplitByDot = true
347-
percentileNameWeLookFor = splitByDot[1]
348-
aggregationNamesToTry = append(aggregationNamesToTry, splitByDot[0])
349-
}
350-
351-
for _, aggNameToTry := range aggregationNamesToTry {
352-
currentAggMapRaw, exists := aggs[aggNameToTry]
353-
if !exists {
354-
continue
355-
}
356-
357-
currentAggMap, ok := currentAggMapRaw.(QueryMap)
358-
if !ok {
359-
logger.WarnWithCtx(cw.Ctx).Msgf("aggregation %s is not a map, but %T, value: %v. Skipping",
360-
aggregationName, currentAggMapRaw, currentAggMapRaw)
361-
continue
362-
}
363-
364-
agg, success := cw.tryMetricsAggregation(currentAggMap)
365-
if !success {
366-
logger.WarnWithCtx(cw.Ctx).Msgf("failed to parse metric aggregation: %v", agg)
367-
continue
368-
}
369-
370-
// we build a temporary query only to extract the name of the metric
371-
columns, err := generateMetricSelectedColumns(cw.Ctx, agg)
372-
if err != nil {
373-
continue
374-
}
375-
376-
if aggNameToTry == aggregationName {
377-
if len(columns) != 1 {
378-
continue
379-
}
380-
return columns[0]
381-
} else if weTrySplitByDot {
382-
userPercents := util.MapKeysSortedByValue(agg.Percentiles)
383-
for i, percentileName := range userPercents {
384-
if percentileName == percentileNameWeLookFor {
385-
return columns[i]
386-
}
387-
}
388-
}
389-
}
390-
391-
logger.ErrorWithCtx(cw.Ctx).Msgf("no given metric aggregation found (name: %v, queryMap: %+v)", aggregationName, queryMap)
392-
return notFoundValue
393-
}
394-
395322
// samplerRaw - in a proper request should be of QueryMap type.
396323
func (cw *ClickhouseQueryTranslator) parseSampler(samplerRaw any) bucket_aggregations.Sampler {
397324
const defaultSize = 100
@@ -420,60 +347,60 @@ func (cw *ClickhouseQueryTranslator) parseRandomSampler(randomSamplerRaw any) bu
420347
}
421348

422349
// parseOrder parses the "order" field of a terms-like bucket aggregation
// into a list of OrderByExpr. When "order" is absent or malformed, it falls
// back to ordering by count() descending.
// NOTE(review): queryMap is no longer read here — presumably kept for caller
// compatibility; confirm whether it can be dropped.
func (cw *ClickhouseQueryTranslator) parseOrder(terms, queryMap QueryMap, fieldExpressions []model.Expr) []model.OrderByExpr {
	defaultDirection := model.DescOrder
	defaultOrderBy := model.NewOrderByExpr(model.NewCountFunc(), defaultDirection)

	ordersRaw, exists := terms["order"]
	if !exists {
		return []model.OrderByExpr{defaultOrderBy}
	}

	// order can be either a single order {}, or a list of such single orders [{}(,{}...)]
	orders := make([]QueryMap, 0)
	switch ordersTyped := ordersRaw.(type) {
	case QueryMap:
		orders = append(orders, ordersTyped)
	case []any:
		for _, order := range ordersTyped {
			if orderTyped, ok := order.(QueryMap); ok {
				orders = append(orders, orderTyped)
			} else {
				logger.WarnWithCtx(cw.Ctx).Msgf("invalid order: %v", order)
			}
		}
	default:
		logger.WarnWithCtx(cw.Ctx).Msgf("order is not a map/list of maps, but %T, value: %v. Using default order", ordersRaw, ordersRaw)
		return []model.OrderByExpr{defaultOrderBy}
	}

	fullOrderBy := make([]model.OrderByExpr, 0)

	for _, order := range orders {
		// Each single order should have exactly one key; we warn on other
		// lengths but still process every key present.
		if len(order) != 1 {
			logger.WarnWithCtx(cw.Ctx).Msgf("invalid order length, should be 1: %v", order)
		}
		for key, valueRaw := range order { // value == "asc" or "desc"
			value, ok := valueRaw.(string)
			if !ok {
				logger.WarnWithCtx(cw.Ctx).Msgf("order value is not a string, but %T, value: %v. Using default (desc)", valueRaw, valueRaw)
				value = "desc"
			}

			direction := defaultDirection
			if strings.ToLower(value) == "asc" {
				direction = model.AscOrder
			}

			if key == "_key" {
				// Order by the bucket key expression(s) themselves.
				for _, fieldExpression := range fieldExpressions {
					fullOrderBy = append(fullOrderBy, model.OrderByExpr{Expr: fieldExpression, Direction: direction})
				}
			} else if key == "_count" {
				fullOrderBy = append(fullOrderBy, model.NewOrderByExpr(model.NewCountFunc(), direction))
			} else {
				// Order by a (possibly nested) metric aggregation; the literal
				// path (e.g. "agg1>agg2.50") is resolved to a concrete column
				// later in the pipeline.
				fullOrderBy = append(fullOrderBy, model.OrderByExpr{Expr: model.NewLiteral(key), Direction: direction})
			}
		}
	}

	return fullOrderBy
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright Quesma, licensed under the Elastic License 2.0.
2+
// SPDX-License-Identifier: Elastic-2.0
3+
package queryparser
4+
5+
import (
6+
"context"
7+
"quesma/logger"
8+
"quesma/model"
9+
"quesma/model/metrics_aggregations"
10+
"strings"
11+
)
12+
13+
// pancakeOrderByTransformer resolves `order by` expressions that reference
// metric aggregations into aliased expressions naming the metric's internal
// result column.
type pancakeOrderByTransformer struct {
	ctx context.Context // used for error logging
}

// newPancakeOrderByTransformer returns a transformer bound to the given context.
func newPancakeOrderByTransformer(ctx context.Context) *pancakeOrderByTransformer {
	return &pancakeOrderByTransformer{ctx: ctx}
}
20+
21+
// transformSingleOrderBy transforms a single order by expression, of query `query` and bucket aggregation `bucketAggrInternalName`.
22+
// What it does, it finds metric aggregation that corresponds to the order by expression, and returns a new aliased expression
23+
//
24+
// TODO: maybe the same logic needs to be applied to pipeline aggregations, needs checking.
25+
func (t *pancakeOrderByTransformer) transformSingleOrderBy(orderBy model.Expr, bucketAggregation *pancakeModelBucketAggregation, query *pancakeModel) *model.AliasedExpr {
26+
fullPathToOrderByExprRaw, isPath := orderBy.(model.LiteralExpr)
27+
if !isPath {
28+
return nil
29+
}
30+
31+
fullPathToOrderByExpr, ok := fullPathToOrderByExprRaw.Value.(string)
32+
if !ok {
33+
logger.ErrorWithCtx(t.ctx).Msgf("path to metric is not a string, but %T (val: %v)",
34+
fullPathToOrderByExprRaw.Value, fullPathToOrderByExprRaw.Value)
35+
return nil
36+
}
37+
38+
// fullPathToOrderByExpr is in the form of "[aggr1][>aggr2...]>metric_aggr[.submetric]" ([] means optional)
39+
// submetric: e.g. "percentiles.50", or "stats.sum", "extended_stats.std_deviation"
40+
// Most metric aggregations don't have submetrics
41+
var fullPathWithoutSubmetric, submetricName string
42+
splitByDot := strings.Split(fullPathToOrderByExpr, ".")
43+
switch len(splitByDot) {
44+
case 1:
45+
fullPathWithoutSubmetric = splitByDot[0]
46+
case 2:
47+
fullPathWithoutSubmetric, submetricName = splitByDot[0], splitByDot[1]
48+
default:
49+
logger.ErrorWithCtx(t.ctx).Msgf("path to metric is not valid: %s", fullPathToOrderByExpr)
50+
return nil
51+
}
52+
53+
foundLayerIdx := -1
54+
for layerIdx, layer := range query.layers {
55+
if layer.nextBucketAggregation == bucketAggregation {
56+
foundLayerIdx = layerIdx
57+
break
58+
}
59+
}
60+
if foundLayerIdx == -1 {
61+
logger.ErrorWithCtx(t.ctx).Msgf("bucket aggregation not found in query")
62+
return nil
63+
}
64+
foundLayerIdx += 1
65+
fullPath := strings.Split(fullPathWithoutSubmetric, ">")
66+
path := fullPath
67+
68+
for len(path) > 1 {
69+
if foundLayerIdx >= len(query.layers) {
70+
logger.ErrorWithCtx(t.ctx).Msgf("out of layers in path: %s", fullPathToOrderByExpr)
71+
return nil
72+
}
73+
if query.layers[foundLayerIdx].nextBucketAggregation == nil {
74+
logger.ErrorWithCtx(t.ctx).Msgf("no bucket aggregation in path: %s", fullPathToOrderByExpr)
75+
return nil
76+
}
77+
if query.layers[foundLayerIdx].nextBucketAggregation.name != path[0] {
78+
logger.ErrorWithCtx(t.ctx).Msgf("bucket aggregation mismatch in path: %s, expected: %s, was: %s",
79+
fullPathToOrderByExpr, path[0], query.layers[foundLayerIdx].nextBucketAggregation.name)
80+
return nil
81+
}
82+
foundLayerIdx += 1
83+
path = path[1:]
84+
}
85+
86+
if foundLayerIdx >= len(query.layers) {
87+
logger.ErrorWithCtx(t.ctx).Msgf("out of layers in path: %s", fullPathToOrderByExpr)
88+
return nil
89+
}
90+
91+
for _, metric := range query.layers[foundLayerIdx].currentMetricAggregations {
92+
columnIdx := 0 // when no multiple columns, it must be 0
93+
if multipleColumnsMetric, ok := metric.queryType.(metrics_aggregations.MultipleMetricColumnsInterface); ok {
94+
columnIdx = multipleColumnsMetric.ColumnIdx(submetricName)
95+
}
96+
97+
if metric.name == path[0] {
98+
result := model.NewAliasedExpr(orderBy, metric.InternalNameForCol(columnIdx))
99+
return &result
100+
}
101+
}
102+
103+
logger.ErrorWithCtx(t.ctx).Msgf("no metric found for path: %s", fullPathToOrderByExpr)
104+
return nil
105+
}

0 commit comments

Comments
 (0)