Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 6c80a67

Browse files
trzysiektrzysiek
andauthored
[ecommerce] Add a new simple metric aggr: geo_bounds (#1360)
Co-authored-by: trzysiek <[email protected]>
1 parent 3d079a1 commit 6c80a67

File tree

7 files changed

+190
-16
lines changed

7 files changed

+190
-16
lines changed

docs/public/docs/limitations.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Currently supported:
3838
including: `boolean`, `match`, `match phrase`, `multi-match`, `query string`, `nested`, `match all`, `exists`, `prefix`, `range`, `term`, `terms`, `wildcard`
3939
- most popular [Aggregations](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html),
4040
including: `avg`, `cardinality`, `max`, `min`, `percentile ranks`, `percentiles`, `stats`, `sum`, `top hits`, `top metrics`, `value count`,
41-
`date histogram`, `date range`, `filter`, `filters`, `histogram`, `range`, `singificant terms`, `terms`, `ip prefix`, `ip range`, `geohash_grid`
41+
`date histogram`, `date range`, `filter`, `filters`, `histogram`, `range`, `significant terms`, `terms`, `ip prefix`, `ip range`, `geo_bounds`, `geohash_grid`
4242

4343
Which as a result allows you to run Kibana/OSD queries and dashboards on data residing in ClickHouse/Hydrolix.
4444

platform/model/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ More info: https://www.elastic.co/guide/en/elasticsearch/reference/current/searc
1818
Boxplot | :x: | Composite | :white_check_mark: | Bucket selector | :x: |
1919
Cardinality | :white_check_mark: | Date histogram | :white_check_mark: | Bucket sort | :x: |
2020
Extended stats | :white_check_mark:[^1] | Date range | :white_check_mark: | Change point | :x: |
21-
Geo-bounds | :x: | Diversified sampler | :x: | Cumulative cardinality | :x: |
21+
Geo-bounds | :white_check_mark: | Diversified sampler | :x: | Cumulative cardinality | :x: |
2222
Geo-centroid | :x: | Filter | :white_check_mark: | Cumulative sum | :white_check_mark: |
2323
Geo-line | :x: | Filters | :white_check_mark: | Derivative | :white_check_mark: |
2424
Cartesian-bounds | :x: | Frequent item sets | :x: | Extended stats bucket | :x: |
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright Quesma, licensed under the Elastic License 2.0.
2+
// SPDX-License-Identifier: Elastic-2.0
3+
package metrics_aggregations
4+
5+
import (
6+
"context"
7+
"github.com/QuesmaOrg/quesma/platform/logger"
8+
"github.com/QuesmaOrg/quesma/platform/model"
9+
)
10+
11+
// GeoBounds is the metrics aggregation behind Elasticsearch's "geo_bounds".
// It carries no aggregation parameters of its own.
type GeoBounds struct {
	// ctx is handed to the logger when a malformed SQL result is reported.
	ctx context.Context
}
14+
15+
func NewGeoBounds(ctx context.Context) GeoBounds {
16+
return GeoBounds{ctx: ctx}
17+
}
18+
19+
func (query GeoBounds) AggregationType() model.AggregationType {
20+
return model.MetricsAggregation
21+
}
22+
23+
func (query GeoBounds) TranslateSqlResponseToJson(rows []model.QueryResultRow) model.JsonMap {
24+
if len(rows) == 0 {
25+
logger.ErrorWithCtx(query.ctx).Msg("GeoBounds: expected at least one row in the result")
26+
return model.JsonMap{}
27+
}
28+
if len(rows[0].Cols) < 4 {
29+
logger.ErrorWithCtx(query.ctx).Msgf("GeoBounds: expected at least 4 columns in the result, got: %v", rows[0].Cols)
30+
return model.JsonMap{}
31+
}
32+
33+
return model.JsonMap{
34+
"bounds": model.JsonMap{
35+
"top_left": model.JsonMap{
36+
"lon": rows[0].Cols[0].Value,
37+
"lat": rows[0].Cols[1].Value,
38+
},
39+
"bottom_right": model.JsonMap{
40+
"lat": rows[0].Cols[2].Value,
41+
"lon": rows[0].Cols[3].Value,
42+
},
43+
},
44+
}
45+
}
46+
47+
func (query GeoBounds) String() string {
48+
return "geo_bounds"
49+
}

platform/parsers/elastic_query_dsl/aggregation_parser.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,13 @@ func (cw *ClickhouseQueryTranslator) tryMetricsAggregation(queryMap QueryMap) (m
158158
}, true
159159
}
160160

161+
if geoBounds, exists := queryMap["geo_bounds"]; exists {
162+
return metricsAggregation{
163+
AggrType: "geo_bounds",
164+
Fields: []model.Expr{cw.parseFieldField(geoBounds, "geo_bounds")},
165+
}, true
166+
}
167+
161168
return metricsAggregation{}, false
162169
}
163170

platform/parsers/elastic_query_dsl/pancake_aggregation_parser_metrics.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,21 @@ func generateMetricSelectedColumns(ctx context.Context, metricsAggr metricsAggre
116116
result = append(result, model.NewFunction("avgOrNull", lonColumn))
117117
result = append(result, model.NewCountFunc())
118118
}
119+
case "geo_bounds":
120+
firstExpr := getFirstExpression()
121+
result = make([]model.Expr, 0, 3)
122+
if col, ok := firstExpr.(model.ColumnRef); ok {
123+
// TODO this is internalPropertyName and should be taken from schema
124+
// instead of using util.FieldToColumnEncoder and doing encoding in-place
125+
colName := util.FieldToColumnEncoder(col.ColumnName)
126+
// TODO we have to create columns according to the schema
127+
latColumn := model.NewGeoLat(colName)
128+
lonColumn := model.NewGeoLon(colName)
129+
result = append(result, model.NewFunction("minOrNull", lonColumn))
130+
result = append(result, model.NewFunction("argMinOrNull", latColumn, lonColumn))
131+
result = append(result, model.NewFunction("minOrNull", latColumn))
132+
result = append(result, model.NewFunction("argMinOrNull", lonColumn, latColumn))
133+
}
119134
default:
120135
logger.WarnWithCtx(ctx).Msgf("unknown metrics aggregation: %s", metricsAggr.AggrType)
121136
return nil, fmt.Errorf("unknown metrics aggregation %s", metricsAggr.AggrType)
@@ -151,6 +166,9 @@ func (cw *ClickhouseQueryTranslator) generateMetricsType(metricsAggr metricsAggr
151166
return metrics_aggregations.NewPercentileRanks(cw.Ctx, metricsAggr.CutValues, metricsAggr.Keyed)
152167
case "geo_centroid":
153168
return metrics_aggregations.NewGeoCentroid(cw.Ctx)
169+
case "geo_bounds":
170+
return metrics_aggregations.NewGeoBounds(cw.Ctx)
154171
}
172+
155173
return nil
156174
}

platform/testdata/kibana_sample_data_ecommerce.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2819,4 +2819,118 @@ var KibanaSampleDataEcommerce = []AggregationTestCase{
28192819
ORDER BY "aggr__join__count" DESC, "aggr__join__key_0" ASC
28202820
LIMIT 65536`,
28212821
},
2822+
{ // [15]
2823+
TestName: "weird",
2824+
QueryRequestJson: `
2825+
{
2826+
"_source": {
2827+
"excludes": []
2828+
},
2829+
"aggs": {
2830+
"fitToBounds": {
2831+
"geo_bounds": {
2832+
"field": "OriginLocation"
2833+
}
2834+
}
2835+
},
2836+
"fields": [
2837+
{
2838+
"field": "@timestamp",
2839+
"format": "date_time"
2840+
},
2841+
{
2842+
"field": "timestamp",
2843+
"format": "date_time"
2844+
}
2845+
],
2846+
"query": {
2847+
"bool": {
2848+
"filter": [
2849+
{
2850+
"range": {
2851+
"timestamp": {
2852+
"format": "strict_date_optional_time",
2853+
"gte": "2025-03-02T14:16:32.069Z",
2854+
"lte": "2025-03-09T14:16:32.069Z"
2855+
}
2856+
}
2857+
}
2858+
],
2859+
"must": [],
2860+
"must_not": [],
2861+
"should": []
2862+
}
2863+
},
2864+
"runtime_mappings": {
2865+
"hour_of_day": {
2866+
"script": {
2867+
"source": "emit(doc['timestamp'].value.getHour());"
2868+
},
2869+
"type": "long"
2870+
}
2871+
},
2872+
"script_fields": {},
2873+
"size": 0,
2874+
"stored_fields": [
2875+
"*"
2876+
],
2877+
"track_total_hits": false
2878+
}`,
2879+
ExpectedResponse: `
2880+
{
2881+
"completion_time_in_millis": 1740838900680,
2882+
"expiration_time_in_millis": 1740838960672,
2883+
"id": "FnBCYVZTQWtUVEgtVGNiUzFabnFqbVEdUEQ3d19oVkxSMEthNU02NjIwRGpkZzo3MTY5NTM=",
2884+
"is_partial": false,
2885+
"is_running": false,
2886+
"response": {
2887+
"_shards": {
2888+
"failed": 0,
2889+
"skipped": 0,
2890+
"successful": 1,
2891+
"total": 1
2892+
},
2893+
"aggregations": {
2894+
"fitToBounds": {
2895+
"bounds": {
2896+
"top_left": {
2897+
"lat": 68.15180202014744,
2898+
"lon": -122.59799961000681
2899+
},
2900+
"bottom_right": {
2901+
"lat": -37.67330203671008,
2902+
"lon": 153.11700434423983
2903+
}
2904+
}
2905+
}
2906+
},
2907+
"hits": {
2908+
"hits": [],
2909+
"max_score": null
2910+
},
2911+
"timed_out": false,
2912+
"took": 8
2913+
},
2914+
"start_time_in_millis": 1740838900672
2915+
}`,
2916+
ExpectedPancakeResults: []model.QueryResultRow{
2917+
{Cols: []model.QueryResultCol{
2918+
model.NewQueryResultCol("metric__fitToBounds_col_0", -122.59799961000681),
2919+
model.NewQueryResultCol("metric__fitToBounds_col_1", 68.15180202014744),
2920+
model.NewQueryResultCol("metric__fitToBounds_col_2", -37.67330203671008),
2921+
model.NewQueryResultCol("metric__fitToBounds_col_3", 153.11700434423983),
2922+
}},
2923+
},
2924+
ExpectedPancakeSQL: `
2925+
SELECT minOrNull(__quesma_geo_lon("originlocation")) AS
2926+
"metric__fitToBounds_col_0",
2927+
argMinOrNull(__quesma_geo_lat("originlocation"), __quesma_geo_lon(
2928+
"originlocation")) AS "metric__fitToBounds_col_1",
2929+
minOrNull(__quesma_geo_lat("originlocation")) AS "metric__fitToBounds_col_2",
2930+
argMinOrNull(__quesma_geo_lon("originlocation"), __quesma_geo_lat(
2931+
"originlocation")) AS "metric__fitToBounds_col_3"
2932+
FROM __quesma_table_name
2933+
WHERE ("timestamp">=fromUnixTimestamp64Milli(1740924992069) AND "timestamp"<=
2934+
fromUnixTimestamp64Milli(1741529792069))`,
2935+
},
28222936
}

platform/testdata/unsupported_requests.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -361,20 +361,6 @@ var UnsupportedQueriesTests = []UnsupportedQueryTestCase{
361361
}
362362
}`,
363363
},
364-
{ // [18]
365-
TestName: "metrics aggregation: geo_bounds",
366-
QueryType: "geo_bounds",
367-
QueryRequestJson: `
368-
{
369-
"aggs": {
370-
"viewport": {
371-
"geo_bounds": {
372-
"field": "geometry"
373-
}
374-
}
375-
}
376-
}`,
377-
},
378364
{ // [19]
379365
TestName: "metrics aggregation: geo_line",
380366
QueryType: "geo_line",

0 commit comments

Comments
 (0)