This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 3db6fae

trzysiek authored
Some slight improvements to geotile_grid aggr + more tests (#1010)
Co-authored-by: trzysiek <[email protected]>
1 parent 1664219 commit 3db6fae

11 files changed: +508 −96 lines changed

platform/model/bucket_aggregations/composite.go

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ func (query *Composite) TranslateSqlResponseToJson(rows []model.QueryResultRow)
 			colIdx += 1
 		} else if geotileGrid, ok := baseAggr.aggregation.(GeoTileGrid); ok {
 			key[baseAggr.name] = geotileGrid.calcKey(row.Cols[colIdx:])
-			colIdx += 3
+			colIdx += 2
 		} else {
 			key[baseAggr.name] = col.Value
 			colIdx += 1

platform/model/bucket_aggregations/geotile_grid.go

Lines changed: 77 additions & 8 deletions
@@ -4,18 +4,23 @@ package bucket_aggregations
 
 import (
 	"context"
+	"fmt"
 	"github.com/QuesmaOrg/quesma/platform/logger"
 	"github.com/QuesmaOrg/quesma/platform/model"
 	"github.com/QuesmaOrg/quesma/platform/util"
-	"strconv"
+	"reflect"
 )
 
+// GeoTileGrid Warning: we don't handle 'bounds' and 'shard_size' parameters, and proceed like they didn't exist.
+// We log every time they arrive in the request, so we should know when they're used and that we should implement them
+// (shouldn't be too hard).
 type GeoTileGrid struct {
-	ctx context.Context
+	ctx           context.Context
+	precisionZoom int
 }
 
-func NewGeoTileGrid(ctx context.Context) GeoTileGrid {
-	return GeoTileGrid{ctx: ctx}
+func NewGeoTileGrid(ctx context.Context, precisionZoom int) GeoTileGrid {
+	return GeoTileGrid{ctx: ctx, precisionZoom: precisionZoom}
 }
 
 func (query GeoTileGrid) AggregationType() model.AggregationType {
@@ -43,12 +48,76 @@ func (query GeoTileGrid) TranslateSqlResponseToJson(rows []model.QueryResultRow)
 }
 
 func (query GeoTileGrid) calcKey(cols []model.QueryResultCol) string {
-	zoom, _ := util.ExtractFloat64(cols[0].Value)
-	x, _ := util.ExtractFloat64(cols[1].Value)
-	y, _ := util.ExtractFloat64(cols[2].Value)
-	return strconv.FormatInt(int64(zoom), 10) + "/" + strconv.FormatInt(int64(x), 10) + "/" + strconv.FormatInt(int64(y), 10)
+	x := int64(util.ExtractFloat64(cols[0].Value))
+	y := int64(util.ExtractFloat64(cols[1].Value))
+	return fmt.Sprintf("%d/%d/%d", query.precisionZoom, x, y)
 }
 
 func (query GeoTileGrid) String() string {
 	return "geotile_grid"
 }
+
+func CheckParamsGeotileGrid(ctx context.Context, paramsRaw any) error {
+	requiredParams := map[string]string{"field": "string"}
+	optionalParams := map[string]string{
+		"precision":  "int",
+		"bounds":     "object", // TODO implement proper check
+		"size":       "int",
+		"shard_size": "int",
+	}
+	logIfYouSeeThemParams := []string{"bounds", "shard_size"}
+
+	params, ok := paramsRaw.(model.JsonMap)
+	if !ok {
+		return fmt.Errorf("params is not a map, but %+v", paramsRaw)
+	}
+
+	// check if required are present
+	for paramName, paramType := range requiredParams {
+		paramVal, exists := params[paramName]
+		if !exists {
+			return fmt.Errorf("required parameter %s not found in params", paramName)
+		}
+		if reflect.TypeOf(paramVal).Name() != paramType { // TODO I'll make a small rewrite to not use reflect here
+			return fmt.Errorf("required parameter %s is not of type %s, but %T", paramName, paramType, paramVal)
+		}
+	}
+
+	// check if only required/optional are present
+	for paramName := range params {
+		if _, isRequired := requiredParams[paramName]; !isRequired {
+			wantedType, isOptional := optionalParams[paramName]
+			if !isOptional {
+				return fmt.Errorf("unexpected parameter %s found in Geotile Grid params %v", paramName, params)
+			}
+			switch wantedType {
+			case "int":
+				if ok = util.IsInt32(params[paramName]); !ok {
+					return fmt.Errorf("optional parameter %s is not of type %s, but %T", paramName, wantedType, params[paramName])
+				}
+			case "object":
+				// TODO implement proper check
+				continue
+			default:
+				return fmt.Errorf("unsupported type %s for optional parameter %s", wantedType, paramName)
+			}
+		}
+	}
+
+	// additional check for precision is in range [0, 29] (we checked above that it's int as wanted)
+	if precisionRaw, exists := params["precision"]; exists {
+		precision := int(util.ExtractNumeric64(precisionRaw))
+		if precision < 0 || precision > 29 {
+			return fmt.Errorf("precision value %d is out of bounds", precision)
+		}
+	}
+
+	// log if you see them
+	for _, warnParam := range logIfYouSeeThemParams {
+		if _, exists := params[warnParam]; exists {
+			logger.WarnWithCtxAndThrottling(ctx, "geotile_grid", warnParam, "we didn't expect %s in Geotile Grid params %v", warnParam, params)
+		}
+	}
+
+	return nil
+}
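
For orientation, the "zoom/x/y" key that calcKey formats follows standard web-mercator (slippy-map) tile addressing; the x/y values it receives are computed by the generated SQL shown in the parseGeotileGrid diff below. A minimal, self-contained sketch of that tile math (function name and sample coordinates here are illustrative, not part of the repository):

package main

import (
	"fmt"
	"math"
)

// lonLatToTile converts a WGS84 longitude/latitude to web-mercator tile indices
// at the given zoom level, i.e. x = floor((lon+180)/360 * 2^zoom) and
// y = floor((1 - ln(tan(lat) + 1/cos(lat))/pi) / 2 * 2^zoom).
func lonLatToTile(lon, lat float64, zoom int) (x, y int64) {
	n := math.Pow(2, float64(zoom))
	x = int64(math.Floor((lon + 180.0) / 360.0 * n))
	latRad := lat * math.Pi / 180.0
	y = int64(math.Floor((1 - math.Log(math.Tan(latRad)+1/math.Cos(latRad))/math.Pi) / 2 * n))
	return x, y
}

func main() {
	// Hypothetical coordinate, at precision/zoom 7 (the parser's default).
	x, y := lonLatToTile(21.0, 52.2, 7)
	fmt.Printf("%d/%d/%d\n", 7, x, y) // same "zoom/x/y" key format as GeoTileGrid.calcKey
}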

platform/parsers/elastic_query_dsl/aggregation_parser_test.go

Lines changed: 1 addition & 0 deletions
@@ -658,6 +658,7 @@ func allAggregationTests() []testdata.AggregationTestCase {
 	add(testdata.AggregationTests, "agg_req")
 	add(testdata.AggregationTests2, "agg_req_2")
 	add(testdata.AggregationTestsWithDates, "dates")
+	add(testdata.AggregationTestsWithGeographicalCoordinates, "geo")
 	add(testdata.GrafanaAggregationTests, "grafana")
 	add(testdata.KibanaSampleDataEcommerce, "kibana-sample-data-ecommerce")
 	add(testdata.KibanaSampleDataFlights, "kibana-sample-data-flights")

platform/parsers/elastic_query_dsl/pancake_aggregation_parser_buckets.go

Lines changed: 18 additions & 7 deletions
@@ -325,21 +325,30 @@ func (cw *ClickhouseQueryTranslator) parseMultiTerms(aggregation *pancakeAggrega
 }
 
 func (cw *ClickhouseQueryTranslator) parseGeotileGrid(aggregation *pancakeAggregationTreeNode, params QueryMap) error {
-	const defaultPrecisionZoom = 7.0
-	precisionZoom := cw.parseFloatField(params, "precision", defaultPrecisionZoom)
+	if err := bucket_aggregations.CheckParamsGeotileGrid(cw.Ctx, params); err != nil {
+		return err
+	}
+
+	const (
+		defaultPrecisionZoom = 7.0
+		defaultSize          = 10000
+	)
+	precisionZoom := int(cw.parseFloatField(params, "precision", defaultPrecisionZoom))
 	field := cw.parseFieldField(params, "geotile_grid")
+	size := cw.parseIntField(params, "size", defaultSize)
 
 	// That's bucket (group by) formula for geotile_grid
 	// zoom/x/y
-	// SELECT precisionZoom as zoom,
+	// SELECT
 	//  FLOOR((("Location::lon" + 180.0) / 360.0) * POWER(2, zoom)) AS x_tile,
 	//  FLOOR(
 	//      (
 	//          1 - LOG(TAN(RADIANS("Location::lat")) + (1 / COS(RADIANS("Location::lat")))) / PI()
 	//      ) / 2.0 * POWER(2, zoom)
-	//  ) AS y_tile, count()
+	//  ) AS y_tile,
+	//  count()
 	// FROM
-	//  kibana_sample_data_flights Group by zoom, x_tile, y_tile
+	//  kibana_sample_data_flights Group by x_tile, y_tile
 
 	zoomLiteral := model.NewLiteral(precisionZoom)
 
@@ -371,10 +380,12 @@ func (cw *ClickhouseQueryTranslator) parseGeotileGrid(aggregation *pancakeAggreg
 		model.NewFunction("POWER", model.NewLiteral(2), zoomLiteral))
 	yTile := model.NewFunction("FLOOR", FloorContent)
 
-	aggregation.queryType = bucket_aggregations.NewGeoTileGrid(cw.Ctx)
-	aggregation.selectedColumns = append(aggregation.selectedColumns, model.NewLiteral(fmt.Sprintf("CAST(%f AS Float32)", precisionZoom)))
+	aggregation.queryType = bucket_aggregations.NewGeoTileGrid(cw.Ctx, precisionZoom)
 	aggregation.selectedColumns = append(aggregation.selectedColumns, xTile)
 	aggregation.selectedColumns = append(aggregation.selectedColumns, yTile)
+	// It's not explicitly stated in the Elastic documentation, but Geotile Grid is always ordered by count desc
+	aggregation.orderBy = append(aggregation.orderBy, model.NewOrderByExpr(model.NewCountFunc(), model.DescOrder))
+	aggregation.limit = size
 	return nil
 }
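
To illustrate the ordering and limit behaviour added above: geotile_grid buckets come back sorted by document count, descending, and truncated to the "size" parameter (the parser defaults it to 10000). A rough, self-contained sketch of that behaviour, using bucket values borrowed from the test case below (the bucket type and the size value are illustrative):

package main

import (
	"fmt"
	"sort"
)

// bucket is an illustrative stand-in for one geotile_grid result row:
// a "zoom/x/y" key plus its document count.
type bucket struct {
	key   string
	count int
}

func main() {
	// Values taken from the test case below (zoom-8 tiles of "OriginLocation").
	buckets := []bucket{{"8/20/44", 12}, {"8/21/49", 1}, {"8/20/45", 22}}
	size := 2 // the aggregation's "size" parameter; parseGeotileGrid defaults it to 10000

	// Buckets are returned ordered by doc count, descending; in SQL this is the
	// ORDER BY ... DESC and LIMIT that this commit adds to the generated query.
	sort.Slice(buckets, func(i, j int) bool { return buckets[i].count > buckets[j].count })
	if len(buckets) > size {
		buckets = buckets[:size]
	}
	fmt.Println(buckets) // [{8/20/45 22} {8/20/44 12}]
}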

platform/testdata/aggregation_requests_2.go

Lines changed: 16 additions & 19 deletions
@@ -4539,38 +4539,35 @@ var AggregationTests2 = []AggregationTestCase{
 		}`,
 		ExpectedPancakeResults: []model.QueryResultRow{
 			{Cols: []model.QueryResultCol{
-				model.NewQueryResultCol("aggr__my_buckets__key_0", 8.0),
-				model.NewQueryResultCol("aggr__my_buckets__key_1", 20.0),
-				model.NewQueryResultCol("aggr__my_buckets__key_2", 44.0),
+				model.NewQueryResultCol("aggr__my_buckets__key_0", 20.0),
+				model.NewQueryResultCol("aggr__my_buckets__key_1", 44.0),
 				model.NewQueryResultCol("aggr__my_buckets__count", int64(12)),
 			}},
 			{Cols: []model.QueryResultCol{
-				model.NewQueryResultCol("aggr__my_buckets__key_0", 8.0),
-				model.NewQueryResultCol("aggr__my_buckets__key_1", 20.0),
-				model.NewQueryResultCol("aggr__my_buckets__key_2", 45.0),
+				model.NewQueryResultCol("aggr__my_buckets__key_0", 20.0),
+				model.NewQueryResultCol("aggr__my_buckets__key_1", 45.0),
 				model.NewQueryResultCol("aggr__my_buckets__count", int64(22)),
 			}},
 			{Cols: []model.QueryResultCol{
-				model.NewQueryResultCol("aggr__my_buckets__key_0", 8.0),
-				model.NewQueryResultCol("aggr__my_buckets__key_1", 21.0),
-				model.NewQueryResultCol("aggr__my_buckets__key_2", 49.0),
+				model.NewQueryResultCol("aggr__my_buckets__key_0", 21.0),
+				model.NewQueryResultCol("aggr__my_buckets__key_1", 49.0),
 				model.NewQueryResultCol("aggr__my_buckets__count", int64(1)),
 			}},
 		},
 		ExpectedPancakeSQL: `
-			SELECT CAST(8.000000 AS Float32) AS "aggr__my_buckets__key_0",
-			  FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
-			  AS "aggr__my_buckets__key_1",
+			SELECT FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
+			  AS "aggr__my_buckets__key_0",
 			  FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
 			  __quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 8))
-			  AS "aggr__my_buckets__key_2", count(*) AS "aggr__my_buckets__count"
+			  AS "aggr__my_buckets__key_1", count(*) AS "aggr__my_buckets__count"
 			FROM __quesma_table_name
-			GROUP BY CAST(8.000000 AS Float32) AS "aggr__my_buckets__key_0",
-			  FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
-			  AS "aggr__my_buckets__key_1",
-			  FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(
-			  RADIANS(__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 8))
-			  AS "aggr__my_buckets__key_2"
+			GROUP BY FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
+			  AS "aggr__my_buckets__key_0",
+			  FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
+			  __quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 8))
+			  AS "aggr__my_buckets__key_1"
+			ORDER BY "aggr__my_buckets__count" DESC, "aggr__my_buckets__key_0" ASC,
+			  "aggr__my_buckets__key_1" ASC
 			LIMIT 10`,
 	},
 	{ // [69]
