Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 3b79092

Browse files
authored
[flights] Remove cast causing Clickhouse error (#1341)
Casting to a plain `Float` is dangerous: the column can be `Nullable(Float)`, in which case ClickHouse fails the query because a NULL value cannot be converted to a non-Nullable type. The cast is also unnecessary, since we no longer store geo coordinates as Strings. This change gets rid of the following error:
```
Error processing request: clickhouse: iterating over rows failed: code: 349, message: Cannot convert NULL value to non-Nullable type: while executing 'FUNCTION CAST(__table1.geoip_location_lat :: 1, 'Float'_String : 9) -> CAST(__table1.geoip_location_lat, 'Float'_String) Float32 : 12'
```
1 parent a65abfd commit 3b79092

File tree

6 files changed

+41
-48
lines changed

6 files changed

+41
-48
lines changed

platform/parsers/elastic_query_dsl/pancake_aggregation_parser_buckets.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -332,10 +332,10 @@ func (cw *ClickhouseQueryTranslator) parseGeotileGrid(aggregation *pancakeAggreg
332332
// That's bucket (group by) formula for geotile_grid
333333
// zoom/x/y
334334
// SELECT precisionZoom as zoom,
335-
// FLOOR(((toFloat64("Location::lon") + 180.0) / 360.0) * POWER(2, zoom)) AS x_tile,
335+
// FLOOR((("Location::lon" + 180.0) / 360.0) * POWER(2, zoom)) AS x_tile,
336336
// FLOOR(
337337
// (
338-
// 1 - LOG(TAN(RADIANS(toFloat64("Location::lat"))) + (1 / COS(RADIANS(toFloat64("Location::lat"))))) / PI()
338+
// 1 - LOG(TAN(RADIANS("Location::lat")) + (1 / COS(RADIANS("Location::lat")))) / PI()
339339
// ) / 2.0 * POWER(2, zoom)
340340
// ) AS y_tile, count()
341341
// FROM
@@ -350,15 +350,13 @@ func (cw *ClickhouseQueryTranslator) parseGeotileGrid(aggregation *pancakeAggreg
350350
lon := model.NewGeoLon(fieldName)
351351
lat := model.NewGeoLat(fieldName)
352352

353-
toFloatFunLon := model.NewFunction("toFloat64", lon)
354353
var infixX model.Expr
355-
infixX = model.NewParenExpr(model.NewInfixExpr(toFloatFunLon, "+", model.NewLiteral(180.0)))
354+
infixX = model.NewParenExpr(model.NewInfixExpr(lon, "+", model.NewLiteral(180.0)))
356355
infixX = model.NewParenExpr(model.NewInfixExpr(infixX, "/", model.NewLiteral(360.0)))
357356
infixX = model.NewInfixExpr(infixX, "*",
358357
model.NewFunction("POWER", model.NewLiteral(2), zoomLiteral))
359358
xTile := model.NewFunction("FLOOR", infixX)
360-
toFloatFunLat := model.NewFunction("toFloat64", lat)
361-
radians := model.NewFunction("RADIANS", toFloatFunLat)
359+
radians := model.NewFunction("RADIANS", lat)
362360
tan := model.NewFunction("TAN", radians)
363361
cos := model.NewFunction("COS", radians)
364362
Log := model.NewFunction("LOG", model.NewInfixExpr(tan, "+",

platform/parsers/elastic_query_dsl/pancake_aggregation_parser_metrics.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,8 @@ func generateMetricSelectedColumns(ctx context.Context, metricsAggr metricsAggre
112112
// TODO we have create columns according to the schema
113113
latColumn := model.NewGeoLat(colName)
114114
lonColumn := model.NewGeoLon(colName)
115-
castLat := model.NewFunction("CAST", latColumn, model.NewLiteral(fmt.Sprintf("'%s'", "Float")))
116-
castLon := model.NewFunction("CAST", lonColumn, model.NewLiteral(fmt.Sprintf("'%s'", "Float")))
117-
result = append(result, model.NewFunction("avgOrNull", castLat))
118-
result = append(result, model.NewFunction("avgOrNull", castLon))
115+
result = append(result, model.NewFunction("avgOrNull", latColumn))
116+
result = append(result, model.NewFunction("avgOrNull", lonColumn))
119117
result = append(result, model.NewCountFunc())
120118
}
121119
default:

platform/testdata/aggregation_requests_2.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4559,18 +4559,18 @@ var AggregationTests2 = []AggregationTestCase{
45594559
},
45604560
ExpectedPancakeSQL: `
45614561
SELECT CAST(8.000000 AS Float32) AS "aggr__my_buckets__key_0",
4562-
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 8))
4562+
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
45634563
AS "aggr__my_buckets__key_1",
4564-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
4565-
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
4566-
8)) AS "aggr__my_buckets__key_2", count(*) AS "aggr__my_buckets__count"
4564+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
4565+
__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 8))
4566+
AS "aggr__my_buckets__key_2", count(*) AS "aggr__my_buckets__count"
45674567
FROM __quesma_table_name
45684568
GROUP BY CAST(8.000000 AS Float32) AS "aggr__my_buckets__key_0",
4569-
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 8))
4569+
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 8))
45704570
AS "aggr__my_buckets__key_1",
4571-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
4572-
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
4573-
8)) AS "aggr__my_buckets__key_2"
4571+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(
4572+
RADIANS(__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 8))
4573+
AS "aggr__my_buckets__key_2"
45744574
LIMIT 10`,
45754575
},
45764576
{ // [69]

platform/testdata/kibana_sample_data_ecommerce.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2468,14 +2468,14 @@ var KibanaSampleDataEcommerce = []AggregationTestCase{
24682468
},
24692469
ExpectedPancakeSQL: `
24702470
SELECT CAST(5.000000 AS Float32) AS "aggr__gridSplit__key_0",
2471-
FLOOR(((toFloat64(__quesma_geo_lon("geoip.location"))+180)/360)*POWER(2, 5))
2471+
FLOOR(((__quesma_geo_lon("geoip.location")+180)/360)*POWER(2, 5))
24722472
AS "aggr__gridSplit__key_1",
2473-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))+(1/
2474-
COS(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))))/PI())/2*POWER(2,
2475-
5)) AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
2476-
avgOrNull(CAST(__quesma_geo_lat("geoip_location"), 'Float')) AS
2473+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geoip.location")))+(1/COS(RADIANS(
2474+
__quesma_geo_lat("geoip.location")))))/PI())/2*POWER(2, 5))
2475+
AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
2476+
avgOrNull(__quesma_geo_lat("geoip_location")) AS
24772477
"metric__gridSplit__gridCentroid_col_0",
2478-
avgOrNull(CAST(__quesma_geo_lon("geoip_location"), 'Float')) AS
2478+
avgOrNull(__quesma_geo_lon("geoip_location")) AS
24792479
"metric__gridSplit__gridCentroid_col_1",
24802480
count(*) AS "metric__gridSplit__gridCentroid_col_2",
24812481
sumOrNull("taxful_total_price") AS
@@ -2484,11 +2484,10 @@ var KibanaSampleDataEcommerce = []AggregationTestCase{
24842484
WHERE ("geoip.location" IS NOT NULL AND ("order_date">=fromUnixTimestamp64Milli(
24852485
1740143222223) AND "order_date"<=fromUnixTimestamp64Milli(1740748022223)))
24862486
GROUP BY CAST(5.000000 AS Float32) AS "aggr__gridSplit__key_0",
2487-
FLOOR(((toFloat64(__quesma_geo_lon("geoip.location"))+180)/360)*POWER(2, 5))
2487+
FLOOR(((__quesma_geo_lon("geoip.location")+180)/360)*POWER(2, 5))
24882488
AS "aggr__gridSplit__key_1",
2489-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))+(1/
2490-
COS(RADIANS(toFloat64(__quesma_geo_lat("geoip.location"))))))/PI())/2*POWER(2,
2491-
5)) AS "aggr__gridSplit__key_2"`,
2489+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geoip.location")))+(1/COS(RADIANS(
2490+
__quesma_geo_lat("geoip.location")))))/PI())/2*POWER(2, 5)) AS "aggr__gridSplit__key_2"`,
24922491
},
24932492
{ // [13]
24942493
TestName: "Orders by Country (request 2/3)",

platform/testdata/kibana_sample_data_flights.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2500,14 +2500,14 @@ var KibanaSampleDataFlights = []AggregationTestCase{
25002500
},
25012501
ExpectedPancakeSQL: `
25022502
SELECT CAST(7.000000 AS Float32) AS "aggr__gridSplit__key_0",
2503-
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 7))
2503+
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 7))
25042504
AS "aggr__gridSplit__key_1",
2505-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
2506-
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
2507-
7)) AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
2508-
avgOrNull(CAST(__quesma_geo_lat("originlocation"), 'Float')) AS
2505+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
2506+
__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 7))
2507+
AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
2508+
avgOrNull(__quesma_geo_lat("originlocation")) AS
25092509
"metric__gridSplit__gridCentroid_col_0",
2510-
avgOrNull(CAST(__quesma_geo_lon("originlocation"), 'Float')) AS
2510+
avgOrNull(__quesma_geo_lon("originlocation")) AS
25112511
"metric__gridSplit__gridCentroid_col_1",
25122512
count(*) AS "metric__gridSplit__gridCentroid_col_2",
25132513
sumOrNull("FlightDelayMin") AS
@@ -2516,11 +2516,10 @@ var KibanaSampleDataFlights = []AggregationTestCase{
25162516
WHERE ("OriginLocation" IS NOT NULL AND ("timestamp">=fromUnixTimestamp64Milli(
25172517
1740230608853) AND "timestamp"<=fromUnixTimestamp64Milli(1740835408853)))
25182518
GROUP BY CAST(7.000000 AS Float32) AS "aggr__gridSplit__key_0",
2519-
FLOOR(((toFloat64(__quesma_geo_lon("OriginLocation"))+180)/360)*POWER(2, 7))
2519+
FLOOR(((__quesma_geo_lon("OriginLocation")+180)/360)*POWER(2, 7))
25202520
AS "aggr__gridSplit__key_1",
2521-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))+(1/
2522-
COS(RADIANS(toFloat64(__quesma_geo_lat("OriginLocation"))))))/PI())/2*POWER(2,
2523-
7)) AS "aggr__gridSplit__key_2"`,
2521+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("OriginLocation")))+(1/COS(RADIANS(
2522+
__quesma_geo_lat("OriginLocation")))))/PI())/2*POWER(2, 7)) AS "aggr__gridSplit__key_2"`,
25242523
},
25252524
{ // [13]
25262525
TestName: "Delay Buckets",

platform/testdata/kibana_sample_data_logs.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,26 +1884,25 @@ var KibanaSampleDataLogs = []AggregationTestCase{
18841884
},
18851885
ExpectedPancakeSQL: `
18861886
SELECT CAST(6.000000 AS Float32) AS "aggr__gridSplit__key_0",
1887-
FLOOR(((toFloat64(__quesma_geo_lon("geo.coordinates"))+180)/360)*POWER(2, 6))
1887+
FLOOR(((__quesma_geo_lon("geo.coordinates")+180)/360)*POWER(2, 6))
18881888
AS "aggr__gridSplit__key_1",
1889-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))+(1/
1890-
COS(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))))/PI())/2*POWER(2
1891-
, 6)) AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
1892-
avgOrNull(CAST(__quesma_geo_lat("geo_coordinates"), 'Float')) AS
1889+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geo.coordinates")))+(1/COS(RADIANS(
1890+
__quesma_geo_lat("geo.coordinates")))))/PI())/2*POWER(2, 6))
1891+
AS "aggr__gridSplit__key_2", count(*) AS "aggr__gridSplit__count",
1892+
avgOrNull(__quesma_geo_lat("geo_coordinates")) AS
18931893
"metric__gridSplit__gridCentroid_col_0",
1894-
avgOrNull(CAST(__quesma_geo_lon("geo_coordinates"), 'Float')) AS
1894+
avgOrNull(__quesma_geo_lon("geo_coordinates")) AS
18951895
"metric__gridSplit__gridCentroid_col_1",
18961896
count(*) AS "metric__gridSplit__gridCentroid_col_2",
18971897
sumOrNull("bytes") AS "metric__gridSplit__sum_of_bytes_col_0"
18981898
FROM __quesma_table_name
18991899
WHERE ("geo.coordinates" IS NOT NULL AND ("timestamp">=fromUnixTimestamp64Milli(
19001900
1740178800000) AND "timestamp"<=fromUnixTimestamp64Milli(1740831278103)))
19011901
GROUP BY CAST(6.000000 AS Float32) AS "aggr__gridSplit__key_0",
1902-
FLOOR(((toFloat64(__quesma_geo_lon("geo.coordinates"))+180)/360)*POWER(2, 6))
1902+
FLOOR(((__quesma_geo_lon("geo.coordinates")+180)/360)*POWER(2, 6))
19031903
AS "aggr__gridSplit__key_1",
1904-
FLOOR((1-LOG(TAN(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))+(1/
1905-
COS(RADIANS(toFloat64(__quesma_geo_lat("geo.coordinates"))))))/PI())/2*POWER(2
1906-
, 6)) AS "aggr__gridSplit__key_2"`,
1904+
FLOOR((1-LOG(TAN(RADIANS(__quesma_geo_lat("geo.coordinates")))+(1/COS(
1905+
RADIANS(__quesma_geo_lat("geo.coordinates")))))/PI())/2*POWER(2, 6)) AS "aggr__gridSplit__key_2"`,
19071906
},
19081907
{ // [9]
19091908
TestName: "Total Requests and Bytes (2/2 request)",

0 commit comments

Comments
 (0)