Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 820fdeb

Browse files
authored
new aggregation IP Range (#1100)
1 parent 31a39e1 commit 820fdeb

File tree

8 files changed

+545
-47
lines changed

8 files changed

+545
-47
lines changed

quesma/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ require (
4040

4141
require (
4242
filippo.io/edwards25519 v1.1.0 // indirect
43+
github.com/H0llyW00dzZ/cidr v1.2.1 // indirect
4344
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
4445
github.com/hashicorp/errwrap v1.0.0 // indirect
4546
github.com/jackc/chunkreader/v2 v2.0.1 // indirect

quesma/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ github.com/ClickHouse/clickhouse-go/v2 v2.30.0 h1:AG4D/hW39qa58+JHQIFOSnxyL46H6h
77
github.com/ClickHouse/clickhouse-go/v2 v2.30.0/go.mod h1:i9ZQAojcayW3RsdCb3YR+n+wC2h65eJsZCscZ1Z1wyo=
88
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
99
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
10+
github.com/H0llyW00dzZ/cidr v1.2.1 h1:DfRHX+RqVVKZijQGO1aJSaWvN9Saan8sycK/4wrfY5g=
11+
github.com/H0llyW00dzZ/cidr v1.2.1/go.mod h1:S+EgYkMandSAN27mGNG/CB3jeoXDAyalsvvVFpWdnXc=
1012
github.com/DataDog/go-sqllexer v0.0.18 h1:ErBvoO7/srJLdA2ebwd+HPqD4g1kN++BP64A8qvmh9U=
1113
github.com/DataDog/go-sqllexer v0.0.18/go.mod h1:KwkYhpFEVIq+BfobkTC1vfqm4gTi65skV/DpDBXtexc=
1214
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
// Copyright Quesma, licensed under the Elastic License 2.0.
2+
// SPDX-License-Identifier: Elastic-2.0
3+
package bucket_aggregations
4+
5+
import (
6+
"context"
7+
"fmt"
8+
"quesma/logger"
9+
"quesma/model"
10+
"reflect"
11+
)
12+
13+
// BiggestIpv4 is "255.255.255.255 + 1", so to say. Used in Elastic, because it always uses exclusive upper bounds.
14+
// So instead of "<= 255.255.255.255", it uses "< ::1:0:0:0"
15+
const BiggestIpv4 = "::1:0:0:0"
16+
17+
// Current limitation: we expect Clickhouse field to be IPv4 (and not IPv6)
18+
19+
// Clickhouse table to test SQLs:
20+
// CREATE TABLE __quesma_table_name (clientip IPv4) ENGINE=Log
21+
// INSERT INTO __quesma_table_name VALUES ('0.0.0.0'), ('5.5.5.5'), ('90.180.90.180'), ('128.200.0.8'), ('192.168.1.67'), ('222.168.22.67')
22+
23+
// TODO make part of QueryType interface and implement for all aggregations
24+
// TODO add bad requests to tests
25+
// Doing so will ensure we see 100% of what we're interested in in our logs (now we see ~95%)
26+
func CheckParamsIpRange(ctx context.Context, paramsRaw any) error {
27+
requiredParams := map[string]string{
28+
"field": "string",
29+
"ranges": "map_todo_improve_this_check", // TODO should add same type check to this 'ranges' field, will be fixed
30+
}
31+
optionalParams := map[string]string{
32+
"keyed": "bool",
33+
}
34+
35+
params, ok := paramsRaw.(model.JsonMap)
36+
if !ok {
37+
return fmt.Errorf("params is not a map, but %+v", paramsRaw)
38+
}
39+
40+
// check if required are present
41+
for paramName, paramType := range requiredParams {
42+
paramVal, exists := params[paramName]
43+
if !exists {
44+
return fmt.Errorf("required parameter %s not found in params", paramName)
45+
}
46+
if paramType == "map_todo_improve_this_check" {
47+
continue // uncontinue after TODO is fixed
48+
}
49+
if reflect.TypeOf(paramVal).Name() != paramType { // TODO I'll make a small rewrite to not use reflect here
50+
return fmt.Errorf("required parameter %s is not of type %s, but %T", paramName, paramType, paramVal)
51+
}
52+
}
53+
54+
// check if only required/optional are present
55+
for paramName := range params {
56+
if _, isRequired := requiredParams[paramName]; !isRequired {
57+
wantedType, isOptional := optionalParams[paramName]
58+
if !isOptional {
59+
return fmt.Errorf("unexpected parameter %s found in IP Range params %v", paramName, params)
60+
}
61+
if reflect.TypeOf(params[paramName]).Name() != wantedType { // TODO I'll make a small rewrite to not use reflect here
62+
return fmt.Errorf("optional parameter %s is not of type %s, but %T", paramName, wantedType, params[paramName])
63+
}
64+
}
65+
}
66+
67+
return nil
68+
}
69+
70+
type (
71+
IpRange struct {
72+
ctx context.Context
73+
field model.Expr
74+
intervals []IpInterval
75+
keyed bool
76+
}
77+
IpInterval struct {
78+
begin string
79+
end string
80+
key *string // when nil, key is not present
81+
}
82+
)
83+
84+
func NewIpRange(ctx context.Context, intervals []IpInterval, field model.Expr, keyed bool) *IpRange {
85+
return &IpRange{
86+
ctx: ctx,
87+
field: field,
88+
intervals: intervals,
89+
keyed: keyed,
90+
}
91+
}
92+
93+
func NewIpInterval(begin, end string, key *string) IpInterval {
94+
return IpInterval{begin: begin, end: end, key: key}
95+
}
96+
97+
func (interval IpInterval) ToWhereClause(field model.Expr) model.Expr {
98+
isBegin := interval.begin != UnboundedInterval
99+
isEnd := interval.end != UnboundedInterval && interval.end != BiggestIpv4
100+
101+
begin := model.NewInfixExpr(field, ">=", model.NewLiteralSingleQuoted(interval.begin))
102+
end := model.NewInfixExpr(field, "<", model.NewLiteralSingleQuoted(interval.end))
103+
104+
if isBegin && isEnd {
105+
return model.NewInfixExpr(begin, "AND", end)
106+
} else if isBegin {
107+
return begin
108+
} else if isEnd {
109+
return end
110+
} else {
111+
return model.TrueExpr
112+
}
113+
}
114+
115+
// String returns key part of the response, e.g. "1.0-2.0", or "*-6.55"
116+
func (interval IpInterval) String() string {
117+
if interval.key != nil {
118+
return *interval.key
119+
}
120+
return fmt.Sprintf("%s-%s", interval.begin, interval.end)
121+
}
122+
123+
func (query *IpRange) AggregationType() model.AggregationType {
124+
return model.BucketAggregation
125+
}
126+
127+
func (query *IpRange) TranslateSqlResponseToJson(rows []model.QueryResultRow) model.JsonMap {
128+
return nil
129+
}
130+
131+
func (query *IpRange) String() string {
132+
return "ip_range"
133+
}
134+
135+
func (query *IpRange) DoesNotHaveGroupBy() bool {
136+
return true
137+
}
138+
139+
func (query *IpRange) CombinatorGroups() (result []CombinatorGroup) {
140+
for intervalIdx, interval := range query.intervals {
141+
prefix := fmt.Sprintf("range_%d__", intervalIdx)
142+
if len(query.intervals) == 1 {
143+
prefix = ""
144+
}
145+
result = append(result, CombinatorGroup{
146+
idx: intervalIdx,
147+
Prefix: prefix,
148+
Key: interval.String(),
149+
WhereClause: interval.ToWhereClause(query.field),
150+
})
151+
}
152+
return
153+
}
154+
155+
// TODO: handle and test bad requests, e.g. a range that specifies both "to"/"from" and "mask"
156+
157+
func (query *IpRange) CombinatorTranslateSqlResponseToJson(subGroup CombinatorGroup, rows []model.QueryResultRow) model.JsonMap {
158+
if len(rows) == 0 || len(rows[0].Cols) == 0 {
159+
logger.ErrorWithCtx(query.ctx).Msgf("need at least one row and column in ip_range aggregation response, rows: %d, cols: %d", len(rows), len(rows[0].Cols))
160+
return model.JsonMap{}
161+
}
162+
count := rows[0].Cols[len(rows[0].Cols)-1].Value
163+
response := model.JsonMap{
164+
"key": subGroup.Key,
165+
"doc_count": count,
166+
}
167+
168+
interval := query.intervals[subGroup.idx]
169+
if interval.begin != UnboundedInterval {
170+
response["from"] = interval.begin
171+
}
172+
if interval.end != UnboundedInterval {
173+
response["to"] = interval.end
174+
}
175+
176+
return response
177+
}
178+
179+
func (query *IpRange) CombinatorSplit() []model.QueryType {
180+
result := make([]model.QueryType, 0, len(query.intervals))
181+
for _, interval := range query.intervals {
182+
result = append(result, NewIpRange(query.ctx, []IpInterval{interval}, query.field, query.keyed))
183+
}
184+
return result
185+
}

quesma/model/expr.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
// SPDX-License-Identifier: Elastic-2.0
33
package model
44

5-
import "strconv"
5+
import (
6+
"fmt"
7+
"strconv"
8+
)
69

710
// Expr is a generic representation of an expression which is a part of the SQL query.
811
type Expr interface {
@@ -126,6 +129,10 @@ func NewLiteral(value any) LiteralExpr {
126129
return LiteralExpr{Value: value}
127130
}
128131

132+
func NewLiteralSingleQuoted(value string) LiteralExpr {
133+
return LiteralExpr{Value: fmt.Sprintf("'%s'", value)}
134+
}
135+
129136
// DistinctExpr is a representation of DISTINCT keyword in SQL, e.g. `SELECT DISTINCT` ... or `SELECT COUNT(DISTINCT ...)`
130137
type DistinctExpr struct {
131138
Expr Expr

quesma/queryparser/aggregation_parser.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,16 @@ func (cw *ClickhouseQueryTranslator) parseStringField(queryMap QueryMap, fieldNa
291291
return defaultValue
292292
}
293293

294+
func (cw *ClickhouseQueryTranslator) parseStringFieldExistCheck(queryMap QueryMap, fieldName string) (value string, exists bool) {
295+
if valueRaw, exists := queryMap[fieldName]; exists {
296+
if asString, ok := valueRaw.(string); ok {
297+
return asString, true
298+
}
299+
logger.WarnWithCtx(cw.Ctx).Msgf("%s is not a string, but %T, value: %v", fieldName, valueRaw, valueRaw)
300+
}
301+
return "", false
302+
}
303+
294304
func (cw *ClickhouseQueryTranslator) parseArrayField(queryMap QueryMap, fieldName string) ([]any, error) {
295305
if valueRaw, exists := queryMap[fieldName]; exists {
296306
if asArray, ok := valueRaw.([]any); ok {

quesma/queryparser/pancake_aggregation_parser_buckets.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@ package queryparser
55

66
import (
77
"fmt"
8+
"github.com/H0llyW00dzZ/cidr"
89
"github.com/pkg/errors"
10+
"math"
11+
"net"
912
"quesma/clickhouse"
1013
"quesma/logger"
1114
"quesma/model"
1215
"quesma/model/bucket_aggregations"
16+
"quesma/util"
1317
"sort"
1418
"strconv"
1519
"strings"
@@ -37,6 +41,7 @@ func (cw *ClickhouseQueryTranslator) pancakeTryBucketAggregation(aggregation *pa
3741
}},
3842
{"multi_terms", cw.parseMultiTerms},
3943
{"composite", cw.parseComposite},
44+
{"ip_range", cw.parseIpRange},
4045
{"ip_prefix", cw.parseIpPrefix},
4146
}
4247

@@ -382,6 +387,48 @@ func (cw *ClickhouseQueryTranslator) parseComposite(aggregation *pancakeAggregat
382387
return nil
383388
}
384389

390+
func (cw *ClickhouseQueryTranslator) parseIpRange(aggregation *pancakeAggregationTreeNode, params QueryMap) error {
391+
const defaultKeyed = false
392+
393+
if err := bucket_aggregations.CheckParamsIpRange(cw.Ctx, params); err != nil {
394+
return err
395+
}
396+
397+
rangesRaw := params["ranges"].([]any)
398+
ranges := make([]bucket_aggregations.IpInterval, 0, len(rangesRaw))
399+
for _, rangeRaw := range rangesRaw {
400+
var key *string
401+
if keyIfPresent, exists := cw.parseStringFieldExistCheck(rangeRaw.(QueryMap), "key"); exists {
402+
key = &keyIfPresent
403+
}
404+
var begin, end string
405+
if maskIfExists, exists := cw.parseStringFieldExistCheck(rangeRaw.(QueryMap), "mask"); exists {
406+
_, ipNet, err := net.ParseCIDR(maskIfExists)
407+
if err != nil {
408+
return err
409+
}
410+
beginAsInt, endAsInt := cidr.IPv4ToRange(ipNet)
411+
begin = util.IntToIpv4(beginAsInt)
412+
// endAsInt is inclusive, we do +1, because we need it exclusive
413+
if endAsInt != math.MaxUint32 {
414+
end = util.IntToIpv4(endAsInt + 1)
415+
} else {
416+
end = bucket_aggregations.BiggestIpv4 // "255.255.255.255 + 1", so to say (value in compliance with Elastic)
417+
}
418+
if key == nil {
419+
key = &maskIfExists
420+
}
421+
} else {
422+
begin = cw.parseStringField(rangeRaw.(QueryMap), "from", bucket_aggregations.UnboundedInterval)
423+
end = cw.parseStringField(rangeRaw.(QueryMap), "to", bucket_aggregations.UnboundedInterval)
424+
}
425+
ranges = append(ranges, bucket_aggregations.NewIpInterval(begin, end, key))
426+
}
427+
aggregation.isKeyed = cw.parseBoolField(params, "keyed", defaultKeyed)
428+
aggregation.queryType = bucket_aggregations.NewIpRange(cw.Ctx, ranges, cw.parseFieldField(params, "ip_range"), aggregation.isKeyed)
429+
return nil
430+
}
431+
385432
func (cw *ClickhouseQueryTranslator) parseIpPrefix(aggregation *pancakeAggregationTreeNode, params QueryMap) error {
386433
const (
387434
defaultIsIpv6 = false

0 commit comments

Comments
 (0)