@@ -5,16 +5,19 @@ package bucket_aggregations
55import (
66 "context"
77 "fmt"
8+ "math/big"
89 "quesma/logger"
910 "quesma/model"
11+ "quesma/util"
1012 "reflect"
1113)
1214
13- // Current limitation: we expect Clickhouse field to be IPv4 (and not IPv6)
14-
15- // Clickhouse table to test SQLs:
16- // CREATE TABLE __quesma_table_name (clientip IPv4) ENGINE=Log
17- // INSERT INTO __quesma_table_name VALUES ('0.0.0.0'), ('5.5.5.5'), ('90.180.90.180'), ('128.200.0.8'), ('192.168.1.67'), ('222.168.22.67')
15+ // Testing helpers:
16+ // * (ipv4) Clickhouse table to test SQLs:
17+ // CREATE TABLE __quesma_table_name (clientip IPv4) ENGINE=Log
18+ // INSERT INTO __quesma_table_name VALUES ('0.0.0.0'), ('5.5.5.5'), ('90.180.90.180'), ('128.200.0.8'), ('192.168.1.67'), ('222.168.22.67')
19+ // * (ipv6) If the IP field in Clickhouse is a String rather than IPv6, replace "ip_fieldname" with "ip_fieldname"::IPv6 to run the SQLs from the tests
20+ // (use with care: this usually works, but results occasionally differ, possibly because of big/little-endian differences)
1821
1922// TODO make part of QueryType interface and implement for all aggregations
2023// TODO add bad requests to tests
@@ -30,7 +33,7 @@ func CheckParamsIpPrefix(ctx context.Context, paramsRaw any) error {
3033 "keyed" : "bool" ,
3134 "min_doc_count" : "int" ,
3235 }
33- logIfYouSeeThemParams := []string {"min_doc_count" } // we don't use min_doc_count yet. We'll log if "is_ipv6" == true, also.
36+ logIfYouSeeThemParams := []string {"min_doc_count" } // we don't use min_doc_count yet.
3437
3538 params , ok := paramsRaw .(model.JsonMap )
3639 if ! ok {
@@ -47,6 +50,15 @@ func CheckParamsIpPrefix(ctx context.Context, paramsRaw any) error {
4750 return fmt .Errorf ("required parameter %s is not of type %s, but %T" , paramName , paramType , paramVal )
4851 }
4952 }
53+ // prefixLength must be [0, 32] for ipv4, [0, 128] for ipv6
54+ prefixLength := params ["prefix_length" ].(float64 ) // will never panic because of checks above
55+ upperBound := 32.0
56+ if ipv6 , exists := params ["is_ipv6" ]; exists && ipv6 .(bool ) {
57+ upperBound = 128.0
58+ }
59+ if util .IsSmaller (prefixLength , 0 ) || util .IsSmaller (upperBound , prefixLength ) {
60+ return fmt .Errorf ("prefix_length must be in range [0, %d], but got %f" , int (upperBound ), prefixLength )
61+ }
5062
5163 // check if only required/optional are present
5264 for paramName := range params {
@@ -67,9 +79,6 @@ func CheckParamsIpPrefix(ctx context.Context, paramsRaw any) error {
6779 logger .WarnWithCtxAndThrottling (ctx , "ip_prefix" , warnParam , "we didn't expect %s in IP Range params %v" , warnParam , params )
6880 }
6981 }
70- if isIpv6 , exists := params ["is_ipv6" ]; exists && isIpv6 .(bool ) {
71- logger .WarnWithCtxAndThrottling (ctx , "ip_prefix" , "is_ipv6" , "is_ipv6 is true in IP Range params %v, we don't support IPv6 yet" , params )
72- }
7382
7483 return nil
7584}
@@ -101,7 +110,12 @@ func (query *IpPrefix) AggregationType() model.AggregationType {
101110}
102111
103112func (query * IpPrefix ) TranslateSqlResponseToJson (rows []model.QueryResultRow ) model.JsonMap {
104- var netmask , keySuffix string
113+ var (
114+ ok bool
115+ key , netmask , keySuffix string
116+ originalKeyIpv4 uint32 // if is_ipv6 is false, Clickhouse will always return uint32 as the key
117+ originalKeyIpv6 big.Int // if is_ipv6 is true, Clickhouse will always return big.Int as the key
118+ )
105119 if ! query .isIpv6 {
106120 netmask = query .calcNetMask ()
107121 }
@@ -111,7 +125,6 @@ func (query *IpPrefix) TranslateSqlResponseToJson(rows []model.QueryResultRow) m
111125 buckets := make ([]model.JsonMap , 0 , len (rows ))
112126 for _ , row := range rows {
113127 var docCount any
114- var originalKey uint32
115128 if query .prefixLength == 0 {
116129 if len (row .Cols ) != 1 {
117130 logger .ErrorWithCtx (query .ctx ).Msgf (
@@ -125,17 +138,26 @@ func (query *IpPrefix) TranslateSqlResponseToJson(rows []model.QueryResultRow) m
125138 "unexpected number of columns in ip_prefix aggregation response, len: %d, row: %v" , len (row .Cols ), row )
126139 continue
127140 }
128- var ok bool
129- originalKey , ok = row .Cols [0 ].Value .(uint32 )
141+
142+ docCount = row .Cols [1 ].Value
143+ if query .isIpv6 {
144+ originalKeyIpv6 , ok = row .Cols [0 ].Value .(big.Int )
145+ } else {
146+ originalKeyIpv4 , ok = row .Cols [0 ].Value .(uint32 )
147+ }
130148 if ! ok {
131149 logger .ErrorWithCtx (query .ctx ).Msgf ("unexpected type of key in ip_prefix aggregation response, got %T" , row .Cols [0 ])
132150 continue
133151 }
134- docCount = row .Cols [1 ].Value
135152 }
136153
154+ if query .isIpv6 {
155+ key = query .calcKeyIPv6 (originalKeyIpv6 ) + keySuffix
156+ } else {
157+ key = query .calcKeyIPv4 (originalKeyIpv4 ) + keySuffix
158+ }
137159 bucket := model.JsonMap {
138- "key" : query . calcKey ( originalKey ) + keySuffix ,
160+ "key" : key ,
139161 "doc_count" : docCount ,
140162 "prefix_length" : query .prefixLength ,
141163 "is_ipv6" : query .isIpv6 ,
@@ -175,18 +197,27 @@ func (query *IpPrefix) SqlSelectQuery() model.Expr {
175197 if query .prefixLength == 0 {
176198 return nil
177199 }
178- return model .NewFunction ("intDiv" , query .field , model .NewLiteral (query .divideByToGroupBy ()))
200+ if query .isIpv6 {
201+ return model .NewFunction ("intDiv" , query .field , model .NewLiteral (query .divideByToGroupByIpv6 ().String ()))
202+ } else {
203+ return model .NewFunction ("intDiv" , query .field , model .NewLiteral (query .divideByToGroupByIpv4 ()))
204+ }
179205}
180206
181- func (query * IpPrefix ) divideByToGroupBy () uint32 {
207+ func (query * IpPrefix ) divideByToGroupByIpv4 () uint32 {
182208 return 1 << (32 - query .prefixLength )
183209}
184210
185- func (query * IpPrefix ) calcKey (originalKey uint32 ) string {
211+ // divideByToGroupByIpv6 returns 2^(128-prefixLength)
212+ func (query * IpPrefix ) divideByToGroupByIpv6 () * big.Int {
213+ return big .NewInt (1 ).Lsh (big .NewInt (1 ), uint (128 - query .prefixLength ))
214+ }
215+
216+ func (query * IpPrefix ) calcKeyIPv4 (originalKey uint32 ) string {
186217 if query .prefixLength == 0 {
187218 return "0.0.0.0"
188219 }
189- ipAsInt := originalKey * query .divideByToGroupBy ()
220+ ipAsInt := originalKey * query .divideByToGroupByIpv4 ()
190221 part4 := ipAsInt % 256
191222 ipAsInt /= 256
192223 part3 := ipAsInt % 256
@@ -197,10 +228,17 @@ func (query *IpPrefix) calcKey(originalKey uint32) string {
197228 return fmt .Sprintf ("%d.%d.%d.%d" , part1 , part2 , part3 , part4 )
198229}
199230
231+ func (query * IpPrefix ) calcKeyIPv6 (originalKey big.Int ) string {
232+ // ipAsInt = originalKey * 2^(128-prefixLength)
233+ ipAsInt := originalKey .Mul (& originalKey , big .NewInt (1 ).Lsh (big .NewInt (1 ), uint (128 - query .prefixLength )))
234+ return util .BigIntToIpv6 (* ipAsInt )
235+ }
236+
237+ // calcNetMask is only called for ipv4, so 1<<(query.prefixLength-1) will never overflow
200238func (query * IpPrefix ) calcNetMask () string {
201239 if query .prefixLength == 0 {
202240 return "0.0.0.0"
203241 }
204242 biggestPossibleKey := uint32 (1 << query .prefixLength - 1 )
205- return query .calcKey (biggestPossibleKey ) // netmask is the same as ip of biggest possible key
243+ return query .calcKeyIPv4 (biggestPossibleKey ) // netmask is the same as ip of biggest possible key
206244}
0 commit comments