Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit cea1f7a

Browse files
authored
Support for IPv6 in IP Prefix aggr (#1112)
We had support for IPv4, but Jacek wanted support for a) IPv6 and b) IPs stored in Clickhouse as ints. This PR adds support for IPv6. I think IPs stored as ints in Clickhouse will also work, because whenever we query for IPs we use `intDiv(ip_field, some_number)`, so we already rely on the fact that Clickhouse treats all IPs as ints (`IPv4` type: `uint32`, `IPv6` type: `uint128`). I haven't tested that yet, though.
1 parent fe52aa6 commit cea1f7a

File tree

6 files changed

+501
-29
lines changed

6 files changed

+501
-29
lines changed

quesma/clickhouse/util_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,20 @@ func TestParseTypeFromShowColumnsTuple_2(t *testing.T) {
5959
assert.Equal(t, NewBaseType("String"), mvt.Cols[1].Type)
6060
assert.Equal(t, "c", mvt.Cols[1].Name)
6161
}
62+
63+
// TestWhatDriverWillReturn is a scratch test for manually probing the Clickhouse driver.
// Example: it was unclear which Go type the driver yields for intDiv(ipv6, 1) in Clickhouse,
// and running the snippet below gave the answer.
// The probe is kept commented out, so as written this test does nothing and always passes.
func TestWhatDriverWillReturn(t *testing.T) {
	// options := clickhouse.Options{Addr: []string{"localhost:9000"}}
	// db := clickhouse.OpenDB(&options)
	// defer db.Close()
	//
	// rows, _ := db.Query("SELECT intDiv(ipv6, 1) from i LIMIT 10")
	// var q big.Int // replacing big.Int with any might be useful
	// for rows.Next() {
	// 	rows.Scan(&q)
	// 	fmt.Printf("%v %T\n", q, q)
	// }
}

quesma/model/bucket_aggregations/ip_prefix.go

Lines changed: 58 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,19 @@ package bucket_aggregations
55
import (
66
"context"
77
"fmt"
8+
"math/big"
89
"quesma/logger"
910
"quesma/model"
11+
"quesma/util"
1012
"reflect"
1113
)
1214

13-
// Current limitation: we expect Clickhouse field to be IPv4 (and not IPv6)
14-
15-
// Clickhouse table to test SQLs:
16-
// CREATE TABLE __quesma_table_name (clientip IPv4) ENGINE=Log
17-
// INSERT INTO __quesma_table_name VALUES ('0.0.0.0'), ('5.5.5.5'), ('90.180.90.180'), ('128.200.0.8'), ('192.168.1.67'), ('222.168.22.67')
15+
// Testing helpers:
16+
// * (ipv4) Clickhouse table to test SQLs:
17+
// CREATE TABLE __quesma_table_name (clientip IPv4) ENGINE=Log
18+
// INSERT INTO __quesma_table_name VALUES ('0.0.0.0'), ('5.5.5.5'), ('90.180.90.180'), ('128.200.0.8'), ('192.168.1.67'), ('222.168.22.67')
19+
// * (ipv6) If the IP field in Clickhouse is a String rather than IPv6, replace "ip_fieldname" with "ip_fieldname"::IPv6 to run the SQLs from the tests
20+
// (be careful: this works most of the time, but the results occasionally differ, probably because of big/little-endian differences)
1821

1922
// TODO make part of QueryType interface and implement for all aggregations
2023
// TODO add bad requests to tests
@@ -30,7 +33,7 @@ func CheckParamsIpPrefix(ctx context.Context, paramsRaw any) error {
3033
"keyed": "bool",
3134
"min_doc_count": "int",
3235
}
33-
logIfYouSeeThemParams := []string{"min_doc_count"} // we don't use min_doc_count yet. We'll log if "is_ipv6" == true, also.
36+
logIfYouSeeThemParams := []string{"min_doc_count"} // we don't use min_doc_count yet.
3437

3538
params, ok := paramsRaw.(model.JsonMap)
3639
if !ok {
@@ -47,6 +50,15 @@ func CheckParamsIpPrefix(ctx context.Context, paramsRaw any) error {
4750
return fmt.Errorf("required parameter %s is not of type %s, but %T", paramName, paramType, paramVal)
4851
}
4952
}
53+
// prefixLength must be [0, 32] for ipv4, [0, 128] for ipv6
54+
prefixLength := params["prefix_length"].(float64) // will never panic because of checks above
55+
upperBound := 32.0
56+
if ipv6, exists := params["is_ipv6"]; exists && ipv6.(bool) {
57+
upperBound = 128.0
58+
}
59+
if util.IsSmaller(prefixLength, 0) || util.IsSmaller(upperBound, prefixLength) {
60+
return fmt.Errorf("prefix_length must be in range [0, %d], but got %f", int(upperBound), prefixLength)
61+
}
5062

5163
// check if only required/optional are present
5264
for paramName := range params {
@@ -67,9 +79,6 @@ func CheckParamsIpPrefix(ctx context.Context, paramsRaw any) error {
6779
logger.WarnWithCtxAndThrottling(ctx, "ip_prefix", warnParam, "we didn't expect %s in IP Range params %v", warnParam, params)
6880
}
6981
}
70-
if isIpv6, exists := params["is_ipv6"]; exists && isIpv6.(bool) {
71-
logger.WarnWithCtxAndThrottling(ctx, "ip_prefix", "is_ipv6", "is_ipv6 is true in IP Range params %v, we don't support IPv6 yet", params)
72-
}
7382

7483
return nil
7584
}
@@ -101,7 +110,12 @@ func (query *IpPrefix) AggregationType() model.AggregationType {
101110
}
102111

103112
func (query *IpPrefix) TranslateSqlResponseToJson(rows []model.QueryResultRow) model.JsonMap {
104-
var netmask, keySuffix string
113+
var (
114+
ok bool
115+
key, netmask, keySuffix string
116+
originalKeyIpv4 uint32 // if is_ipv6 is false, Clickhouse will always return uint32 as the key
117+
originalKeyIpv6 big.Int // if is_ipv6 is true, Clickhouse will always return big.Int as the key
118+
)
105119
if !query.isIpv6 {
106120
netmask = query.calcNetMask()
107121
}
@@ -111,7 +125,6 @@ func (query *IpPrefix) TranslateSqlResponseToJson(rows []model.QueryResultRow) m
111125
buckets := make([]model.JsonMap, 0, len(rows))
112126
for _, row := range rows {
113127
var docCount any
114-
var originalKey uint32
115128
if query.prefixLength == 0 {
116129
if len(row.Cols) != 1 {
117130
logger.ErrorWithCtx(query.ctx).Msgf(
@@ -125,17 +138,26 @@ func (query *IpPrefix) TranslateSqlResponseToJson(rows []model.QueryResultRow) m
125138
"unexpected number of columns in ip_prefix aggregation response, len: %d, row: %v", len(row.Cols), row)
126139
continue
127140
}
128-
var ok bool
129-
originalKey, ok = row.Cols[0].Value.(uint32)
141+
142+
docCount = row.Cols[1].Value
143+
if query.isIpv6 {
144+
originalKeyIpv6, ok = row.Cols[0].Value.(big.Int)
145+
} else {
146+
originalKeyIpv4, ok = row.Cols[0].Value.(uint32)
147+
}
130148
if !ok {
131149
logger.ErrorWithCtx(query.ctx).Msgf("unexpected type of key in ip_prefix aggregation response, got %T", row.Cols[0])
132150
continue
133151
}
134-
docCount = row.Cols[1].Value
135152
}
136153

154+
if query.isIpv6 {
155+
key = query.calcKeyIPv6(originalKeyIpv6) + keySuffix
156+
} else {
157+
key = query.calcKeyIPv4(originalKeyIpv4) + keySuffix
158+
}
137159
bucket := model.JsonMap{
138-
"key": query.calcKey(originalKey) + keySuffix,
160+
"key": key,
139161
"doc_count": docCount,
140162
"prefix_length": query.prefixLength,
141163
"is_ipv6": query.isIpv6,
@@ -175,18 +197,27 @@ func (query *IpPrefix) SqlSelectQuery() model.Expr {
175197
if query.prefixLength == 0 {
176198
return nil
177199
}
178-
return model.NewFunction("intDiv", query.field, model.NewLiteral(query.divideByToGroupBy()))
200+
if query.isIpv6 {
201+
return model.NewFunction("intDiv", query.field, model.NewLiteral(query.divideByToGroupByIpv6().String()))
202+
} else {
203+
return model.NewFunction("intDiv", query.field, model.NewLiteral(query.divideByToGroupByIpv4()))
204+
}
179205
}
180206

181-
func (query *IpPrefix) divideByToGroupBy() uint32 {
207+
// divideByToGroupByIpv4 returns 2^(32-prefixLength), the divisor SqlSelectQuery passes to
// intDiv for IPv4 fields.
// NOTE: for prefixLength == 0 the shift is by 32 bits, which yields 0 for a uint32;
// the callers visible in this file handle prefixLength == 0 before calling this.
func (query *IpPrefix) divideByToGroupByIpv4() uint32 {
	hostBits := 32 - query.prefixLength
	return uint32(1) << hostBits
}
184210

185-
func (query *IpPrefix) calcKey(originalKey uint32) string {
211+
// divideByToGroupByIpv6 returns 2^(128-prefixLength)
212+
func (query *IpPrefix) divideByToGroupByIpv6() *big.Int {
213+
return big.NewInt(1).Lsh(big.NewInt(1), uint(128-query.prefixLength))
214+
}
215+
216+
func (query *IpPrefix) calcKeyIPv4(originalKey uint32) string {
186217
if query.prefixLength == 0 {
187218
return "0.0.0.0"
188219
}
189-
ipAsInt := originalKey * query.divideByToGroupBy()
220+
ipAsInt := originalKey * query.divideByToGroupByIpv4()
190221
part4 := ipAsInt % 256
191222
ipAsInt /= 256
192223
part3 := ipAsInt % 256
@@ -197,10 +228,17 @@ func (query *IpPrefix) calcKey(originalKey uint32) string {
197228
return fmt.Sprintf("%d.%d.%d.%d", part1, part2, part3, part4)
198229
}
199230

231+
func (query *IpPrefix) calcKeyIPv6(originalKey big.Int) string {
232+
// ipAsInt = originalKey * 2^(128-prefixLength)
233+
ipAsInt := originalKey.Mul(&originalKey, big.NewInt(1).Lsh(big.NewInt(1), uint(128-query.prefixLength)))
234+
return util.BigIntToIpv6(*ipAsInt)
235+
}
236+
237+
// calcNetMask is only called for ipv4, so 1<<(query.prefixLength-1) will never overflow
200238
func (query *IpPrefix) calcNetMask() string {
201239
if query.prefixLength == 0 {
202240
return "0.0.0.0"
203241
}
204242
biggestPossibleKey := uint32(1<<query.prefixLength - 1)
205-
return query.calcKey(biggestPossibleKey) // netmask is the same as ip of biggest possible key
243+
return query.calcKeyIPv4(biggestPossibleKey) // netmask is the same as ip of biggest possible key
206244
}

quesma/queryparser/pancake_json_rendering.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package queryparser
55
import (
66
"context"
77
"fmt"
8+
"math/big"
89
"quesma/logger"
910
"quesma/model"
1011
"quesma/model/bucket_aggregations"
@@ -105,7 +106,15 @@ func (p *pancakeJSONRenderer) splitBucketRows(bucket *pancakeModelBucketAggregat
105106
if strings.HasPrefix(cols.ColName, bucketKeyName) {
106107
for _, previousCols := range previousBucket.Cols {
107108
if cols.ColName == previousCols.ColName {
108-
if cols.Value != previousCols.Value {
109+
var isEqual bool
110+
switch val := cols.Value.(type) {
111+
case big.Int:
112+
prevVal := previousCols.Value.(big.Int)
113+
isEqual = val.Cmp(&prevVal) == 0
114+
default:
115+
isEqual = val == previousCols.Value
116+
}
117+
if !isEqual {
109118
isNewBucket = true
110119
}
111120
break

0 commit comments

Comments
 (0)