Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit d085fa5

Browse files
Fix match query for integer fields (#1037)
The `match` query is in most cases a full-text search, e.g.: ```json { "query": { "match": { "message": "this is a test" } } } ``` However, it can also be used to find an exact value against an integer field: ```json { "query": { "match": { "products_count": "5" } } } ``` In such a case Quesma would generate invalid SQL, trying to do an `ILIKE` against an `Int64` column: ``` Illegal type Int64 of argument of function ilike ``` Fix the issue by introducing an internal `__quesma_match` operator and a transformation which transforms it either to `ILIKE` or `=`. Fixes #1018 --------- Co-authored-by: Krzysztof Kiewicz <[email protected]>
1 parent c2f696f commit d085fa5

File tree

9 files changed

+173
-23
lines changed

9 files changed

+173
-23
lines changed

quesma/model/constants.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ const (
88
TimestampFieldName = "@timestamp"
99

1010
DateHourFunction = "__quesma_date_hour"
11+
MatchOperator = "__quesma_match"
1112
)

quesma/model/expr_string_renderer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ func (v *renderer) VisitInfix(e InfixExpr) interface{} {
8282
}
8383
// This might look like a strange heuristic, but it is aligned with the way we are currently generating the statement
8484
// I think in the future every infix op should be in braces.
85-
if e.Op == "AND" || e.Op == "OR" {
85+
if strings.HasPrefix(e.Op, "_") || e.Op == "AND" || e.Op == "OR" {
8686
return fmt.Sprintf("(%v %v %v)", lhs, e.Op, rhs)
8787
} else if strings.Contains(e.Op, "LIKE") || e.Op == "IS" || e.Op == "IN" || e.Op == "REGEXP" {
8888
return fmt.Sprintf("%v %v %v", lhs, e.Op, rhs)

quesma/model/highlighter.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ func (h *Highlighter) SetTokensToHighlight(selectCmd SelectCommand) {
5757

5858
visitor.OverrideVisitInfix = func(b *BaseExprVisitor, e InfixExpr) interface{} {
5959
switch e.Op {
60-
case "iLIKE", "LIKE", "IN", "=":
60+
case "iLIKE", "LIKE", "IN", "=", MatchOperator:
6161
lhs, isColumnRef := e.Left.(ColumnRef)
6262
rhs, isLiteral := e.Right.(LiteralExpr)
6363
if isLiteral && isColumnRef { // we only highlight in this case
6464
switch literalAsString := rhs.Value.(type) {
6565
case string:
66-
literalAsString = strings.TrimPrefix(literalAsString, "'%")
66+
literalAsString = strings.TrimPrefix(literalAsString, "'")
6767
literalAsString = strings.TrimPrefix(literalAsString, "%")
6868
literalAsString = strings.TrimSuffix(literalAsString, "'")
6969
literalAsString = strings.TrimSuffix(literalAsString, "%")

quesma/queryparser/query_parser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,7 @@ func (cw *ClickhouseQueryTranslator) parseMatch(queryMap QueryMap, matchPhrase b
588588
computedIdMatchingQuery := cw.parseIds(QueryMap{"values": []interface{}{subQuery}})
589589
statements = append(statements, computedIdMatchingQuery.WhereClause)
590590
} else {
591-
simpleStat := model.NewInfixExpr(model.NewColumnRef(fieldName), "iLIKE", model.NewLiteral("'%"+subQuery+"%'"))
591+
simpleStat := model.NewInfixExpr(model.NewColumnRef(fieldName), model.MatchOperator, model.NewLiteral("'"+subQuery+"'"))
592592
statements = append(statements, simpleStat)
593593
}
594594
}

quesma/quesma/schema_transformer.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,7 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
751751
{TransformationName: "GeoTransformation", Transformation: s.applyGeoTransformations},
752752
{TransformationName: "ArrayTransformation", Transformation: s.applyArrayTransformations},
753753
{TransformationName: "MapTransformation", Transformation: s.applyMapTransformations},
754+
{TransformationName: "MatchOperatorTransformation", Transformation: s.applyMatchOperator},
754755

755756
// Section 4: compensations and checks
756757
{TransformationName: "BooleanLiteralTransformation", Transformation: s.applyBooleanLiteralLowering},
@@ -789,3 +790,47 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
789790
}
790791
return queries, nil
791792
}
793+
794+
func (s *SchemaCheckPass) applyMatchOperator(indexSchema schema.Schema, query *model.Query) (*model.Query, error) {
795+
796+
visitor := model.NewBaseVisitor()
797+
798+
var err error
799+
800+
visitor.OverrideVisitInfix = func(b *model.BaseExprVisitor, e model.InfixExpr) interface{} {
801+
lhs, ok := e.Left.(model.ColumnRef)
802+
rhs, ok2 := e.Right.(model.LiteralExpr)
803+
804+
if ok && ok2 && e.Op == model.MatchOperator {
805+
field, found := indexSchema.ResolveFieldByInternalName(lhs.ColumnName)
806+
if !found {
807+
logger.Error().Msgf("Field %s not found in schema for table %s, should never happen here", lhs.ColumnName, query.TableName)
808+
}
809+
810+
rhsValue := rhs.Value.(string)
811+
rhsValue = strings.TrimPrefix(rhsValue, "'")
812+
rhsValue = strings.TrimSuffix(rhsValue, "'")
813+
814+
switch field.Type.String() {
815+
case schema.QuesmaTypeInteger.Name, schema.QuesmaTypeLong.Name, schema.QuesmaTypeUnsignedLong.Name, schema.QuesmaTypeBoolean.Name:
816+
return model.NewInfixExpr(lhs, "=", model.NewLiteral(rhsValue))
817+
default:
818+
return model.NewInfixExpr(lhs, "iLIKE", model.NewLiteral("'%"+rhsValue+"%'"))
819+
}
820+
}
821+
822+
return model.NewInfixExpr(e.Left.Accept(b).(model.Expr), e.Op, e.Right.Accept(b).(model.Expr))
823+
}
824+
825+
expr := query.SelectCommand.Accept(visitor)
826+
827+
if err != nil {
828+
return nil, err
829+
}
830+
831+
if _, ok := expr.(*model.SelectCommand); ok {
832+
query.SelectCommand = *expr.(*model.SelectCommand)
833+
}
834+
return query, nil
835+
836+
}

quesma/quesma/schema_transformer_test.go

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -979,3 +979,107 @@ func TestFullTextFields(t *testing.T) {
979979
})
980980
}
981981
}
982+
983+
func Test_applyMatchOperator(t *testing.T) {
984+
schemaTable := schema.Table{
985+
Columns: map[string]schema.Column{
986+
"message": {Name: "message", Type: "String"},
987+
"count": {Name: "count", Type: "Int64"},
988+
},
989+
}
990+
991+
tests := []struct {
992+
name string
993+
query *model.Query
994+
expected *model.Query
995+
}{
996+
{
997+
name: "match operator transformation for String (ILIKE)",
998+
query: &model.Query{
999+
TableName: "test",
1000+
SelectCommand: model.SelectCommand{
1001+
FromClause: model.NewTableRef("test"),
1002+
Columns: []model.Expr{model.NewColumnRef("message")},
1003+
WhereClause: model.NewInfixExpr(
1004+
model.NewColumnRef("message"),
1005+
model.MatchOperator,
1006+
model.NewLiteral("'needle'"),
1007+
),
1008+
},
1009+
},
1010+
expected: &model.Query{
1011+
TableName: "test",
1012+
SelectCommand: model.SelectCommand{
1013+
FromClause: model.NewTableRef("test"),
1014+
Columns: []model.Expr{model.NewColumnRef("message")},
1015+
WhereClause: model.NewInfixExpr(
1016+
model.NewColumnRef("message"),
1017+
"iLIKE",
1018+
model.NewLiteral("'%needle%'"),
1019+
),
1020+
},
1021+
},
1022+
},
1023+
{
1024+
name: "match operator transformation for Int64 (=)",
1025+
query: &model.Query{
1026+
TableName: "test",
1027+
SelectCommand: model.SelectCommand{
1028+
FromClause: model.NewTableRef("test"),
1029+
Columns: []model.Expr{model.NewColumnRef("message")},
1030+
WhereClause: model.NewInfixExpr(
1031+
model.NewColumnRef("count"),
1032+
model.MatchOperator,
1033+
model.NewLiteral("'123'"),
1034+
),
1035+
},
1036+
},
1037+
expected: &model.Query{
1038+
TableName: "test",
1039+
SelectCommand: model.SelectCommand{
1040+
FromClause: model.NewTableRef("test"),
1041+
Columns: []model.Expr{model.NewColumnRef("message")},
1042+
WhereClause: model.NewInfixExpr(
1043+
model.NewColumnRef("count"),
1044+
"=",
1045+
model.NewLiteral("123"),
1046+
),
1047+
},
1048+
},
1049+
},
1050+
}
1051+
1052+
for _, tt := range tests {
1053+
t.Run(tt.name, func(t *testing.T) {
1054+
tableDiscovery :=
1055+
fixedTableProvider{tables: map[string]schema.Table{
1056+
"test": schemaTable,
1057+
}}
1058+
1059+
indexConfig := map[string]config.IndexConfiguration{
1060+
"test": {
1061+
Name: "test",
1062+
},
1063+
}
1064+
1065+
cfg := config.QuesmaConfiguration{
1066+
IndexConfig: indexConfig,
1067+
}
1068+
1069+
s := schema.NewSchemaRegistry(tableDiscovery, &cfg, clickhouse.SchemaTypeAdapter{})
1070+
transform := &SchemaCheckPass{cfg: &cfg}
1071+
1072+
indexSchema, ok := s.FindSchema("test")
1073+
if !ok {
1074+
t.Fatal("schema not found")
1075+
}
1076+
1077+
actual, err := transform.applyMatchOperator(indexSchema, tt.query)
1078+
if err != nil {
1079+
t.Fatal(err)
1080+
}
1081+
1082+
assert.Equal(t, model.AsString(tt.expected.SelectCommand), model.AsString(actual.SelectCommand))
1083+
})
1084+
}
1085+
}

quesma/testdata/aggregation_requests.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1844,7 +1844,7 @@ var AggregationTests = []AggregationTestCase{
18441844
"@timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS "aggr__0__1__key_0",
18451845
count(*) AS "aggr__0__1__count"
18461846
FROM __quesma_table_name
1847-
WHERE ("host.name" iLIKE '%prometheus%' AND ("@timestamp">=
1847+
WHERE (("host.name" __quesma_match 'prometheus') AND ("@timestamp">=
18481848
fromUnixTimestamp64Milli(1706891809940) AND "@timestamp"<=
18491849
fromUnixTimestamp64Milli(1707496609940)))
18501850
GROUP BY "severity" AS "aggr__0__key_0",
@@ -2848,8 +2848,8 @@ var AggregationTests = []AggregationTestCase{
28482848
"@timestamp") AS "metric__earliest_timestamp_col_0", maxOrNull("@timestamp")
28492849
AS "metric__latest_timestamp_col_0"
28502850
FROM ` + TableName + `
2851-
WHERE ((` + fullTextFieldName + ` iLIKE '%posei%' AND "message" iLIKE '%User logged out%') AND
2852-
"host.name" iLIKE '%poseidon%')`,
2851+
WHERE ((` + fullTextFieldName + ` iLIKE '%posei%' AND ("message" __quesma_match 'User logged out')) AND
2852+
("host.name" __quesma_match 'poseidon'))`,
28532853
},
28542854
{ // [15]
28552855
TestName: "date_histogram: regression test",
@@ -6195,7 +6195,7 @@ var AggregationTests = []AggregationTestCase{
61956195
"aggr__0__1__parent_count", "message" AS "aggr__0__1__key_0",
61966196
count(*) AS "aggr__0__1__count"
61976197
FROM __quesma_table_name
6198-
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
6198+
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
61996199
GROUP BY "host.name" AS "aggr__0__key_0", "message" AS "aggr__0__1__key_0"))
62006200
WHERE ("aggr__0__order_1_rank"<=11 AND "aggr__0__1__order_1_rank"<=4)
62016201
ORDER BY "aggr__0__order_1_rank" ASC, "aggr__0__1__order_1_rank" ASC`,
@@ -6311,7 +6311,7 @@ var AggregationTests = []AggregationTestCase{
63116311
"aggr__0__1__2__parent_count", "message" AS "aggr__0__1__2__key_0",
63126312
count(*) AS "aggr__0__1__2__count"
63136313
FROM __quesma_table_name
6314-
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
6314+
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
63156315
GROUP BY "host.name" AS "aggr__0__key_0", "message" AS "aggr__0__1__key_0",
63166316
"message" AS "aggr__0__1__2__key_0"))
63176317
WHERE (("aggr__0__order_1_rank"<=11 AND "aggr__0__1__order_1_rank"<=4) AND
@@ -6403,7 +6403,7 @@ var AggregationTests = []AggregationTestCase{
64036403
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
64046404
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
64056405
FROM ` + TableName + `
6406-
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
6406+
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
64076407
GROUP BY "host.name" AS "aggr__0__key_0",
64086408
"FlightDelayMin" AS "aggr__0__1__key_0"))
64096409
WHERE "aggr__0__order_1_rank"<=9
@@ -6513,7 +6513,7 @@ var AggregationTests = []AggregationTestCase{
65136513
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
65146514
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
65156515
FROM ` + TableName + `
6516-
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
6516+
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
65176517
GROUP BY "host.name" AS "aggr__0__key_0",
65186518
"FlightDelayMin" AS "aggr__0__1__key_0"))
65196519
WHERE "aggr__0__order_1_rank"<=11
@@ -6610,7 +6610,7 @@ var AggregationTests = []AggregationTestCase{
66106610
sum(count(*)) OVER (PARTITION BY "aggr__0__key_0") AS "aggr__0__count",
66116611
"FlightDelayMin" AS "aggr__0__1__key_0", count(*) AS "aggr__0__1__count"
66126612
FROM __quesma_table_name
6613-
WHERE ("message" IS NOT NULL AND NOT ("message" iLIKE '%US%'))
6613+
WHERE ("message" IS NOT NULL AND NOT (("message" __quesma_match 'US')))
66146614
GROUP BY "host.name" AS "aggr__0__key_0",
66156615
"FlightDelayMin" AS "aggr__0__1__key_0"))
66166616
WHERE "aggr__0__order_1_rank"<=11
@@ -6826,7 +6826,7 @@ var AggregationTests = []AggregationTestCase{
68266826
count(*) AS "aggr__2__count",
68276827
sumOrNull("total") AS "metric__2__1_col_0"
68286828
FROM ` + TableName + `
6829-
WHERE NOT ((("abc">=0 AND "abc"<600) OR "type" iLIKE '%def%'))
6829+
WHERE NOT ((("abc">=0 AND "abc"<600) OR ("type" __quesma_match 'def')))
68306830
GROUP BY "name" AS "aggr__2__key_0"
68316831
ORDER BY "metric__2__1_col_0" DESC, "aggr__2__key_0" ASC
68326832
LIMIT 11`,

quesma/testdata/clients/clover.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,7 @@ var CloverTests = []testdata.AggregationTestCase{
765765
"field" AS "aggr__other-filter__3__key_0",
766766
count(*) AS "aggr__other-filter__3__count"
767767
FROM __quesma_table_name
768-
WHERE ("a" iLIKE '%b%' AND "c" iLIKE '%d%')
768+
WHERE (("a" __quesma_match 'b') AND ("c" __quesma_match 'd'))
769769
GROUP BY "field" AS "aggr__other-filter__3__key_0"
770770
ORDER BY "aggr__other-filter__3__count" DESC,
771771
"aggr__other-filter__3__key_0" ASC

quesma/testdata/requests.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,7 +1131,7 @@ var TestsSearch = []SearchTestCase{
11311131
},
11321132
"track_total_hits": false
11331133
}`,
1134-
[]string{`"host_name" iLIKE '%prometheus%'`},
1134+
[]string{`("host_name" __quesma_match 'prometheus')`},
11351135
model.ListAllFields,
11361136
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "host_name" iLIKE '%prometheus%' LIMIT 10`},
11371137
[]string{},
@@ -1148,7 +1148,7 @@ var TestsSearch = []SearchTestCase{
11481148
"size": 100,
11491149
"track_total_hits": false
11501150
}`,
1151-
[]string{`((("message" iLIKE '%this%' OR "message" iLIKE '%is%') OR "message" iLIKE '%a%') OR "message" iLIKE '%test%')`},
1151+
[]string{`(((("message" __quesma_match 'this') OR ("message" __quesma_match 'is')) OR ("message" __quesma_match 'a')) OR ("message" __quesma_match 'test'))`},
11521152
model.ListAllFields,
11531153
[]string{
11541154
`SELECT "message" FROM ` + TableName + ` WHERE ((("message" iLIKE '%this%' OR "message" iLIKE '%is%') ` +
@@ -1405,7 +1405,7 @@ var TestsSearch = []SearchTestCase{
14051405
},
14061406
"track_total_hits": false
14071407
}`,
1408-
[]string{`"message" iLIKE '%this is a test%'`},
1408+
[]string{`("message" __quesma_match 'this is a test')`},
14091409
model.ListAllFields,
14101410
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" iLIKE '%this is a test%'`},
14111411
[]string{},
@@ -1423,7 +1423,7 @@ var TestsSearch = []SearchTestCase{
14231423
},
14241424
"track_total_hits": false
14251425
}`,
1426-
[]string{`"message" iLIKE '%this is a test%'`},
1426+
[]string{`("message" __quesma_match 'this is a test')`},
14271427
model.ListAllFields,
14281428
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" iLIKE '%this is a test%'`},
14291429
[]string{},
@@ -1687,9 +1687,9 @@ var TestsSearch = []SearchTestCase{
16871687
"track_total_hits": true
16881688
}`,
16891689
[]string{
1690-
`(("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
1690+
`((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
16911691
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491)))`,
1692-
`((("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
1692+
`(((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
16931693
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491))) ` +
16941694
`AND "stream.namespace" IS NOT NULL)`,
16951695
},
@@ -1847,10 +1847,10 @@ var TestsSearch = []SearchTestCase{
18471847
"timeout": "1000ms"
18481848
}`,
18491849
[]string{
1850-
`((("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
1850+
`(((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
18511851
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491))) ` +
18521852
`AND "namespace" IS NOT NULL)`,
1853-
`(("message" iLIKE '%User logged out%' AND "host.name" iLIKE '%poseidon%') ` +
1853+
`((("message" __quesma_match 'User logged out') AND ("host.name" __quesma_match 'poseidon')) ` +
18541854
`AND ("@timestamp">=fromUnixTimestamp64Milli(1706542596491) AND "@timestamp"<=fromUnixTimestamp64Milli(1706551896491)))`,
18551855
},
18561856
model.Normal,
@@ -2085,7 +2085,7 @@ var TestsSearch = []SearchTestCase{
20852085
"track_total_hits": false,
20862086
"size": 12
20872087
}`,
2088-
[]string{`("message" iLIKE '%User logged out%' AND "message" iLIKE '%User logged out%')`},
2088+
[]string{`(("message" __quesma_match 'User logged out') AND ("message" __quesma_match 'User logged out'))`},
20892089
model.ListAllFields,
20902090
[]string{
20912091
`SELECT "message" ` +

0 commit comments

Comments
 (0)