6 changes: 4 additions & 2 deletions platform/frontend_connectors/schema_transformer.go
@@ -1136,10 +1136,12 @@ func (s *SchemaCheckPass) applyMatchOperator(indexSchema schema.Schema, query *m
case schema.QuesmaTypeKeyword.Name:
return equal()
default:
-// ILIKE '%%' has terrible performance, but semantically means "is not null", hence this transformation
-if rhsValue == "%%" {
+if rhsValue == "%%" { // ILIKE '%%' has terrible performance, but semantically means "is not null", hence this transformation
return model.NewInfixExpr(lhs, "IS", model.NewLiteral("NOT NULL"))
}
+// we might investigate the potential performance gain of checking
+// whether, if rhsValue doesn't contain '%', we could use '=' instead of 'ILIKE';
+// *however*, that'd require a few tweaks in the parser
return ilike()
}
}
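For context, here is a minimal, self-contained sketch of what this default branch now does, with Quesma's model.Expr tree reduced to plain strings; applyMatchDefault and its string rendering are illustrative stand-ins, not the real API:

package main

import (
	"fmt"
	"strings"
)

// applyMatchDefault approximates the default case of applyMatchOperator.
func applyMatchDefault(lhs, rhsValue string) string {
	if rhsValue == "%%" {
		// ILIKE '%%' matches every non-NULL value, so rewrite it to the
		// semantically equivalent and far cheaper IS NOT NULL.
		return fmt.Sprintf("%s IS NOT NULL", lhs)
	}
	// Possible follow-up raised in the new comment above: when rhsValue
	// contains no '%', plain '=' could replace ILIKE (case sensitivity
	// aside), but that would first need a few tweaks in the parser.
	if !strings.Contains(rhsValue, "%") {
		// hypothetical: return fmt.Sprintf("%s = '%s'", lhs, rhsValue)
	}
	return fmt.Sprintf("%s ILIKE '%s'", lhs, rhsValue)
}

func main() {
	fmt.Println(applyMatchDefault(`"tags"`, "%%"))    // "tags" IS NOT NULL
	fmt.Println(applyMatchDefault(`"tags"`, "error")) // "tags" ILIKE 'error'
}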
94 changes: 47 additions & 47 deletions platform/parsers/elastic_query_dsl/lucene/lucene_parser_test.go

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions platform/parsers/elastic_query_dsl/lucene/value.go
@@ -49,9 +49,6 @@ func (v termValue) toExpression(fieldName string) model.Expr {
if alreadyQuoted(v.term) {
termAsStringToClickhouse = termAsStringToClickhouse[1 : len(termAsStringToClickhouse)-1]
}
-if !util.IsSurroundedWithPercents(termAsStringToClickhouse) {
-termAsStringToClickhouse = util.SurroundWithPercents(termAsStringToClickhouse)
-}
if !util.IsSingleQuoted(termAsStringToClickhouse) {
termAsStringToClickhouse = util.SingleQuote(termAsStringToClickhouse)
}
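These deleted lines are the heart of the PR: bare Lucene terms are no longer wrapped in '%...%', so they stop being implicit substring matches. A reduced sketch of the surviving logic, with the util helpers inlined by hand (the real helpers live in Quesma's util package; this simplification is ours):

package main

import (
	"fmt"
	"strings"
)

// toClickhouseTerm mirrors the remaining steps of termValue.toExpression:
// strip surrounding double quotes, then single-quote for SQL. The step
// removed by this PR is shown commented out.
func toClickhouseTerm(term string) string {
	s := term
	if len(s) >= 2 && strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) { // alreadyQuoted
		s = s[1 : len(s)-1]
	}
	// Removed by this PR:
	// if !isSurroundedWithPercents(s) { s = "%" + s + "%" }
	if !(strings.HasPrefix(s, "'") && strings.HasSuffix(s, "'")) { // util.IsSingleQuoted
		s = "'" + s + "'"
	}
	return s
}

func main() {
	fmt.Println(toClickhouseTerm("clover")) // 'clover'   (was '%clover%')
	fmt.Println(toClickhouseTerm("dress%")) // 'dress%'   (user wildcards pass through)
}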
4 changes: 2 additions & 2 deletions platform/testdata/clients/clover.go
@@ -347,7 +347,7 @@ var CloverTests = []testdata.AggregationTestCase{
ExpectedPancakeSQL: `
SELECT count(*) AS "aggr__timeseries__count",
countIf(true) AS "metric__timeseries__a2-denominator_col_0",
countIf(NOT ("table.flower" __quesma_match '%clover%')) AS
countIf(NOT ("table.flower" __quesma_match 'clover')) AS
"metric__timeseries__a2-numerator_col_0"
FROM __quesma_table_name
WHERE ("@timestamp">=fromUnixTimestamp64Milli(1728640683723) AND "@timestamp"<=
@@ -1115,7 +1115,7 @@ var CloverTests = []testdata.AggregationTestCase{
"aggr__q__time_buckets__key_0", count(*) AS "aggr__q__time_buckets__count",
sumOrNull("count") AS "metric__q__time_buckets__sum(count)_col_0"
FROM __quesma_table_name
WHERE NOT ("str_field" __quesma_match '%CRASH%')
WHERE NOT ("str_field" __quesma_match 'CRASH')
GROUP BY toInt64((toUnixTimestamp64Milli("@timestamp")+timeZoneOffset(toTimezone
("@timestamp", 'Europe/Warsaw'))*1000) / 1800000) AS
"aggr__q__time_buckets__key_0"
8 changes: 4 additions & 4 deletions platform/testdata/kibana_sample_data_ecommerce.go
@@ -858,11 +858,11 @@ var KibanaSampleDataEcommerce = []AggregationTestCase{
SELECT toInt64((toUnixTimestamp64Milli("order_date")+timeZoneOffset(toTimezone(
"order_date", 'Europe/Warsaw'))*1000) / 43200000) AS "aggr__0__key_0",
count(*) AS "aggr__0__count",
countIf(("products.product_name" __quesma_match '%%cocktail%' OR
"__quesma_fulltext_field_name" __quesma_match '%dress%%')) AS
countIf(("products.product_name" __quesma_match '%cocktail' OR
"__quesma_fulltext_field_name" __quesma_match 'dress%')) AS
"aggr__0__1-bucket__count",
sumOrNullIf("taxful_total_price", ("products.product_name" __quesma_match '%%cocktail%'
OR "__quesma_fulltext_field_name" __quesma_match '%dress%%')) AS
sumOrNullIf("taxful_total_price", ("products.product_name" __quesma_match '%cocktail'
OR "__quesma_fulltext_field_name" __quesma_match 'dress%')) AS
"metric__0__1-bucket__1-metric_col_0"
FROM __quesma_table_name
WHERE ("order_date">=fromUnixTimestamp64Milli(1740234098238) AND "order_date"<=
38 changes: 19 additions & 19 deletions platform/testdata/kibana_sample_data_flights.go
@@ -800,8 +800,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%' AND
-"Cancelled" __quesma_match '%true%'))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true' AND
+"Cancelled" __quesma_match 'true'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS "aggr__1__2__key_0"
ORDER BY "aggr__1__2__key_0" ASC`,
@@ -813,8 +813,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%'))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(
toTimezone("timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS
"aggr__1__2__key_0"
@@ -832,8 +832,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000))
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%')))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true')))
SELECT "aggr__1__count", "aggr__1__2__key_0", "aggr__1__2__count",
"top_metrics__1__2__4_col_0", "top_metrics__1__2__4_col_1", "top_hits_rank"
FROM "quesma_top_hits_join"
@@ -847,8 +847,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%'))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(
toTimezone("timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS
"aggr__1__2__key_0"
@@ -866,8 +866,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000))
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%')))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true')))
SELECT "aggr__1__count", "aggr__1__2__key_0", "aggr__1__2__count",
"top_metrics__1__2__5_col_0", "top_metrics__1__2__5_col_1", "top_hits_rank"
FROM "quesma_top_hits_join"
@@ -881,8 +881,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%'))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(
toTimezone("timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS
"aggr__1__2__key_0"
@@ -900,8 +900,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000))
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%')))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true')))
SELECT "aggr__1__count", "aggr__1__2__key_0", "aggr__1__2__count",
"top_metrics__1__2__6_col_0", "top_metrics__1__2__6_col_1", "top_hits_rank"
FROM "quesma_top_hits_join"
@@ -915,8 +915,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%'))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(
toTimezone("timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS
"aggr__1__2__key_0"
@@ -934,8 +934,8 @@ var KibanaSampleDataFlights = []AggregationTestCase{
toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000))
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match '%true%'
-AND "Cancelled" __quesma_match '%true%')))
+fromUnixTimestamp64Milli(1740835408853)) AND ("FlightDelay" __quesma_match 'true'
+AND "Cancelled" __quesma_match 'true')))
SELECT "aggr__1__count", "aggr__1__2__key_0", "aggr__1__2__count",
"top_metrics__1__2__7_col_0", "top_metrics__1__2__7_col_1", "top_hits_rank"
FROM "quesma_top_hits_join"
@@ -1638,7 +1638,7 @@ var KibanaSampleDataFlights = []AggregationTestCase{
SELECT toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS "aggr__0__key_0",
count(*) AS "aggr__0__count",
countIf("FlightDelay" __quesma_match '%true%') AS "metric__0__1-bucket_col_0",
countIf("FlightDelay" __quesma_match 'true') AS "metric__0__1-bucket_col_0",
countIf("__quesma_fulltext_field_name" __quesma_match '%') AS "metric__0__2-bucket_col_0"
FROM __quesma_table_name
WHERE ("timestamp">=fromUnixTimestamp64Milli(1740230608853) AND "timestamp"<=
20 changes: 10 additions & 10 deletions platform/testdata/kibana_sample_data_logs.go
@@ -348,8 +348,8 @@ var KibanaSampleDataLogs = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740178800000) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match '%error%' AND
-"tags" __quesma_match '%security%'))
+fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match 'error' AND
+"tags" __quesma_match 'security'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS "aggr__1__2__key_0"
ORDER BY "aggr__1__2__key_0" ASC`,
@@ -362,8 +362,8 @@ var KibanaSampleDataLogs = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740178800000) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match '%error%' AND
-"tags" __quesma_match '%security%'))
+fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match 'error' AND
+"tags" __quesma_match 'security'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(
toTimezone("timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS
"aggr__1__2__key_0"
@@ -381,8 +381,8 @@ var KibanaSampleDataLogs = []AggregationTestCase{
toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000))
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740178800000) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match '%error%' AND
-"tags" __quesma_match '%security%')))
+fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match 'error' AND
+"tags" __quesma_match 'security')))
SELECT "aggr__1__count", "aggr__1__2__key_0", "aggr__1__2__count",
"top_metrics__1__2__4_col_0", "top_metrics__1__2__4_col_1", "top_hits_rank"
FROM "quesma_top_hits_join"
@@ -396,8 +396,8 @@ var KibanaSampleDataLogs = []AggregationTestCase{
count(*) AS "aggr__1__2__count"
FROM __quesma_table_name
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740178800000) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match '%error%' AND
-"tags" __quesma_match '%security%'))
+fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match 'error' AND
+"tags" __quesma_match 'security'))
GROUP BY toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(
toTimezone("timestamp", 'Europe/Warsaw'))*1000) / 10800000) AS
"aggr__1__2__key_0"
@@ -415,8 +415,8 @@ var KibanaSampleDataLogs = []AggregationTestCase{
toInt64((toUnixTimestamp64Milli("timestamp")+timeZoneOffset(toTimezone(
"timestamp", 'Europe/Warsaw'))*1000) / 10800000))
WHERE (("timestamp">=fromUnixTimestamp64Milli(1740178800000) AND "timestamp"<=
-fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match '%error%' AND
-"tags" __quesma_match '%security%')))
+fromUnixTimestamp64Milli(1740831278103)) AND ("tags" __quesma_match 'error' AND
+"tags" __quesma_match 'security')))
SELECT "aggr__1__count", "aggr__1__2__key_0", "aggr__1__2__count",
"top_metrics__1__2__5_col_0", "top_metrics__1__2__5_col_1", "top_hits_rank"
FROM "quesma_top_hits_join"
12 changes: 6 additions & 6 deletions platform/testdata/requests.go
@@ -1252,9 +1252,9 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
[]string{`"exception-list-agnostic.list_id" __quesma_match '%endpoint\_event\_filters%'`},
[]string{`"exception-list-agnostic.list_id" __quesma_match 'endpoint\_event\_filters'`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "exception-list-agnostic.list_id"='%endpoint\\_event\\_filters%'`},
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "exception-list-agnostic.list_id"='endpoint\\_event\\_filters'`},
[]string{},
},
{ // [10]
@@ -1279,9 +1279,9 @@ var TestsSearch = []SearchTestCase{
},
"track_total_hits": false
}`,
-[]string{fullTextFieldName + ` __quesma_match '%ingest-agent-policies%'`},
+[]string{fullTextFieldName + ` __quesma_match 'ingest-agent-policies'`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE ` + fullTextFieldName + ` ILIKE '%ingest-agent-policies%'`},
[]string{`SELECT "message" FROM ` + TableName + ` WHERE ` + fullTextFieldName + ` ILIKE 'ingest-agent-policies'`},
[]string{},
},
{ // [11]
@@ -1377,9 +1377,9 @@ var TestsSearch = []SearchTestCase{
"track_total_hits": false,
"size": 1
}`,
[]string{`"message" __quesma_match '%% logged%'`},
[]string{`"message" __quesma_match '% logged'`},
model.ListAllFields,
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" ILIKE '%% logged%'`},
[]string{`SELECT "message" FROM ` + TableName + ` WHERE "message" ILIKE '% logged'`},
[]string{},
},
{ // [16]
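Taken together, the testdata updates all encode the same rule: only wildcards the user actually wrote survive into the generated pattern. A quick sketch summarizing the before/after pairs visible in the diffs above (this table is our summary, not parser output):

package main

import "fmt"

func main() {
	// pattern pairs lifted from the test expectations changed in this PR
	pairs := [][2]string{
		{"'%true%'", "'true'"},     // bare term: implicit substring match dropped
		{"'%error%'", "'error'"},   // same for every plain keyword/tag term
		{"'%dress%%'", "'dress%'"}, // wildcard written by the user is kept
		{"'%% logged%'", "'% logged'"},
	}
	for _, p := range pairs {
		fmt.Printf("before: %-14s after: %s\n", p[0], p[1])
	}
}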