Skip to content

Commit d018645

Browse files
authored
[8.18] ESQL: Push down StartsWith and EndsWith functions to Lucene (elastic#124871)
Manual 8.18 backport of: - Main PR: elastic#123381 - Tests fix: elastic#124625 - Tests fix: elastic#124764
1 parent 4d1ff20 commit d018645

File tree

8 files changed

+400
-2
lines changed

8 files changed

+400
-2
lines changed

docs/changelog/123381.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 123381
2+
summary: Push down `StartsWith` and `EndsWith` functions to Lucene
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 123067

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,47 @@ false | null
964964
false | null
965965
;
966966

967+
startsWithLucenePushdown
968+
969+
from hosts
970+
| where starts_with(host, "bet") and starts_with(host_group, "Kuber")
971+
| keep host, host_group
972+
| sort host, host_group;
973+
974+
host:keyword | host_group:text
975+
beta | Kubernetes cluster
976+
beta | Kubernetes cluster
977+
beta | Kubernetes cluster
978+
;
979+
980+
startsWithLuceneDisabledPushdown
981+
982+
from hosts
983+
| where host == "unknown host" or (starts_with(host, "bet") and starts_with(host_group, "Kuber"))
984+
| keep host, host_group
985+
| sort host, host_group;
986+
987+
host:keyword | host_group:text
988+
beta | Kubernetes cluster
989+
beta | Kubernetes cluster
990+
beta | Kubernetes cluster
991+
;
992+
993+
startsWithLucenePushdownIgnoreMultivalues
994+
required_capability: starts_with_ends_with_lucene_pushdown
995+
996+
from hosts
997+
| where starts_with(description, "epsilon")
998+
| keep description
999+
| sort description;
1000+
1001+
warningRegex: evaluation of \[starts_with\(description, \\\"epsilon\\\"\)\] failed, treating result as null. Only first 20 failures recorded.
1002+
warningRegex: java.lang.IllegalArgumentException: single-value function encountered multi-value
1003+
1004+
description:text
1005+
epsilon gw instance
1006+
;
1007+
9671008
substringOfText
9681009
required_capability: mv_warn
9691010

@@ -1192,6 +1233,144 @@ Bernatsky |false
11921233
;
11931234

11941235

1236+
endsWithLucenePushdown
1237+
1238+
from hosts
1239+
| where ends_with(host, "ta") and ends_with(host_group, "cluster")
1240+
| keep host, host_group
1241+
| sort host, host_group;
1242+
1243+
host:keyword | host_group:text
1244+
beta | Kubernetes cluster
1245+
beta | Kubernetes cluster
1246+
beta | Kubernetes cluster
1247+
;
1248+
1249+
endsWithLuceneDisabledPushdown
1250+
1251+
from hosts
1252+
| where host == "unknown host" or (ends_with(host, "ta") and ends_with(host_group, "cluster"))
1253+
| keep host, host_group
1254+
| sort host, host_group;
1255+
1256+
host:keyword | host_group:text
1257+
beta | Kubernetes cluster
1258+
beta | Kubernetes cluster
1259+
beta | Kubernetes cluster
1260+
;
1261+
1262+
endsWithLucenePushdownIgnoreMultivalues
1263+
required_capability: starts_with_ends_with_lucene_pushdown
1264+
1265+
from hosts
1266+
| where ends_with(description, "host")
1267+
| keep description
1268+
| sort description;
1269+
1270+
warningRegex: evaluation of \[ends_with\(description, \\\"host\\\"\)\] failed, treating result as null. Only first 20 failures recorded.
1271+
warningRegex: java.lang.IllegalArgumentException: single-value function encountered multi-value
1272+
1273+
description:text
1274+
;
1275+
1276+
1277+
lucenePushdownMultipleWhere
1278+
1279+
from hosts
1280+
| where starts_with(host, "bet")
1281+
| keep host, host_group
1282+
| sort host, host_group
1283+
| where ends_with(host_group, "cluster");
1284+
1285+
host:keyword | host_group:text
1286+
beta | Kubernetes cluster
1287+
beta | Kubernetes cluster
1288+
beta | Kubernetes cluster
1289+
;
1290+
1291+
lucenePushdownMultipleIndices
1292+
required_capability: casting_operator
1293+
1294+
from airports*
1295+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1296+
| keep abbrev, name
1297+
| sort abbrev, name;
1298+
1299+
abbrev:keyword | name:text
1300+
LUH | Sahnewal
1301+
LUH | Sahnewal
1302+
LUH | Sahnewal
1303+
LUH | Sahnewal
1304+
LUH | Sahnewal
1305+
LUH | Sahnewal
1306+
;
1307+
1308+
lucenePushdownOr
1309+
required_capability: casting_operator
1310+
1311+
from airports
1312+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH")
1313+
| keep abbrev, name
1314+
| sort abbrev, name;
1315+
1316+
abbrev:keyword | name:text
1317+
AUH | Abu Dhabi Int'l
1318+
LUH | Sahnewal
1319+
RUH | King Khalid Int'l
1320+
;
1321+
1322+
lucenePushdownMultipleOr
1323+
required_capability: casting_operator
1324+
1325+
from airports
1326+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH") or starts_with(abbrev, "OOL")
1327+
| keep abbrev, name
1328+
| sort abbrev, name;
1329+
1330+
abbrev:keyword | name:text
1331+
AUH | Abu Dhabi Int'l
1332+
LUH | Sahnewal
1333+
OOL | Gold Coast
1334+
RUH | King Khalid Int'l
1335+
;
1336+
1337+
lucenePushdownMultipleAnd
1338+
required_capability: casting_operator
1339+
1340+
from airports
1341+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1342+
| where ends_with(name::keyword, "al")
1343+
| keep abbrev, name
1344+
| sort abbrev, name;
1345+
1346+
abbrev:keyword | name:text
1347+
LUH | Sahnewal
1348+
;
1349+
1350+
lucenePushdownMixAndOr
1351+
required_capability: casting_operator
1352+
1353+
from airports
1354+
| where starts_with(name::keyword, "Sahn") and (starts_with(name::keyword, "Abc") or ends_with(abbrev, "UH"))
1355+
| keep abbrev, name, scalerank
1356+
| sort abbrev, name;
1357+
1358+
abbrev:keyword | name:text | scalerank:integer
1359+
LUH | Sahnewal | 9
1360+
;
1361+
1362+
lucenePushdownMixOrAnd
1363+
required_capability: casting_operator
1364+
1365+
from airports
1366+
| where starts_with(name::keyword, "Sahn") or (starts_with(abbrev, "G") and ends_with(name::keyword, "Falls Int'l"))
1367+
| keep abbrev, name, scalerank
1368+
| sort abbrev;
1369+
1370+
abbrev:keyword | name:text | scalerank:integer
1371+
GTF | Great Falls Int'l | 8
1372+
LUH | Sahnewal | 9
1373+
;
11951374

11961375
toLowerRow#[skip:-8.12.99]
11971376
// tag::to_lower[]

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,12 @@ public enum Cap {
676676
*/
677677
REMOVE_REDUNDANT_SORT,
678678

679+
/**
680+
* Lucene query pushdown for the StartsWith and EndsWith functions.
681+
* This capability was created to avoid receiving wrong warnings from old nodes in mixed clusters.
682+
*/
683+
STARTS_WITH_ENDS_WITH_LUCENE_PUSHDOWN,
684+
679685
/**
680686
* Allow mixed numeric types in conditional functions - case, greatest and least
681687
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWith.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class EndsWith extends EsqlScalarFunction {
43+
public class EndsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "EndsWith", EndsWith::new);
3645

3746
private final Expression str;
@@ -129,6 +138,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
129138
return new EndsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(suffix));
130139
}
131140

141+
/**
 * Reports whether this {@code ends_with} call can be translated into a Lucene query.
 * The call is translatable only when {@code pushdownPredicates} accepts {@code str} as a
 * pushable attribute and {@code suffix} is foldable.
 *
 * @param pushdownPredicates predicates used to decide whether {@code str} may be pushed down
 * @return {@code true} when both conditions above hold
 */
@Override
142+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
143+
return pushdownPredicates.isPushableAttribute(str) && suffix.foldable();
144+
}
145+
146+
/**
 * Builds the query equivalent of this {@code ends_with} call: a {@link WildcardQuery}
 * whose pattern is {@code "*"} followed by the escaped, folded value of {@code suffix}.
 *
 * @param handler used to resolve the name of the field the query targets
 * @return a wildcard query on the resolved field name
 */
@Override
147+
public Query asQuery(TranslatorHandler handler) {
148+
LucenePushdownPredicates.checkIsPushableAttribute(str);
149+
// For a FieldAttribute the query is issued against exactAttribute()
// (presumably the exact/keyword variant of a text field -- confirm against FieldAttribute).
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
150+
151+
// TODO: Get the real FoldContext here
152+
// The folded suffix is escaped so that characters such as '*' and '?' inside it are
// matched literally; only the leading "*" added here acts as a wildcard.
var wildcardQuery = "*" + QueryParser.escape(BytesRefs.toString(suffix.fold(FoldContext.small())));
153+
154+
return new WildcardQuery(source(), fieldName, wildcardQuery);
155+
}
156+
157+
/**
 * Returns the expression acting as the single-value field of this function, which is {@code str}.
 * NOTE(review): presumably consumed by the translation layer to apply single-value semantics
 * to the pushed-down query -- confirm against {@code TranslationAware.SingleValueTranslationAware}.
 */
@Override
158+
public Expression singleValueField() {
159+
return str;
160+
}
161+
132162
/** Package-private accessor for the {@code str} expression of this function. */
Expression str() {
133163
return str;
134164
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWith.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class StartsWith extends EsqlScalarFunction {
43+
public class StartsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3645
Expression.class,
3746
"StartsWith",
@@ -126,6 +135,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
126135
return new StartsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(prefix));
127136
}
128137

138+
/**
 * Reports whether this {@code starts_with} call can be translated into a Lucene query.
 * The call is translatable only when {@code pushdownPredicates} accepts {@code str} as a
 * pushable attribute and {@code prefix} is foldable.
 *
 * @param pushdownPredicates predicates used to decide whether {@code str} may be pushed down
 * @return {@code true} when both conditions above hold
 */
@Override
139+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
140+
return pushdownPredicates.isPushableAttribute(str) && prefix.foldable();
141+
}
142+
143+
/**
 * Builds the query equivalent of this {@code starts_with} call: a {@link WildcardQuery}
 * whose pattern is the escaped, folded value of {@code prefix} followed by {@code "*"}.
 *
 * @param handler used to resolve the name of the field the query targets
 * @return a wildcard query on the resolved field name
 */
@Override
144+
public Query asQuery(TranslatorHandler handler) {
145+
LucenePushdownPredicates.checkIsPushableAttribute(str);
146+
// For a FieldAttribute the query is issued against exactAttribute()
// (presumably the exact/keyword variant of a text field -- confirm against FieldAttribute).
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
147+
148+
// TODO: Get the real FoldContext here
149+
// The folded prefix is escaped so that characters such as '*' and '?' inside it are
// matched literally; only the trailing "*" added here acts as a wildcard.
var wildcardQuery = QueryParser.escape(BytesRefs.toString(prefix.fold(FoldContext.small()))) + "*";
150+
151+
return new WildcardQuery(source(), fieldName, wildcardQuery);
152+
}
153+
154+
/**
 * Returns the expression acting as the single-value field of this function, which is {@code str}.
 * NOTE(review): presumably consumed by the translation layer to apply single-value semantics
 * to the pushed-down query -- confirm against {@code TranslationAware.SingleValueTranslationAware}.
 */
@Override
155+
public Expression singleValueField() {
156+
return str;
157+
}
158+
129159
/** Package-private accessor for the {@code str} expression of this function. */
Expression str() {
130160
return str;
131161
}

0 commit comments

Comments
 (0)