Skip to content

Commit 8698705

Browse files
committed
OAK-11555 Elastic: support dot in property and function names
1 parent b58c345 commit 8698705

File tree

7 files changed

+147
-22
lines changed

7 files changed

+147
-22
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java

Lines changed: 90 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,27 +35,106 @@ public class ElasticIndexUtils {
3535
* Convert a JCR property name to a Elasticsearch field name.
3636
* Notice that "|" is not allowed in JCR names.
3737
*
38-
* "." is converted to "|dot|"
39-
* "/" is converted to "||"
40-
*
4138
* @param propertyName the property name
4239
* @return the field name
4340
*/
4441
public static String fieldName(String propertyName) {
42+
if(propertyName.startsWith(":")) {
43+
// there are some hardcoded field names
44+
return propertyName;
45+
}
4546
String fieldName = propertyName;
46-
// 99% property names don't contain a slash or dot,
47-
// so for performance reason use indexOf
48-
int slashIndex = fieldName.indexOf('|');
49-
if (slashIndex >= 0) {
50-
fieldName = fieldName.replaceAll("\\|", "\\|\\|");
47+
boolean escape = false;
48+
if (fieldName.isBlank()) {
49+
// empty field name or field names that only consist of spaces
50+
escape = true;
51+
} else {
52+
// 99.99% property names are OK,
53+
// so we loop over the characters first
54+
for (int i = 0; i < fieldName.length(); i++) {
55+
switch (fieldName.charAt(i)) {
56+
case '|':
57+
case '.':
58+
case '^':
59+
case '_':
60+
escape = true;
61+
}
62+
}
5163
}
52-
int dotIndex = fieldName.indexOf('.');
53-
if (dotIndex >= 0) {
54-
fieldName = fieldName.replaceAll("\\.", "|dot|");
64+
if (escape) {
65+
StringBuilder buff = new StringBuilder(fieldName.length());
66+
for (int i = 0; i < fieldName.length(); i++) {
67+
char c = fieldName.charAt(i);
68+
switch (c) {
69+
case '|':
70+
buff.append("||");
71+
break;
72+
case '.':
73+
case '^':
74+
buff.append('|').append(Integer.toHexString(c)).append('|');
75+
break;
76+
default:
77+
buff.append(c);
78+
}
79+
}
80+
fieldName = buff.toString();
81+
// internal field start with a _
82+
// we also support empty or just spaces
83+
if (fieldName.startsWith("_") || fieldName.isBlank()) {
84+
fieldName = "|" + fieldName;
85+
}
5586
}
5687
return fieldName;
5788
}
5889

90+
/**
91+
* Convert an elasticsearch field name to a JCR property name.
92+
*
93+
* @param fieldName the field name
94+
* @return the property name
95+
*/
96+
public static String propertyNameFromFieldName(String fieldName) {
97+
if (fieldName.indexOf('|') < 0) {
98+
return fieldName;
99+
}
100+
if (fieldName.startsWith("|")) {
101+
if (fieldName.equals("|")) {
102+
return "";
103+
} if (fieldName.startsWith("|_") || fieldName.substring(1).isBlank()) {
104+
fieldName = fieldName.substring(1);
105+
}
106+
}
107+
StringBuilder buff = new StringBuilder(fieldName.length());
108+
for (int i = 0; i < fieldName.length(); i++) {
109+
char c = fieldName.charAt(i);
110+
switch (c) {
111+
case '|':
112+
String next = fieldName.substring(i + 1);
113+
if (next.startsWith("|")) {
114+
buff.append('|');
115+
i++;
116+
} else {
117+
int end = next.indexOf('|');
118+
if (end < 0) {
119+
buff.append(next);
120+
break;
121+
}
122+
String code = next.substring(0, end);
123+
try {
124+
buff.append((char) Integer.parseInt(code, 16));
125+
} catch (NumberFormatException e) {
126+
buff.append(code);
127+
}
128+
i += code.length() + 1;
129+
}
130+
break;
131+
default:
132+
buff.append(c);
133+
}
134+
}
135+
return buff.toString();
136+
}
137+
59138
/**
60139
* Transforms a path into an _id compatible with Elasticsearch specification. The path cannot be larger than 512
61140
* bytes. For performance reasons paths that are already compatible are returned untouched. Otherwise, SHA-256

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticConnectionRule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public ElasticConnectionRule() {
6161
public ElasticConnectionRule(String elasticConnectionString) {
6262
this(elasticConnectionString,
6363
"elastic_test_" +
64-
RandomStringUtils.random(5, true, false).toLowerCase() +
64+
RandomStringUtils.insecure().next(5, true, false).toLowerCase() +
6565
System.currentTimeMillis()
6666
);
6767
}

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexSuggestionCommonTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public void explain() throws Exception {
6363
String expected = "{\"_source\":{\"includes\":[\":path\"]},\"query\":{\"bool\":{\"must\":[{\"nested\":{\"inner_hits\":" +
6464
"{\"size\":100},\"path\":\":suggest\",\"query\":{\"match_bool_prefix\":{\":suggest.value\":{\"operator\":\"and\",\"query\":\"boo\"}}},\"score_mode\":\"max\"}}]}},\"size\":100}";
6565

66-
Query q = qm.createQuery(sql, Query.SQL);
66+
Query q = qm.createQuery(sql, Query.JCR_SQL2);
6767
Row row = q.execute().getRows().nextRow();
6868
MatcherAssert.assertThat(row.getValue("plan").getString(), CoreMatchers.containsString(expected));
6969
}

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.jackrabbit.oak.api.Tree;
2323
import org.apache.jackrabbit.oak.commons.junit.LogCustomizer;
2424
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
25+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2526
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
2627
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
2728
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder.PropertyRule;
@@ -235,7 +236,9 @@ public void propertyWithDot() throws Exception {
235236
String explanation = explain(query);
236237
assertThat(explanation, containsString("/oak:index/test1"));
237238
assertThat(explanation, containsString(
238-
"{\"term\":{\"first|dot|name\":{\"value\":\"Antonio\""));
239+
"{\"term\":{\"" +
240+
ElasticIndexUtils.fieldName("first.name") +
241+
"\":{\"value\":\"Antonio\""));
239242
assertQuery(query, List.of("/test"));
240243
});
241244

@@ -246,7 +249,9 @@ public void propertyWithDot() throws Exception {
246249
String explanation = explain(lowerQuery);
247250
assertThat(explanation, containsString("/oak:index/test1"));
248251
assertThat(explanation, containsString(
249-
"{\"term\":{\"function*lower*@first|dot|name\":{\"value\":\"antonio\""));
252+
"{\"term\":{\"" +
253+
ElasticIndexUtils.fieldName("function*lower*@first.name") +
254+
"\":{\"value\":\"antonio\""));
250255
assertQuery(lowerQuery, List.of("/test"));
251256
});
252257
}

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticRegexPropertyIndexTest.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
import org.apache.jackrabbit.oak.api.CommitFailedException;
2727
import org.apache.jackrabbit.oak.api.Tree;
28+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2829
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
2930
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
3031
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder.PropertyRule;
@@ -65,7 +66,10 @@ public void regexPropertyWithFlattened() throws Exception {
6566
assertEventually(() -> {
6667
String explain = explain(propaQuery);
6768
assertThat(explain, containsString("elasticsearch:test1"));
68-
assertThat(explain, containsString("[{\"term\":{\"flat:allProperties.propa\":{\"value\":\"foo\"}}}]"));
69+
assertThat(explain, containsString("[{\"term\":{\"flat:" +
70+
ElasticIndexUtils.fieldName("allProperties") + "." +
71+
ElasticIndexUtils.fieldName("propa") +
72+
"\":{\"value\":\"foo\"}}}]"));
6973
assertQuery(propaQuery, List.of("/test/a", "/test/b"));
7074
});
7175

@@ -74,8 +78,14 @@ public void regexPropertyWithFlattened() throws Exception {
7478
assertEventually(() -> {
7579
String explain = explain(propaOrderQuery);
7680
assertThat(explain, containsString("elasticsearch:test1"));
77-
assertThat(explain, containsString("\"query\":{\"bool\":{\"filter\":[{\"prefix\":{\"flat:allProperties.propd\":{\"value\":\"foo\"}}}]}}"));
78-
assertThat(explain, containsString("\"sort\":[{\"flat:allProperties.propd\":{\"order\":\"asc\"}},{\":path\":{\"order\":\"asc\"}}]"));
81+
assertThat(explain, containsString("\"query\":{\"bool\":{\"filter\":[{\"prefix\":{\"flat:" +
82+
ElasticIndexUtils.fieldName("allProperties") + "." +
83+
ElasticIndexUtils.fieldName("propd") +
84+
"\":{\"value\":\"foo\"}}}]}}"));
85+
assertThat(explain, containsString("\"sort\":[{\"flat:" +
86+
ElasticIndexUtils.fieldName("allProperties") + "." +
87+
ElasticIndexUtils.fieldName("propd") +
88+
"\":{\"order\":\"asc\"}},{\":path\":{\"order\":\"asc\"}}]"));
7989
assertThat(explain, containsString("sortOrder: [{ propertyName : propd, propertyType : UNDEFINED, order : ASCENDING }]"));
8090
assertQuery(propaOrderQuery, List.of("/test/f", "/test/e"));
8191
});

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMakerLargeStringPropertiesLogTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ private void setThresholdLimit(String threshold) {
7373
System.setProperty(FulltextDocumentMaker.WARN_LOG_STRING_SIZE_THRESHOLD_KEY, threshold);
7474
}
7575

76-
private ElasticDocumentMaker addPropertyAccordingToType(NodeBuilder test, Type type, String... str) {
76+
private <T> ElasticDocumentMaker addPropertyAccordingToType(NodeBuilder test, Type<T> type, String... str) {
7777
NodeState root = INITIAL_CONTENT;
7878
ElasticIndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
7979
builder.indexRule("nt:base")

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtilsTest.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,47 @@
2121
import java.net.URLEncoder;
2222
import java.nio.charset.StandardCharsets;
2323
import java.util.List;
24+
import java.util.Random;
2425

2526
import org.junit.Test;
2627

2728
public class ElasticIndexUtilsTest {
2829

2930
@Test
3031
public void fieldName() {
31-
assertEquals("a", ElasticIndexUtils.fieldName("a"));
32-
assertEquals("first|dot|name", ElasticIndexUtils.fieldName("first.name"));
33-
assertEquals("first||name", ElasticIndexUtils.fieldName("first|name"));
32+
assertEquals("regular", ElasticIndexUtils.fieldName("regular"));
33+
assertEquals(":nodeName", ElasticIndexUtils.fieldName(":nodeName"));
34+
assertEquals("first|2e|name", ElasticIndexUtils.fieldName("first.name"));
35+
assertEquals("weird|5e|", ElasticIndexUtils.fieldName("weird^"));
36+
assertEquals("embedded_is_fine", ElasticIndexUtils.fieldName("embedded_is_fine"));
37+
assertEquals("|_id", ElasticIndexUtils.fieldName("_id"));
38+
assertEquals("|", ElasticIndexUtils.fieldName(""));
39+
assertEquals("| ", ElasticIndexUtils.fieldName(" "));
40+
assertEquals("||", ElasticIndexUtils.fieldName("|"));
41+
assertEquals("||test||", ElasticIndexUtils.fieldName("|test|"));
42+
}
43+
44+
@Test
45+
public void randomFieldNames() {
46+
ElasticIndexUtils.propertyNameFromFieldName("");
47+
Random r = new Random(1);
48+
for (int i = 0; i < 1000; i++) {
49+
StringBuilder buff = new StringBuilder();
50+
int len = 1 + r.nextInt(5);
51+
String chars = "|^._ 25ex";
52+
for (int j = 0; j < len; j++) {
53+
buff.append(chars.charAt(r.nextInt(chars.length())));
54+
}
55+
String p = buff.toString();
56+
String f = ElasticIndexUtils.fieldName(p);
57+
String p2 = ElasticIndexUtils.propertyNameFromFieldName(f);
58+
if (!p.equals(p2)) {
59+
p2 = ElasticIndexUtils.propertyNameFromFieldName(f);
60+
assertEquals(p, p2);
61+
}
62+
// just to make sure there are no exceptions (within some limits)
63+
ElasticIndexUtils.propertyNameFromFieldName(p);
64+
}
3465
}
3566

3667
@Test

0 commit comments

Comments
 (0)