Skip to content

Commit 6549b7f

Browse files
committed
Merge branch '7.8' of github.com:gchq/stroom
2 parents 4fe2fe1 + b4629e6 commit 6549b7f

File tree

3 files changed

+108
-37
lines changed

3 files changed

+108
-37
lines changed

Diff for: stroom-query/stroom-query-common/src/main/java/stroom/query/common/v2/ExpressionPredicateFactory.java

+76-36
Original file line numberDiff line numberDiff line change
@@ -1207,13 +1207,13 @@ private static <T> Optional<ScoringPredicate<T>> create(final ExpressionTerm ter
12071207
final Function<T, String> extractionFunction) {
12081208
return ifValue(term, () -> {
12091209
// See if this is a wildcard equals.
1210-
final String replaced = makePattern(term.getValue());
1211-
if (!Objects.equals(term.getValue(), replaced)) {
1210+
if (containsWildcard(term.getValue())) {
1211+
final String wildcardPattern = replaceWildcards(term.getValue());
12121212
return new StringRegex<>(term, extractionFunction,
1213-
Pattern.compile(replaced, Pattern.CASE_INSENSITIVE));
1213+
Pattern.compile(wildcardPattern, Pattern.CASE_INSENSITIVE));
12141214
}
12151215

1216-
return new StringEquals<T>(term, term.getValue(), extractionFunction);
1216+
return new StringEquals<T>(term, unescape(term.getValue()), extractionFunction);
12171217
});
12181218
}
12191219

@@ -1235,13 +1235,13 @@ private static <T> Optional<ScoringPredicate<T>> create(final ExpressionTerm ter
12351235
final Function<T, String> extractionFunction) {
12361236
return ifValue(term, () -> {
12371237
// See if this is a wildcard equals.
1238-
final String replaced = makePattern(term.getValue());
1239-
if (!Objects.equals(term.getValue(), replaced)) {
1238+
if (containsWildcard(term.getValue())) {
1239+
final String wildcardPattern = replaceWildcards(term.getValue());
12401240
return new StringRegex<>(term, extractionFunction,
1241-
Pattern.compile(replaced));
1241+
Pattern.compile(wildcardPattern));
12421242
}
12431243

1244-
return new StringEqualsCaseSensitive<T>(term, term.getValue(), extractionFunction);
1244+
return new StringEqualsCaseSensitive<T>(term, unescape(term.getValue()), extractionFunction);
12451245
});
12461246
}
12471247

@@ -1257,23 +1257,24 @@ private static class StringContains<T> extends ExpressionTermPredicate<T> {
12571257
private final String value;
12581258

12591259
private StringContains(final ExpressionTerm term,
1260+
final String value,
12601261
final Function<T, String> extractionFunction) {
12611262
super(term);
12621263
this.extractionFunction = extractionFunction;
1263-
value = term.getValue().toLowerCase();
1264+
this.value = value;
12641265
}
12651266

12661267
private static <T> Optional<ScoringPredicate<T>> create(final ExpressionTerm term,
12671268
final Function<T, String> extractionFunction) {
12681269
return ifValue(term, () -> {
12691270
// See if this is a wildcard contains.
1270-
final String replaced = makePattern(term.getValue());
1271-
if (!Objects.equals(term.getValue(), replaced)) {
1271+
if (containsWildcard(term.getValue())) {
1272+
final String wildcardPattern = replaceWildcards(term.getValue());
12721273
return new StringRegex<>(term, extractionFunction,
1273-
Pattern.compile(".*" + replaced + ".*", Pattern.CASE_INSENSITIVE));
1274+
Pattern.compile(".*" + wildcardPattern + ".*", Pattern.CASE_INSENSITIVE));
12741275
}
12751276

1276-
return new StringContains<>(term, extractionFunction);
1277+
return new StringContains<>(term, unescape(term.getValue()).toLowerCase(), extractionFunction);
12771278
});
12781279
}
12791280

@@ -1290,21 +1291,22 @@ public boolean test(final T values) {
12901291
private static class StringContainsCaseSensitive<T> extends StringExpressionTermPredicate<T> {
12911292

12921293
private StringContainsCaseSensitive(final ExpressionTerm term,
1294+
final String value,
12931295
final Function<T, String> extractionFunction) {
1294-
super(term, term.getValue(), extractionFunction);
1296+
super(term, value, extractionFunction);
12951297
}
12961298

12971299
private static <T> Optional<ScoringPredicate<T>> create(final ExpressionTerm term,
12981300
final Function<T, String> extractionFunction) {
12991301
return ifValue(term, () -> {
13001302
// See if this is a wildcard contains.
1301-
final String replaced = makePattern(term.getValue());
1302-
if (!Objects.equals(term.getValue(), replaced)) {
1303+
if (containsWildcard(term.getValue())) {
1304+
final String wildcardPattern = replaceWildcards(term.getValue());
13031305
return new StringRegex<>(term, extractionFunction,
1304-
Pattern.compile(".*" + replaced + ".*"));
1306+
Pattern.compile(".*" + wildcardPattern + ".*"));
13051307
}
13061308

1307-
return new StringContainsCaseSensitive<>(term, extractionFunction);
1309+
return new StringContainsCaseSensitive<>(term, unescape(term.getValue()), extractionFunction);
13081310
});
13091311
}
13101312

@@ -1711,34 +1713,72 @@ public boolean test(final T values) {
17111713
}
17121714
}
17131715

1714-
public static String makePattern(final String value) {
1715-
int index = 0;
1716+
public static String replaceWildcards(final String value) {
1717+
boolean escaped = false;
1718+
17161719
final char[] chars = value.toCharArray();
1717-
final char[] out = new char[chars.length * 2];
1720+
final StringBuilder sb = new StringBuilder();
17181721
for (int i = 0; i < chars.length; i++) {
17191722
final char c = chars[i];
1720-
if (c == '\\') {
1721-
if (i < chars.length - 1) {
1722-
final char c2 = chars[i + 1];
1723-
if (c2 == '*' || c2 == '?') {
1724-
out[index++] = c2;
1725-
i++;
1726-
} else {
1727-
out[index++] = c;
1728-
}
1723+
if (escaped) {
1724+
if (Character.isLetterOrDigit(c)) {
1725+
sb.append(c);
17291726
} else {
1730-
out[index++] = c;
1727+
// Might be a special char so escape it
1728+
sb.append(Pattern.quote(String.valueOf(c)));
17311729
}
1730+
escaped = false;
1731+
} else if (c == '\\' && chars.length > i + 1 && (chars[i + 1] == '*' || chars[i + 1] == '?')) {
1732+
escaped = true;
1733+
} else if (c == '*') {
1734+
sb.append('.');
1735+
sb.append(c);
1736+
} else if (c == '?') {
1737+
sb.append('.');
1738+
} else if (Character.isLetterOrDigit(c)) {
1739+
sb.append(c);
1740+
} else {
1741+
// Might be a special char so escape it
1742+
sb.append(Pattern.quote(String.valueOf(c)));
1743+
}
1744+
}
1745+
return sb.toString();
1746+
}
1747+
1748+
public static boolean containsWildcard(final String value) {
1749+
boolean escaped = false;
1750+
final char[] chars = value.toCharArray();
1751+
for (int i = 0; i < chars.length; i++) {
1752+
final char c = chars[i];
1753+
if (escaped) {
1754+
escaped = false;
1755+
} else if (c == '\\' && chars.length > i + 1 && (chars[i + 1] == '*' || chars[i + 1] == '?')) {
1756+
escaped = true;
17321757
} else if (c == '*') {
1733-
out[index++] = '.';
1734-
out[index++] = c;
1758+
return true;
17351759
} else if (c == '?') {
1736-
out[index++] = '.';
1760+
return true;
1761+
}
1762+
}
1763+
return false;
1764+
}
1765+
1766+
public static String unescape(final String value) {
1767+
boolean escaped = false;
1768+
final char[] chars = value.toCharArray();
1769+
final StringBuilder sb = new StringBuilder(chars.length);
1770+
for (int i = 0; i < chars.length; i++) {
1771+
final char c = chars[i];
1772+
if (escaped) {
1773+
sb.append(c);
1774+
escaped = false;
1775+
} else if (c == '\\' && chars.length > i + 1 && (chars[i + 1] == '*' || chars[i + 1] == '?')) {
1776+
escaped = true;
17371777
} else {
1738-
out[index++] = c;
1778+
sb.append(c);
17391779
}
17401780
}
1741-
return new String(out, 0, index);
1781+
return sb.toString();
17421782
}
17431783

17441784
private static String[] loadDictionary(final WordListProvider wordListProvider,

Diff for: stroom-query/stroom-query-common/src/test/java/stroom/query/common/v2/TestExpressionPredicateFactory.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -410,10 +410,17 @@ void testWildcardReplacement() {
410410
testWildcardReplacement("th\\*is", "th*is");
411411
testWildcardReplacement("th\\?is", "th?is");
412412
testWildcardReplacement("th\\is", "th\\is");
413+
testWildcardReplacement("*user1 (xxx) yyy*", ".*user1\\Q \\E\\Q(\\Exxx\\Q)\\E\\Q \\Eyyy.*");
413414
}
414415

415416
private void testWildcardReplacement(final String in, final String expected) {
416-
assertThat(ExpressionPredicateFactory.makePattern(in)).isEqualTo(expected);
417+
final String out;
418+
if (ExpressionPredicateFactory.containsWildcard(in)) {
419+
out = ExpressionPredicateFactory.replaceWildcards(in);
420+
} else {
421+
out = ExpressionPredicateFactory.unescape(in);
422+
}
423+
assertThat(out).isEqualTo(expected);
417424
}
418425

419426
private void doStringMatchTest(final String userInput,

Diff for: unreleased_changes/20250317_163725_832__4821.md

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
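* Issue **#4821** : Fix wildcard replacement so that non-alphanumeric characters in a term (e.g. spaces and brackets) are matched literally instead of being treated as regex syntax.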
2+
3+
4+
```sh
5+
# ********************************************************************************
6+
# Issue title: selection handler is breaking on non alpha chars
7+
# Issue link: https://github.com/gchq/stroom/issues/4821
8+
# ********************************************************************************
9+
10+
# ONLY the top line will be included as a change entry in the CHANGELOG.
11+
# The entry should be in GitHub flavour markdown and should be written on a SINGLE
12+
# line with no hard breaks. You can have multiple change files for a single GitHub issue.
13+
# The entry should be written in the imperative mood, i.e. 'Fix nasty bug' rather than
14+
# 'Fixed nasty bug'.
15+
#
16+
# Examples of acceptable entries are:
17+
#
18+
#
19+
# * Issue **123** : Fix bug with an associated GitHub issue in this repository
20+
#
21+
# * Issue **namespace/other-repo#456** : Fix bug with an associated GitHub issue in another repository
22+
#
23+
# * Fix bug with no associated GitHub issue.
24+
```

0 commit comments

Comments
 (0)