Skip to content

Enable synthetic source on normalized keyword mappings #126623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
7 changes: 7 additions & 0 deletions docs/changelog/126623.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 126623
summary: Enable synthetic source on normalized keyword mappings
area: Mapping
type: enhancement
issues:
- 124369
- 121358
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,93 @@ keyword with normalizer:
keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]

- do:
mget:
index: test-keyword-with-normalizer
body:
ids: [ 1, 2, 3 ]
- match: { docs.0._index: "test-keyword-with-normalizer" }
- match: { docs.0._id: "1" }
- match:
docs.0._source:
keyword: "the quick brown fox jumps over the lazy dog"
keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog"
keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog"

- match: { docs.1._index: "test-keyword-with-normalizer" }
- match: { docs.1._id: "2" }
- match:
docs.1._source:
keyword: "the five boxing wizards jump quickly"
keyword_with_ignore_above: "The five BOXING wizards jump Quickly"
keyword_without_doc_values: "The five BOXING wizards jump Quickly"

- match: { docs.2._index: "test-keyword-with-normalizer" }
- match: { docs.2._id: "3" }
- match:
docs.2._source:
keyword: [ "do or do not, there is no try", "may the force be with you!" ]
keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]

---
keyword with normalizer, source keep mode all:
- do:
indices.create:
index: test-keyword-with-normalizer
body:
settings:
analysis:
normalizer:
lowercase:
type: custom
filter:
- lowercase
index:
mapping.source.mode: synthetic

mappings:
properties:
keyword:
type: keyword
normalizer: lowercase
synthetic_source_keep: all
keyword_with_ignore_above:
type: keyword
normalizer: lowercase
ignore_above: 10
keyword_without_doc_values:
type: keyword
normalizer: lowercase
doc_values: false

- do:
index:
index: test-keyword-with-normalizer
id: 1
body:
keyword: "the Quick Brown Fox jumps over the lazy Dog"
keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog"
keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog"

- do:
index:
index: test-keyword-with-normalizer
id: 2
body:
keyword: "The five BOXING wizards jump Quickly"
keyword_with_ignore_above: "The five BOXING wizards jump Quickly"
keyword_without_doc_values: "The five BOXING wizards jump Quickly"

- do:
index:
index: test-keyword-with-normalizer
id: 3
body:
keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]

- do:
mget:
index: test-keyword-with-normalizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1263,11 +1263,9 @@ private String originalName() {

@Override
protected SyntheticSourceSupport syntheticSourceSupport() {
if (hasNormalizer()) {
// NOTE: no matter if we have doc values or not we use fallback synthetic source
// to store the original value whose doc values would be altered by the normalizer
return SyntheticSourceSupport.FALLBACK;
}
/* NOTE: we allow enabling synthetic source on Keyword fields with a Normalizer, even though the returned synthetic value
may not perfectly match the original, pre-normalization, value.
*/

if (fieldType.stored() || hasDocValues) {
return new SyntheticSourceSupport.Native(() -> syntheticFieldLoader(fullPath(), leafName()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.logsdb.datageneration.FieldType;

import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
Expand All @@ -34,19 +35,21 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
public static Object expectedValue(Map<String, Object> fieldMapping, Object value, Params params, TestContext testContext) {
var nullValue = (String) fieldMapping.get("null_value");

var ignoreAbove = fieldMapping.get("ignore_above") == null
int ignoreAbove = fieldMapping.get("ignore_above") == null
? Integer.MAX_VALUE
: ((Number) fieldMapping.get("ignore_above")).intValue();

String normalizerName = (String) fieldMapping.get("normalizer");

if (value == null) {
return convert(null, nullValue, ignoreAbove);
return convert(null, nullValue, ignoreAbove, normalizerName);
}

if (value instanceof String s) {
return convert(s, nullValue, ignoreAbove);
return convert(s, nullValue, ignoreAbove, normalizerName);
}

Function<Stream<String>, Stream<BytesRef>> convertValues = s -> s.map(v -> convert(v, nullValue, ignoreAbove))
Function<Stream<String>, Stream<BytesRef>> convertValues = s -> s.map(v -> convert(v, nullValue, ignoreAbove, normalizerName))
.filter(Objects::nonNull);

boolean hasDocValues = hasDocValues(fieldMapping, true);
Expand All @@ -67,15 +70,21 @@ public static Object expectedValue(Map<String, Object> fieldMapping, Object valu
return maybeFoldList(resultList);
}

private static BytesRef convert(String value, String nullValue, int ignoreAbove) {
/**
 * Computes the value expected back for a single keyword input.
 *
 * @param value       the raw input value, possibly {@code null}
 * @param nullValue   the mapping's {@code null_value}, substituted when {@code value} is {@code null}; may itself be {@code null}
 * @param ignoreAbove maximum accepted length in {@code char}s; longer values yield {@code null}
 * @param normalizer  name of the configured normalizer, or {@code null} when none is configured
 * @return the (possibly lowercased) value wrapped in a {@link BytesRef}, or {@code null} when there is no value to use
 *         or the value is longer than {@code ignoreAbove}
 * @throws IllegalArgumentException if {@code normalizer} is non-null and is not {@code "lowercase"}
 */
private static BytesRef convert(String value, String nullValue, int ignoreAbove, String normalizer) {
    String resolved = value != null ? value : nullValue;
    if (resolved == null) {
        // neither a real value nor a null_value substitute
        return null;
    }

    if (normalizer != null) {
        if ("lowercase".equals(normalizer) == false) {
            // likely unreachable: the original note says MapperServiceTestCase only sets up the lowercase normalizer
            throw new IllegalArgumentException("normalizer [" + normalizer + "] not supported for block loader tests");
        }
        // Locale.ROOT keeps the case mapping independent of the JVM default locale (e.g. Turkish dotted/dotless i).
        // NOTE(review): assumes this agrees with what the index-side lowercase normalizer produces - confirm.
        resolved = resolved.toLowerCase(Locale.ROOT);
    }

    // the length check runs on the already-normalized text
    if (resolved.length() > ignoreAbove) {
        return null;
    }
    return new BytesRef(resolved);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.LowercaseNormalizer;
import org.elasticsearch.index.analysis.NameOrDefinition;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
Expand Down Expand Up @@ -132,7 +133,11 @@ protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
}

protected static IndexAnalyzers createIndexAnalyzers() {
return IndexAnalyzers.of(Map.of("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())));
return IndexAnalyzers.of(
Map.of("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())),
Map.of("lowercase", new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer())),
Map.of()
);
}

protected static String randomIndexOptions() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ private Supplier<Map<String, Object>> keywordMapping(
if (ESTestCase.randomDouble() <= 0.2) {
injected.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
}

// NOCOMMIT - randomize this
injected.put("normalizer", "lowercase");
return injected;
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ Object convert(Object value, Object nullValue) {
}

class KeywordMatcher extends GenericMappingAwareMatcher {
String normalizer;

KeywordMatcher(
XContentBuilder actualMappings,
Settings.Builder actualSettings,
Expand All @@ -337,10 +339,26 @@ class KeywordMatcher extends GenericMappingAwareMatcher {
super("keyword", actualMappings, actualSettings, expectedMappings, expectedSettings);
}

/**
 * Records the {@code normalizer} mapping parameter for the field under comparison, then delegates
 * to the inherited matching logic.
 * <p>
 * The parameter is kept in the {@code normalizer} field, which {@code convert(Object, Object)} reads
 * when deciding whether to lowercase a value. NOTE(review): presumably the inherited {@code match}
 * is what invokes {@code convert} - confirm against the superclass.
 *
 * @param actual          values read from the actual source
 * @param expected        values read from the expected source
 * @param actualMapping   mapping parameters of the field in the actual index
 * @param expectedMapping mapping parameters of the field in the expected index
 * @return the result produced by the superclass comparison
 */
@Override
public MatchResult match(
List<Object> actual,
List<Object> expected,
Map<String, Object> actualMapping,
Map<String, Object> expectedMapping
) {
// Re-resolved on every call; this overwrites whatever normalizer a previous match() stored on this instance.
this.normalizer = (String) FieldSpecificMatcher.getMappingParameter("normalizer", actualMapping, expectedMapping);
return super.match(actual, expected, actualMapping, expectedMapping);
}

@Override
Object convert(Object value, Object nullValue) {
if (value == null) {
return nullValue;
// Normalization could also be applied to the null value
value = nullValue;
}
if (value instanceof String s && this.normalizer != null && this.normalizer.equals("lowercase")) {
// Currently, tests only support lowercase for normalization.
value = s.toLowerCase(Locale.ROOT);
}

return value;
Expand Down Expand Up @@ -699,10 +717,10 @@ public MatchResult match(
Map<String, Object> actualMapping,
Map<String, Object> expectedMapping
) {
var nullValue = getNullValue(actualMapping, expectedMapping);
Object nullValue = getNullValue(actualMapping, expectedMapping);

var expectedNormalized = normalize(expected, nullValue);
var actualNormalized = normalize(actual, nullValue);
Set<Object> expectedNormalized = normalize(expected, nullValue);
Set<Object> actualNormalized = normalize(actual, nullValue);

return actualNormalized.equals(expectedNormalized)
? MatchResult.match()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ public MatchResult match() {
var sortedAndFlattenedExpected = expected.stream().map(s -> SourceTransforms.normalize(s, mappingLookup)).toList();

for (int i = 0; i < sortedAndFlattenedActual.size(); i++) {
var actual = sortedAndFlattenedActual.get(i);
var expected = sortedAndFlattenedExpected.get(i);
Map<String, List<Object>> actual = sortedAndFlattenedActual.get(i);
Map<String, List<Object>> expected = sortedAndFlattenedExpected.get(i);

var result = compareSource(actual, expected);
MatchResult result = compareSource(actual, expected);
if (result.isMatch() == false) {
var message = "Source matching failed at document id [" + i + "]. " + result.getMessage();
return MatchResult.noMatch(message);
Expand All @@ -90,23 +90,26 @@ public MatchResult match() {
}

private MatchResult compareSource(Map<String, List<Object>> actual, Map<String, List<Object>> expected) {
for (var expectedFieldEntry : expected.entrySet()) {
var name = expectedFieldEntry.getKey();
for (Map.Entry<String, List<Object>> expectedFieldEntry : expected.entrySet()) {
String name = expectedFieldEntry.getKey();

var actualValues = actual.get(name);
var expectedValues = expectedFieldEntry.getValue();
List<Object> actualValues = actual.get(name);
List<Object> expectedValues = expectedFieldEntry.getValue();

var matchIncludingFieldSpecificMatchers = matchWithFieldSpecificMatcher(name, actualValues, expectedValues);
MatchResult matchIncludingFieldSpecificMatchers = matchWithFieldSpecificMatcher(name, actualValues, expectedValues);
if (matchIncludingFieldSpecificMatchers.isMatch() == false) {
var message = "Source documents don't match for field [" + name + "]: " + matchIncludingFieldSpecificMatchers.getMessage();
String message = "Source documents don't match for field ["
+ name
+ "]: "
+ matchIncludingFieldSpecificMatchers.getMessage();
return MatchResult.noMatch(message);
}
}
return MatchResult.match();
}

private MatchResult matchWithFieldSpecificMatcher(String fieldName, List<Object> actualValues, List<Object> expectedValues) {
var actualFieldMapping = actualNormalizedMapping.get(fieldName);
MappingTransforms.FieldMapping actualFieldMapping = actualNormalizedMapping.get(fieldName);
if (actualFieldMapping == null) {
if (expectedNormalizedMapping.get(fieldName) != null
// Special cases due to fields being defined in default mapping for logsdb index mode
Expand All @@ -126,7 +129,7 @@ private MatchResult matchWithFieldSpecificMatcher(String fieldName, List<Object>
throw new IllegalStateException("Field type is missing from leaf field Leaf field [" + fieldName + "] mapping parameters");
}

var expectedFieldMapping = expectedNormalizedMapping.get(fieldName);
MappingTransforms.FieldMapping expectedFieldMapping = expectedNormalizedMapping.get(fieldName);
if (expectedFieldMapping == null) {
throw new IllegalStateException("Leaf field [" + fieldName + "] is present in actual mapping but absent in expected mapping");
} else {
Expand All @@ -144,7 +147,7 @@ private MatchResult matchWithFieldSpecificMatcher(String fieldName, List<Object>
}
}

var fieldSpecificMatcher = fieldSpecificMatchers.get(actualFieldType);
FieldSpecificMatcher fieldSpecificMatcher = fieldSpecificMatchers.get(actualFieldType);
assert fieldSpecificMatcher != null : "Missing matcher for field type [" + actualFieldType + "]";

return fieldSpecificMatcher.match(
Expand Down
Loading