Skip to content

Commit 9635838

Browse files
committed
OAK-11568 Elastic: improved compatibility for aggregation definitions
1 parent 057b30a commit 9635838

File tree

9 files changed

+99
-83
lines changed

9 files changed

+99
-83
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexProviderService.java

Lines changed: 2 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@
1616
*/
1717
package org.apache.jackrabbit.oak.plugins.index.elastic;
1818

19-
import org.apache.commons.io.FilenameUtils;
20-
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
21-
import org.apache.jackrabbit.oak.cache.CacheStats;
2219
import org.apache.jackrabbit.oak.commons.IOUtils;
2320
import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
2421
import org.apache.jackrabbit.oak.plugins.index.AsyncIndexInfoService;
@@ -50,13 +47,11 @@
5047
import org.slf4j.Logger;
5148
import org.slf4j.LoggerFactory;
5249

53-
import java.io.File;
5450
import java.util.ArrayList;
5551
import java.util.Dictionary;
5652
import java.util.Hashtable;
5753
import java.util.List;
5854

59-
import static org.apache.commons.io.FileUtils.ONE_MB;
6055
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
6156
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.scheduleWithFixedDelay;
6257

@@ -130,8 +125,6 @@ public class ElasticIndexProviderService {
130125

131126
private static final Logger LOG = LoggerFactory.getLogger(ElasticIndexProviderService.class);
132127

133-
private static final String REPOSITORY_HOME = "repository.home";
134-
135128
@Reference
136129
private StatisticsProvider statisticsProvider;
137130

@@ -149,11 +142,10 @@ public class ElasticIndexProviderService {
149142

150143
private ExtractedTextCache extractedTextCache;
151144

152-
private final List<ServiceRegistration> regs = new ArrayList<>();
145+
private final List<ServiceRegistration<?>> regs = new ArrayList<>();
153146
private final List<Registration> oakRegs = new ArrayList<>();
154147

155148
private Whiteboard whiteboard;
156-
private File textExtractionDir;
157149

158150
private ElasticConnection elasticConnection;
159151
private ElasticMetricHandler metricHandler;
@@ -200,7 +192,7 @@ private void activate(BundleContext bundleContext, Config config) {
200192

201193
@Deactivate
202194
private void deactivate() {
203-
for (ServiceRegistration reg : regs) {
195+
for (ServiceRegistration<?> reg : regs) {
204196
reg.unregister();
205197
}
206198

@@ -242,63 +234,6 @@ private void registerIndexEditor(BundleContext bundleContext) {
242234
Dictionary<String, Object> props = new Hashtable<>();
243235
props.put("type", ElasticIndexDefinition.TYPE_ELASTICSEARCH);
244236
regs.add(bundleContext.registerService(IndexEditorProvider.class.getName(), editorProvider, props));
245-
// oakRegs.add(registerMBean(whiteboard,
246-
// TextExtractionStatsMBean.class,
247-
// editorProvider.getExtractedTextCache().getStatsMBean(),
248-
// TextExtractionStatsMBean.TYPE,
249-
// "TextExtraction statistics"));
250-
}
251-
252-
private void initializeExtractedTextCache(final Config config, StatisticsProvider statisticsProvider) {
253-
254-
extractedTextCache = new ExtractedTextCache(
255-
config.extractedTextCacheSizeInMB() * ONE_MB,
256-
config.extractedTextCacheExpiryInSecs(),
257-
config.alwaysUsePreExtractedCache(),
258-
textExtractionDir,
259-
statisticsProvider);
260-
if (extractedTextProvider != null) {
261-
registerExtractedTextProvider(extractedTextProvider);
262-
}
263-
CacheStats stats = extractedTextCache.getCacheStats();
264-
if (stats != null) {
265-
oakRegs.add(registerMBean(whiteboard,
266-
CacheStatsMBean.class, stats,
267-
CacheStatsMBean.TYPE, stats.getName()));
268-
LOG.info("Extracted text caching enabled with maxSize {} MB, expiry time {} secs",
269-
config.extractedTextCacheSizeInMB(), config.extractedTextCacheExpiryInSecs());
270-
}
271-
}
272-
273-
private void initializeTextExtractionDir(BundleContext bundleContext, Config config) {
274-
String textExtractionDir = config.localTextExtractionDir();
275-
if (textExtractionDir.trim().isEmpty()) {
276-
String repoHome = bundleContext.getProperty(REPOSITORY_HOME);
277-
if (repoHome != null) {
278-
textExtractionDir = FilenameUtils.concat(repoHome, "index");
279-
}
280-
}
281-
282-
if (textExtractionDir == null) {
283-
throw new IllegalStateException(String.format("Text extraction directory cannot be determined as neither " +
284-
"directory path [%s] nor repository home [%s] defined", PROP_LOCAL_TEXT_EXTRACTION_DIR, REPOSITORY_HOME));
285-
}
286-
287-
this.textExtractionDir = new File(textExtractionDir);
288-
}
289-
290-
private void registerExtractedTextProvider(PreExtractedTextProvider provider) {
291-
if (extractedTextCache != null) {
292-
if (provider != null) {
293-
String usage = extractedTextCache.isAlwaysUsePreExtractedCache() ?
294-
"always" : "only during reindexing phase";
295-
LOG.info("Registering PreExtractedTextProvider {} with extracted text cache. " +
296-
"It would be used {}", provider, usage);
297-
} else {
298-
LOG.info("Unregistering PreExtractedTextProvider with extracted text cache");
299-
}
300-
extractedTextCache.setExtractedTextProvider(provider);
301-
}
302237
}
303238

304239
private ElasticConnection getElasticConnection(Config contextConfig) {

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import java.nio.charset.StandardCharsets;
5656
import java.util.Arrays;
5757
import java.util.Collections;
58+
import java.util.HashMap;
5859
import java.util.LinkedHashMap;
5960
import java.util.List;
6061
import java.util.Map;
@@ -97,7 +98,13 @@ public static IndexSettingsAnalysis.Builder buildCustomAnalyzers(NodeState state
9798
NodeState defaultAnalyzer = state.getChildNode(FulltextIndexConstants.ANL_DEFAULT);
9899
if (defaultAnalyzer.exists()) {
99100
IndexSettingsAnalysis.Builder builder = new IndexSettingsAnalysis.Builder();
100-
Map<String, Object> analyzer = convertNodeState(defaultAnalyzer);
101+
Map<String, Object> analyzer;
102+
try {
103+
analyzer = convertNodeState(defaultAnalyzer);
104+
} catch (IOException e) {
105+
LOG.warn("Can not load analyzer; using an empty configuration", e);
106+
analyzer = Map.of();
107+
}
101108
String builtIn = defaultAnalyzer.getString(FulltextIndexConstants.ANL_CLASS);
102109
if (builtIn == null) {
103110
builtIn = defaultAnalyzer.getString(FulltextIndexConstants.ANL_NAME);
@@ -107,11 +114,14 @@ public static IndexSettingsAnalysis.Builder buildCustomAnalyzers(NodeState state
107114

108115
// content params, usually stop words
109116
for (ChildNodeEntry nodeEntry : defaultAnalyzer.getChildNodeEntries()) {
117+
List<String> list;
110118
try {
111-
analyzer.put(normalize(nodeEntry.getName()), loadContent(nodeEntry.getNodeState(), nodeEntry.getName(), NOOP_TRANSFORMATION));
119+
list = loadContent(nodeEntry.getNodeState(), nodeEntry.getName(), NOOP_TRANSFORMATION);
112120
} catch (IOException e) {
113-
throw new IllegalStateException("Unable to load content for node entry " + nodeEntry.getName(), e);
121+
LOG.warn("Unable to load analyzer content for entry '" + nodeEntry.getName() + "'; using empty list", e);
122+
list = List.of();
114123
}
124+
analyzer.put(normalize(nodeEntry.getName()), list);
115125
}
116126

117127
builder.analyzer(analyzerName, new Analyzer(null, JsonData.of(analyzer)));
@@ -145,8 +155,22 @@ public static IndexSettingsAnalysis.Builder buildCustomAnalyzers(NodeState state
145155

146156
@NotNull
147157
private static TokenizerDefinition loadTokenizer(NodeState state) {
148-
String name = normalize(Objects.requireNonNull(state.getString(FulltextIndexConstants.ANL_NAME)));
149-
Map<String, Object> args = convertNodeState(state);
158+
String name;
159+
Map<String, Object> args;
160+
if (!state.exists()) {
161+
LOG.warn("No tokenizer specified; the standard with an empty configuration");
162+
name = "Standard";
163+
args = new HashMap<String, Object>();
164+
} else {
165+
name = Objects.requireNonNull(state.getString(FulltextIndexConstants.ANL_NAME));
166+
try {
167+
args = convertNodeState(state);
168+
} catch (IOException e) {
169+
LOG.warn("Can not load tokenizer; using an empty configuration", e);
170+
args = new HashMap<String, Object>();
171+
}
172+
}
173+
name = normalize(name);
150174
args.put(ANALYZER_TYPE, name);
151175
return new TokenizerDefinition(name, JsonData.of(args));
152176
}
@@ -228,7 +252,12 @@ private static <FD> LinkedHashMap<String, FD> loadFilters(NodeState state,
228252
}
229253

230254
private static List<String> loadContent(NodeState file, String name, ContentTransformer transformer) throws IOException {
231-
Blob blob = ConfigUtil.getBlob(file, name);
255+
Blob blob;
256+
try {
257+
blob = ConfigUtil.getBlob(file, name);
258+
} catch (IllegalArgumentException | IllegalStateException e) {
259+
throw new IOException("Could not load " + name, e);
260+
}
232261
try (Reader content = new InputStreamReader(Objects.requireNonNull(blob).getNewStream(), StandardCharsets.UTF_8)) {
233262
try (BufferedReader br = new BufferedReader(content)) {
234263
return br.lines()
@@ -264,11 +293,25 @@ private static String normalize(String value) {
264293
return name;
265294
}
266295

267-
private static Map<String, Object> convertNodeState(NodeState state) {
268-
return convertNodeState(state, List.of(), List.of());
296+
private static Map<String, Object> convertNodeState(NodeState state) throws IOException {
297+
try {
298+
return convertNodeState(state, List.of(), List.of());
299+
} catch (IllegalStateException e) {
300+
// convert runtime exception back to checked exception
301+
throw new IOException("Can not convert", e);
302+
}
269303
}
270304

271-
private static Map<String, Object> convertNodeState(NodeState state, List<ParameterTransformer> transformers, List<String> preloadedContent) {
305+
/**
306+
* Read analyzer configuration.
307+
*
308+
* @param state the node state
309+
* @param transformers
310+
* @param preloadedContent
311+
* @return the analyzer configuration as a map
312+
* @throws IllegalStateException if referenced content can not be loaded (wraps the underlying IOException)
313+
*/
314+
private static Map<String, Object> convertNodeState(NodeState state, List<ParameterTransformer> transformers, List<String> preloadedContent) throws IllegalStateException {
272315
Map<String, Object> luceneParams = StreamSupport.stream(Spliterators.spliteratorUnknownSize(state.getProperties().iterator(), Spliterator.ORDERED), false)
273316
.filter(ElasticCustomAnalyzer::isPropertySupported)
274317
.collect(Collectors.toMap(PropertyState::getName, ps -> {
@@ -280,6 +323,8 @@ private static Map<String, Object> convertNodeState(NodeState state, List<Parame
280323
return loadContent(state.getChildNode(v.trim()), v.trim(),
281324
CONTENT_TRANSFORMERS.getOrDefault(ps.getName(), NOOP_TRANSFORMATION)).stream();
282325
} catch (IOException e) {
326+
// convert checked exception to runtime exception,
327+
// because the stream API doesn't support checked exceptions
283328
throw new IllegalStateException(e);
284329
}
285330
}).collect(Collectors.toList()));

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ void addFulltextRelative(String path, String value) {
9797
map -> {
9898
Object existingValue = map.get(ElasticIndexHelper.DYNAMIC_PROPERTY_VALUE);
9999
if (existingValue instanceof Set) {
100+
@SuppressWarnings("unchecked")
100101
Set<Object> existingSet = (Set<Object>) existingValue;
101102
existingSet.add(value);
102103
} else {
@@ -134,6 +135,7 @@ void addProperty(String fieldName, Object value) {
134135
if (existingValue == null) {
135136
finalValue = value;
136137
} else if (existingValue instanceof Set) {
138+
@SuppressWarnings("unchecked")
137139
Set<Object> existingSet = (Set<Object>) existingValue;
138140
existingSet.add(value);
139141
finalValue = existingSet;

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexEditorContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class ElasticIndexEditorContext extends FulltextIndexEditorContext<ElasticDocume
4040
}
4141

4242
@Override
43-
public IndexDefinition.Builder newDefinitionBuilder() {
43+
public ElasticIndexDefinition.Builder newDefinitionBuilder() {
4444
return new ElasticIndexDefinition.Builder(((ElasticIndexDefinition) definition).getIndexPrefix());
4545
}
4646

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticInferenceTest.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,13 @@ public void hybridSearch() throws Exception {
145145
for (String path : paths) {
146146
URL json = this.getClass().getResource("/inference" + path + ".json");
147147
if (json != null) {
148-
Map<String, Object> map = mapper.readValue(json, Map.class);
148+
@SuppressWarnings("unchecked")
149+
Map<String, Collection<Double>> map = mapper.readValue(json, Map.class);
149150
ObjectNode updateDoc = mapper.createObjectNode();
150151
ObjectNode inferenceNode = updateDoc.putObject(ElasticIndexDefinition.INFERENCE);
151152
ArrayNode embeddingsNode = inferenceNode.putObject("embeddings").putArray("value");
152153
inferenceNode.putObject("metadata").put("updatedAt", Instant.now().toEpochMilli());
153-
for (Double d : (Collection<Double>) map.get("embedding")) {
154+
for (Double d : map.get("embedding")) {
154155
embeddingsNode.add(d);
155156
}
156157
updateDocument(index, path, updateDoc);

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPerfTest.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,11 @@ private void createTestData(Supplier<String> extraContentSupplier) throws Except
167167

168168
private void testQuery(String query, String language) throws Exception {
169169
Result result = executeQuery(query, language, NO_BINDINGS);
170-
Iterable<ResultRow> it = (Iterable<ResultRow>) result.getRows();
171-
Iterator<ResultRow> iterator = it.iterator();
170+
Iterator<? extends ResultRow> iterator = result.getRows().iterator();
172171
long start = LOG_PERF.startForInfoLog("Getting result rows");
173172
int i = 0;
174173
while (iterator.hasNext()) {
175-
ResultRow row = iterator.next();
174+
iterator.next();
176175
i++;
177176
}
178177
LOG_PERF.end(start, -1,-1, "{} Results fetched", i);

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticTestServer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ public static synchronized ElasticsearchContainer getESTestServer() {
6262
return CONTAINER;
6363
}
6464

65+
@SuppressWarnings("resource")
6566
private synchronized void setup() {
6667
String esDockerImageVersion = ELASTIC_DOCKER_IMAGE_VERSION != null ? ELASTIC_DOCKER_IMAGE_VERSION : Version.VERSION.toString();
6768
LOG.info("Elasticsearch test Docker image version: {}.", esDockerImageVersion);

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,39 @@ public void multiRulesWithSamePropertyNamesDifferentTypes() {
9595
ElasticIndexHelper.createIndexRequest("prefix.path", definition);
9696
}
9797

98+
@Test
99+
public void analyzerWithEmptyTokenizer() {
100+
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
101+
IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("idxRule");
102+
indexRule.property("foo").type("String").useInSimilarity();
103+
104+
Tree analyzer = builder.getBuilderTree().addChild("analyzers");
105+
Tree defaultAnalyzer = analyzer.addChild("default");
106+
defaultAnalyzer.setProperty(FulltextIndexConstants.ANL_CLASS, "org.apache.lucene.analysis.en.EnglishAnalyzer");
107+
defaultAnalyzer.addChild("tokenizer");
108+
defaultAnalyzer.addChild("filter");
109+
110+
NodeState nodeState = builder.build();
111+
ElasticIndexDefinition definition =
112+
new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");
113+
ElasticIndexHelper.createIndexRequest("prefix.path", definition);
114+
}
115+
116+
@Test
117+
public void analyzerWithEmptyDefault() {
118+
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
119+
IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("idxRule");
120+
indexRule.property("foo").type("String").useInSimilarity();
121+
122+
Tree analyzer = builder.getBuilderTree().addChild("analyzers");
123+
analyzer.addChild("default");
124+
125+
NodeState nodeState = builder.build();
126+
ElasticIndexDefinition definition =
127+
new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");
128+
ElasticIndexHelper.createIndexRequest("prefix.path", definition);
129+
}
130+
98131
@Test()
99132
public void indexSettingsAreCorrectlySet() {
100133
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();

oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/ConfigUtil.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public static Iterable<String> getMixinNames(NodeState nodeState) {
110110
* the jcr:content/@jcr:data property to get the binary content
111111
*/
112112
@Nullable
113-
public static Blob getBlob(NodeState state, String resourceName){
113+
public static Blob getBlob(NodeState state, String resourceName) {
114114
NodeState contentNode = state.getChildNode(JcrConstants.JCR_CONTENT);
115115
checkArgument(contentNode.exists(), "Was expecting to find jcr:content node to read resource %s", resourceName);
116116
PropertyState property = contentNode.getProperty(JcrConstants.JCR_DATA);

0 commit comments

Comments
 (0)