Skip to content

Commit 8adcc05

Browse files
committed
Fix tests.
Improve logging of statistics. Make names of Oak configuration properties consistent with names used by Bulk ingester.
1 parent 650e279 commit 8adcc05

File tree

3 files changed

+48
-15
lines changed

3 files changed

+48
-15
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticBulkProcessorHandler.java

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.jackrabbit.oak.api.PropertyState;
2828
import org.apache.jackrabbit.oak.api.Type;
2929
import org.apache.jackrabbit.oak.plugins.index.ConfigHelper;
30+
import org.apache.jackrabbit.oak.plugins.index.FormattingUtils;
3031
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;
3132
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
3233
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
@@ -57,6 +58,9 @@ public class ElasticBulkProcessorHandler {
5758

5859
private static final Logger LOG = LoggerFactory.getLogger(ElasticBulkProcessorHandler.class);
5960

61+
/**
62+
* Keeps information about an index that is being written by the bulk processor
63+
*/
6064
static class IndexInfo {
6165
public final String indexName;
6266
public final ElasticIndexDefinition indexDefinition;
@@ -82,6 +86,9 @@ static class IndexInfo {
8286
}
8387
}
8488

89+
/**
90+
* Context object associated with each operation passed to the bulk processor
91+
*/
8592
public final static class OperationContext {
8693
final IndexInfo indexInfo;
8794
final String documentId;
@@ -106,22 +113,26 @@ public String toString() {
106113
public static final int BULK_SIZE_BYTES_DEFAULT = 8 * 1024 * 1024; // 8MB
107114
public static final String BULK_FLUSH_INTERVAL_MS_PROP = "oak.indexer.elastic.bulkProcessor.bulkFlushIntervalMs";
108115
public static final int BULK_FLUSH_INTERVAL_MS_DEFAULT = 5000;
109-
public static final String BULK_PROCESSOR_CONCURRENCY_PROP = "oak.indexer.elastic.bulkProcessor.concurrency";
116+
public static final String BULK_MAX_CONCURRENT_REQUESTS_PROP = "oak.indexer.elastic.bulkProcessor.maxConcurrentRequests";
117+
private static final int BULK_MAX_CONCURRENT_REQUESTS_DEFAULT = 1;
110118
// when true, fails indexing in case of bulk failures
111119
public static final String FAIL_ON_ERROR_PROP = "oak.indexer.elastic.bulkProcessor.failOnError";
112120
public static final boolean FAIL_ON_ERROR_DEFAULT = true;
113121

114122
private static final String SYNC_MODE_PROPERTY = "sync-mode";
115123
private static final String SYNC_RT_MODE = "rt";
124+
private static final int MAX_SUPPRESSED_ERROR_CAUSES = 50;
116125

117126
private final int FAILED_DOC_COUNT_FOR_STATUS_NODE = ConfigHelper.getSystemPropertyAsInt("oak.failedDocStatusLimit", 10000);
118127
private final int BULK_MAX_OPERATIONS = ConfigHelper.getSystemPropertyAsInt(BULK_ACTIONS_PROP, BULK_ACTIONS_DEFAULT);
119128
private final int BULK_MAX_SIZE_BYTES = ConfigHelper.getSystemPropertyAsInt(BULK_SIZE_BYTES_PROP, BULK_SIZE_BYTES_DEFAULT);
120129
private final int BULK_FLUSH_INTERVAL_MS = ConfigHelper.getSystemPropertyAsInt(BULK_FLUSH_INTERVAL_MS_PROP, BULK_FLUSH_INTERVAL_MS_DEFAULT);
121-
private final int BULK_PROCESSOR_CONCURRENCY = ConfigHelper.getSystemPropertyAsInt(BULK_PROCESSOR_CONCURRENCY_PROP, 1);
130+
private final int BULK_MAX_CONCURRENT_REQUESTS = ConfigHelper.getSystemPropertyAsInt(BULK_MAX_CONCURRENT_REQUESTS_PROP, BULK_MAX_CONCURRENT_REQUESTS_DEFAULT);
122131
private final boolean FAIL_ON_ERROR = ConfigHelper.getSystemPropertyAsBoolean(FAIL_ON_ERROR_PROP, FAIL_ON_ERROR_DEFAULT);
132+
123133
private final ElasticConnection elasticConnection;
124134
private final BulkIngester<OperationContext> bulkIngester;
135+
125136
// Used to keep track of the sequence number of the batches that are currently being processed.
126137
// This is used to wait until all operations for a writer are processed before closing it.
127138
private final ReentrantLock lock = new ReentrantLock();
@@ -138,7 +149,7 @@ public ElasticBulkProcessorHandler(@NotNull ElasticConnection elasticConnection)
138149
// BulkIngester does not support retry policies. Some retries though are already implemented in the transport layer.
139150
// More details here: https://github.com/elastic/elasticsearch-java/issues/478
140151
LOG.info("Creating bulk ingester [maxActions: {}, maxSizeBytes: {} flushInterval {}, concurrency {}]",
141-
BULK_MAX_OPERATIONS, BULK_MAX_SIZE_BYTES, BULK_FLUSH_INTERVAL_MS, BULK_PROCESSOR_CONCURRENCY_PROP);
152+
BULK_MAX_OPERATIONS, BULK_MAX_SIZE_BYTES, BULK_FLUSH_INTERVAL_MS, BULK_MAX_CONCURRENT_REQUESTS_PROP);
142153
this.bulkIngester = BulkIngester.of(b -> {
143154
b = b.client(elasticConnection.getAsyncClient())
144155
.listener(new OakBulkListener());
@@ -151,8 +162,8 @@ public ElasticBulkProcessorHandler(@NotNull ElasticConnection elasticConnection)
151162
if (BULK_FLUSH_INTERVAL_MS > 0) {
152163
b = b.flushInterval(BULK_FLUSH_INTERVAL_MS, TimeUnit.MILLISECONDS);
153164
}
154-
if (BULK_PROCESSOR_CONCURRENCY > 0) {
155-
b = b.maxConcurrentRequests(BULK_PROCESSOR_CONCURRENCY);
165+
if (BULK_MAX_CONCURRENT_REQUESTS > 0) {
166+
b = b.maxConcurrentRequests(BULK_MAX_CONCURRENT_REQUESTS);
156167
}
157168
return b;
158169
});
@@ -368,7 +379,9 @@ public void close() throws IOException {
368379

369380
private void checkFailuresForIndex(IndexInfo indexInfo) throws IOException {
370381
if (!indexInfo.suppressedErrorCauses.isEmpty()) {
371-
IOException ioe = new IOException("Exception while indexing. See suppressed for details");
382+
String overflowMessage = (indexInfo.suppressedErrorCauses.size() >= MAX_SUPPRESSED_ERROR_CAUSES) ?
383+
". (Too many failed operations in last bulk request, including only " + indexInfo.suppressedErrorCauses.size() + " errors)" : "";
384+
IOException ioe = new IOException("Exception while indexing. See suppressed for details" + overflowMessage);
372385
indexInfo.suppressedErrorCauses.stream().map(ec -> new IllegalStateException(ec.reason())).forEach(ioe::addSuppressed);
373386
throw ioe;
374387
}
@@ -383,10 +396,9 @@ private IndexInfo getIndexInfoOrFail(String indexName) {
383396
}
384397

385398
private long totalWaitTimeNanos = 0;
399+
386400
private void add(BulkOperation operation, OperationContext context) throws IOException {
387-
// if (totalOperations % 4096 == 0) {
388-
// LOG.info("Adding operation: [{}]: {}", context.indexInfo.indexName, context.documentId);
389-
// }
401+
LOG.info("Adding operation: [{}]: {}", context.indexInfo.indexName, context.documentId);
390402
// fail fast: we don't want to wait until the processor gets closed to fail
391403
checkFailuresForIndex(context.indexInfo);
392404
long start = System.nanoTime();
@@ -397,7 +409,8 @@ private void add(BulkOperation operation, OperationContext context) throws IOExc
397409

398410
public void printStatistics() {
399411
LOG.info("BulkIngester statistics: [operationsCount: {}, requestCount: {}, avgOperationsPerBulk: {}, operationContentionsCount: {}, requestContentionsCount: {}, waitTimeMs: {}]",
400-
bulkIngester.operationsCount(), bulkIngester.requestCount(), bulkIngester.operationsCount()/bulkIngester.requestCount(),
412+
bulkIngester.operationsCount(), bulkIngester.requestCount(),
413+
FormattingUtils.safeComputeAverage(bulkIngester.operationsCount(), bulkIngester.requestCount()),
401414
bulkIngester.operationContentionsCount(), bulkIngester.requestContentionsCount(),
402415
TimeUnit.NANOSECONDS.toMillis(totalWaitTimeNanos));
403416
}
@@ -485,7 +498,7 @@ public void afterBulk(long executionId, BulkRequest request, List<OperationConte
485498
// However, this is not performance critical so we can use coarse grained locking
486499
k -> new FailedDocSetTracker(indexInfo.definitionBuilder));
487500

488-
if (FAIL_ON_ERROR) {
501+
if (FAIL_ON_ERROR && indexInfo.suppressedErrorCauses.size() < MAX_SUPPRESSED_ERROR_CAUSES) {
489502
indexInfo.suppressedErrorCauses.add(item.error());
490503
}
491504
String documentId = contexts.get(i).documentId;

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,16 @@
2121
import org.apache.jackrabbit.oak.api.CommitFailedException;
2222
import org.apache.jackrabbit.oak.api.Tree;
2323
import org.apache.jackrabbit.oak.commons.junit.LogCustomizer;
24+
import org.apache.jackrabbit.oak.commons.junit.TemporarySystemProperty;
2425
import org.apache.jackrabbit.oak.plugins.index.elastic.index.ElasticBulkProcessorHandler;
2526
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
2627
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2728
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
2829
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
2930
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder.PropertyRule;
3031
import org.junit.Assert;
32+
import org.junit.Before;
33+
import org.junit.Rule;
3134
import org.junit.Test;
3235

3336
import java.io.IOException;
@@ -38,6 +41,20 @@
3841
import static org.hamcrest.MatcherAssert.assertThat;
3942

4043
public class ElasticPropertyIndexTest extends ElasticAbstractQueryTest {
44+
@Rule
45+
public TemporarySystemProperty temporarySystemProperty = new TemporarySystemProperty();
46+
47+
// Tests are hardcoded for these values
48+
private final static int BULK_ACTIONS_TEST = 250;
49+
private final static int BULK_SIZE_BYTES_TEST = 1024 * 1024;
50+
51+
@Before
52+
public void before() throws Exception {
53+
// Use a low value for the tests
54+
System.setProperty(ElasticBulkProcessorHandler.BULK_ACTIONS_PROP, Integer.toString(BULK_ACTIONS_TEST));
55+
System.setProperty(ElasticBulkProcessorHandler.BULK_SIZE_BYTES_PROP, Integer.toString(BULK_SIZE_BYTES_TEST));
56+
super.before();
57+
}
4158

4259
@Test
4360
public void bulkProcessorEventsFlushLimit() throws Exception {
@@ -80,7 +97,7 @@ public void bulkProcessorSizeFlushLimit() throws Exception {
8097
customLogger.starting();
8198
/*
8299
Below are the conditions to flush data from bulk processor.
83-
1. Based on events by default 250 events.
100+
1. Based on events reaching 250. (BULK_ACTIONS_TEST)
84101
2. Based on size of estimated bulk request size.
85102
3. When index writer is closed.
86103
To trigger flush on bulk request size, we will load large documents so that
@@ -89,7 +106,7 @@ public void bulkProcessorSizeFlushLimit() throws Exception {
89106
setIndex("test1", createIndex("propa", "propb"));
90107
int docSize = 1024 * 16;
91108
// +1 at end leads to bulk size breach, leading to two bulkIds.
92-
int docCountBreachingBulkSize = (ElasticBulkProcessorHandler.BULK_SIZE_BYTES_DEFAULT / docSize) + 1;
109+
int docCountBreachingBulkSize = (BULK_SIZE_BYTES_TEST / docSize) + 1;
93110
// 250 is the default flush limit for bulk processor
94111
Assert.assertTrue(docCountBreachingBulkSize < 250);
95112
String random = RandomStringUtils.insecure().next(docSize, true, true);

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticRegexPropertyIndexTest.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static org.junit.Assert.assertTrue;
2222
import static org.junit.Assert.fail;
2323

24+
import java.io.IOException;
2425
import java.util.List;
2526

2627
import org.apache.jackrabbit.oak.api.CommitFailedException;
@@ -121,8 +122,10 @@ public void regexPropertyWithoutFlattened() throws Exception {
121122
root.commit();
122123
fail();
123124
} catch (CommitFailedException e) {
124-
String msg = e.getMessage();
125-
assertTrue(msg, msg.contains("Failed to index the node"));
125+
Throwable cause = e.getCause();
126+
String msg = cause.getMessage();
127+
assertTrue("Unexpected exception type. Expected IOException. Was " + cause, cause instanceof IOException);
128+
assertTrue(msg, msg.contains("Exception while indexing."));
126129
// Typically, the root cause is "Limit of total fields [1000] has been exceeded"
127130
// but something this is suppressed, and so we can not have an assertion on it
128131
}

0 commit comments

Comments
 (0)