Skip to content

Commit aa69293

Browse files
committed
Merge remote-tracking branch 'upstream/trunk' into OAK-11438
2 parents cb54dd8 + 080a08b commit aa69293

File tree

5 files changed

+132
-4
lines changed

5 files changed

+132
-4
lines changed

oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/progress/IndexingProgressReporter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public void setMessagePrefix(String messagePrefix) {
9191
}
9292

9393
public void traversedNode(PathSource pathSource) throws CommitFailedException {
94-
if (++traversalCount % 100_000 == 0) {
94+
if (++traversalCount % 10_000 == 0) {
9595
double rate = traversalRateEstimator.getNodesTraversedPerSecond();
9696
String formattedRate = String.format("%1.2f nodes/s, %1.2f nodes/hr", rate, rate * 3600);
9797
String estimate = estimatePendingTraversal(rate);

oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ private boolean updateSuggester(Analyzer analyzer, Calendar currentTime) throws
216216
* Checks if last suggestion build time was done sufficiently in the past AND that there were non-zero indexedNodes
217217
* stored in the last run. Note, if index is updated only to rebuild suggestions, even then we update indexedNodes,
218218
* which would be zero in case it was a forced update of suggestions.
219+
*
219220
* @return whether the suggest dictionary should be updated
220221
*/
221222
private boolean shouldUpdateSuggestions(Calendar currentTime) {

oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/LuceneIndexWriterConfig.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ public class LuceneIndexWriterConfig {
3939
public final static String RAM_PER_THREAD_HARD_LIMIT_MB_KEY = "oak.index.lucene.ramPerThreadHardLimitMB";
4040

4141
private final int maxBufferedDeleteTerms = SystemPropertySupplier.create(
42-
MAX_BUFFERED_DELETE_TERMS_KEY, IndexWriterConfig.DISABLE_AUTO_FLUSH)
42+
MAX_BUFFERED_DELETE_TERMS_KEY, IndexWriterConfig.DISABLE_AUTO_FLUSH)
4343
.loggingTo(LOG).get();
4444
private final int ramPerThreadHardLimitMB = SystemPropertySupplier.create(
45-
RAM_PER_THREAD_HARD_LIMIT_MB_KEY, IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB)
45+
RAM_PER_THREAD_HARD_LIMIT_MB_KEY, IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB)
4646
.loggingTo(LOG).get();
4747

4848
private final double ramBufferSizeMB;

oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedTreeStoreTask.java

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,17 @@
3434
import java.util.concurrent.TimeUnit;
3535

3636
import org.apache.jackrabbit.guava.common.base.Stopwatch;
37+
import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
38+
import org.apache.jackrabbit.oak.commons.json.JsopReader;
39+
import org.apache.jackrabbit.oak.commons.json.JsopTokenizer;
40+
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
3741
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedSortBatchTask.Result;
3842
import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore;
3943
import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession;
4044
import org.apache.jackrabbit.oak.plugins.index.IndexingReporter;
4145
import org.apache.jackrabbit.oak.plugins.index.MetricsFormatter;
4246
import org.apache.jackrabbit.oak.plugins.index.MetricsUtils;
47+
import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
4348
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
4449
import org.slf4j.Logger;
4550
import org.slf4j.LoggerFactory;
@@ -207,7 +212,9 @@ private void sortAndSaveBatch(NodeStateEntryBatch nseb) throws Exception {
207212
int valueLength = buffer.getInt();
208213
String value = new String(buffer.array(), buffer.arrayOffset() + buffer.position(), valueLength, StandardCharsets.UTF_8);
209214
textSize += entry.getPath().length() + value.length() + 2;
210-
treeStore.putNode(entry.getPath(), value);
215+
String path = entry.getPath();
216+
value = removePropertiesOfBundledNodes(path, value);
217+
treeStore.putNode(path, value);
211218
}
212219
session.checkpoint();
213220
unmergedRoots++;
@@ -230,4 +237,68 @@ private void sortAndSaveBatch(NodeStateEntryBatch nseb) throws Exception {
230237
}
231238
}
232239

240+
/**
241+
* If there are any, remove properties of bundled nodes (jcr:content/...) from the JSON-encoded node.
242+
*
243+
* @param path the path
244+
* @param value the JSON-encoded node
245+
* @return the cleaned JSON
246+
*/
247+
public static String removePropertiesOfBundledNodes(String path, String value) {
248+
if (value.indexOf("\"jcr:content/") < 0) {
249+
return value;
250+
}
251+
// possibly the node contains a bundled property, but we are not sure
252+
// try to de-serialize
253+
NodeStateEntryReader nodeReader = new NodeStateEntryReader(new MemoryBlobStore());
254+
try {
255+
// the following line will throw an exception if de-serialization fails
256+
nodeReader.read(path + "|" + value);
257+
// ok it did not: it was a false positive
258+
return value;
259+
} catch (Exception e) {
260+
LOG.warn("Unable to de-serialize due to presence of bundled properties: {} = {}", path, value);
261+
JsopReader reader = new JsopTokenizer(value);
262+
JsopBuilder writer = new JsopBuilder();
263+
reader.read('{');
264+
writer.object();
265+
if (!reader.matches('}')) {
266+
do {
267+
String key = reader.readString();
268+
reader.read(':');
269+
// skip properties that contain "/"
270+
boolean skip = key.indexOf('/') >= 0;
271+
if (!skip) {
272+
writer.key(key);
273+
}
274+
if (reader.matches('[')) {
275+
if (!skip) {
276+
writer.array();
277+
}
278+
do {
279+
String raw = reader.readRawValue();
280+
if (!skip) {
281+
writer.encodedValue(raw);
282+
}
283+
} while (reader.matches(','));
284+
reader.read(']');
285+
if (!skip) {
286+
writer.endArray();
287+
}
288+
} else {
289+
String raw = reader.readRawValue();
290+
if (!skip) {
291+
writer.encodedValue(raw);
292+
}
293+
}
294+
} while (reader.matches(','));
295+
}
296+
reader.read('}');
297+
writer.endObject();
298+
String result = writer.toString();
299+
LOG.warn("Cleaned bundled properties: {} = {}", path, result);
300+
return result;
301+
}
302+
}
303+
233304
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.jackrabbit.oak.index.indexer.document.tree.store;
20+
21+
import static org.junit.Assert.assertEquals;
22+
23+
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedTreeStoreTask;
24+
import org.junit.Test;
25+
26+
public class RemovePropertiesOfBundledNodesTest {
27+
28+
@Test
29+
public void cleanUp() {
30+
// this is similar to the real-world case
31+
verify("{\"jcr:created\":\"dat:2020-05-06T17:15:13.971Z\",\"jcr:primaryType\":\"nam:nt:file\",\"jcr:createdBy\":\"admin\",\"jcr:content/jcr:lastModified\":\"dat:2025-01-21T03:37:42.095Z\",\"jcr:content/jcr:lastModifiedBy\":\"test\"}",
32+
"{\"jcr:created\":\"dat:2020-05-06T17:15:13.971Z\",\"jcr:primaryType\":\"nam:nt:file\",\"jcr:createdBy\":\"admin\"}");
33+
34+
// generic entries
35+
verify("{}", "{}");
36+
verify("{\"c\":null,\"b\":\"x\",\"a\":123,\"d\":[1,2,null,\"x\"]}",
37+
"{\"c\":null,\"b\":\"x\",\"a\":123,\"d\":[1,2,null,\"x\"]}");
38+
39+
// false positive
40+
verify("{\"c\":\"jcr:content/that\"}",
41+
"{\"c\":\"jcr:content/that\"}");
42+
43+
// generic entries that need cleaning
44+
verify("{\"c\":null,\"jcr:content/this\":null,\"a\":123,\"jcr:content/that\":[1,2,null,\"x\"]}",
45+
"{\"c\":null,\"a\":123}");
46+
verify("{\"c\":null,\"jcr:content/this\":null,\"a\":123,\"array\":[1,2,null,\"x\"]}",
47+
"{\"c\":null,\"a\":123,\"array\":[1,2,null,\"x\"]}");
48+
49+
}
50+
51+
static void verify(String input, String expected) {
52+
String v2 = PipelinedTreeStoreTask.removePropertiesOfBundledNodes("/test", input);
53+
assertEquals(expected, v2);
54+
55+
}
56+
}

0 commit comments

Comments
 (0)