Skip to content

Commit 7d46f39

Browse files
OAK-11457 Tree store sometimes contains bundled properties (#2053)
* OAK-11457 Tree store sometimes contains bundled properties * OAK-11457 Tree store sometimes contains bundled properties
1 parent 7d24112 commit 7d46f39

File tree

2 files changed

+128
-1
lines changed

2 files changed

+128
-1
lines changed

oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedTreeStoreTask.java

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,17 @@
3434
import java.util.concurrent.TimeUnit;
3535

3636
import org.apache.jackrabbit.guava.common.base.Stopwatch;
37+
import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
38+
import org.apache.jackrabbit.oak.commons.json.JsopReader;
39+
import org.apache.jackrabbit.oak.commons.json.JsopTokenizer;
40+
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
3741
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedSortBatchTask.Result;
3842
import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore;
3943
import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession;
4044
import org.apache.jackrabbit.oak.plugins.index.IndexingReporter;
4145
import org.apache.jackrabbit.oak.plugins.index.MetricsFormatter;
4246
import org.apache.jackrabbit.oak.plugins.index.MetricsUtils;
47+
import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
4348
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
4449
import org.slf4j.Logger;
4550
import org.slf4j.LoggerFactory;
@@ -207,7 +212,9 @@ private void sortAndSaveBatch(NodeStateEntryBatch nseb) throws Exception {
207212
int valueLength = buffer.getInt();
208213
String value = new String(buffer.array(), buffer.arrayOffset() + buffer.position(), valueLength, StandardCharsets.UTF_8);
209214
textSize += entry.getPath().length() + value.length() + 2;
210-
treeStore.putNode(entry.getPath(), value);
215+
String path = entry.getPath();
216+
value = removePropertiesOfBundledNodes(path, value);
217+
treeStore.putNode(path, value);
211218
}
212219
session.checkpoint();
213220
unmergedRoots++;
@@ -230,4 +237,68 @@ private void sortAndSaveBatch(NodeStateEntryBatch nseb) throws Exception {
230237
}
231238
}
232239

240+
/**
241+
* If there are any, remove properties of bundled nodes (jcr:content/...) from the JSON-encoded node.
242+
*
243+
* @param path the path
244+
* @param value the JSON-encoded node
245+
* @return the cleaned JSON
246+
*/
247+
public static String removePropertiesOfBundledNodes(String path, String value) {
248+
if (value.indexOf("\"jcr:content/") < 0) {
249+
return value;
250+
}
251+
// possibly the node contains a bundled property, but we are not sure
252+
// try to de-serialize
253+
NodeStateEntryReader nodeReader = new NodeStateEntryReader(new MemoryBlobStore());
254+
try {
255+
// the following line will throw an exception if de-serialization fails
256+
nodeReader.read(path + "|" + value);
257+
// ok it did not: it was a false positive
258+
return value;
259+
} catch (Exception e) {
260+
LOG.warn("Unable to de-serialize due to presence of bundled properties: {} = {}", path, value);
261+
JsopReader reader = new JsopTokenizer(value);
262+
JsopBuilder writer = new JsopBuilder();
263+
reader.read('{');
264+
writer.object();
265+
if (!reader.matches('}')) {
266+
do {
267+
String key = reader.readString();
268+
reader.read(':');
269+
// skip properties that contain "/"
270+
boolean skip = key.indexOf('/') >= 0;
271+
if (!skip) {
272+
writer.key(key);
273+
}
274+
if (reader.matches('[')) {
275+
if (!skip) {
276+
writer.array();
277+
}
278+
do {
279+
String raw = reader.readRawValue();
280+
if (!skip) {
281+
writer.encodedValue(raw);
282+
}
283+
} while (reader.matches(','));
284+
reader.read(']');
285+
if (!skip) {
286+
writer.endArray();
287+
}
288+
} else {
289+
String raw = reader.readRawValue();
290+
if (!skip) {
291+
writer.encodedValue(raw);
292+
}
293+
}
294+
} while (reader.matches(','));
295+
}
296+
reader.read('}');
297+
writer.endObject();
298+
String result = writer.toString();
299+
LOG.warn("Cleaned bundled properties: {} = {}", path, result);
300+
return result;
301+
}
302+
}
303+
233304
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.jackrabbit.oak.index.indexer.document.tree.store;
20+
21+
import static org.junit.Assert.assertEquals;
22+
23+
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedTreeStoreTask;
24+
import org.junit.Test;
25+
26+
public class RemovePropertiesOfBundledNodesTest {
27+
28+
@Test
29+
public void cleanUp() {
30+
// this is similar to the real-world case
31+
verify("{\"jcr:created\":\"dat:2020-05-06T17:15:13.971Z\",\"jcr:primaryType\":\"nam:nt:file\",\"jcr:createdBy\":\"admin\",\"jcr:content/jcr:lastModified\":\"dat:2025-01-21T03:37:42.095Z\",\"jcr:content/jcr:lastModifiedBy\":\"test\"}",
32+
"{\"jcr:created\":\"dat:2020-05-06T17:15:13.971Z\",\"jcr:primaryType\":\"nam:nt:file\",\"jcr:createdBy\":\"admin\"}");
33+
34+
// generic entries
35+
verify("{}", "{}");
36+
verify("{\"c\":null,\"b\":\"x\",\"a\":123,\"d\":[1,2,null,\"x\"]}",
37+
"{\"c\":null,\"b\":\"x\",\"a\":123,\"d\":[1,2,null,\"x\"]}");
38+
39+
// false positive
40+
verify("{\"c\":\"jcr:content/that\"}",
41+
"{\"c\":\"jcr:content/that\"}");
42+
43+
// generic entries that need cleaning
44+
verify("{\"c\":null,\"jcr:content/this\":null,\"a\":123,\"jcr:content/that\":[1,2,null,\"x\"]}",
45+
"{\"c\":null,\"a\":123}");
46+
verify("{\"c\":null,\"jcr:content/this\":null,\"a\":123,\"array\":[1,2,null,\"x\"]}",
47+
"{\"c\":null,\"a\":123,\"array\":[1,2,null,\"x\"]}");
48+
49+
}
50+
51+
static void verify(String input, String expected) {
52+
String v2 = PipelinedTreeStoreTask.removePropertiesOfBundledNodes("/test", input);
53+
assertEquals(expected, v2);
54+
55+
}
56+
}

0 commit comments

Comments
 (0)