3434import java .util .concurrent .TimeUnit ;
3535
3636import org .apache .jackrabbit .guava .common .base .Stopwatch ;
37+ import org .apache .jackrabbit .oak .commons .json .JsopBuilder ;
38+ import org .apache .jackrabbit .oak .commons .json .JsopReader ;
39+ import org .apache .jackrabbit .oak .commons .json .JsopTokenizer ;
40+ import org .apache .jackrabbit .oak .index .indexer .document .flatfile .NodeStateEntryReader ;
3741import org .apache .jackrabbit .oak .index .indexer .document .flatfile .pipelined .PipelinedSortBatchTask .Result ;
3842import org .apache .jackrabbit .oak .index .indexer .document .tree .TreeStore ;
3943import org .apache .jackrabbit .oak .index .indexer .document .tree .store .TreeSession ;
4044import org .apache .jackrabbit .oak .plugins .index .IndexingReporter ;
4145import org .apache .jackrabbit .oak .plugins .index .MetricsFormatter ;
4246import org .apache .jackrabbit .oak .plugins .index .MetricsUtils ;
47+ import org .apache .jackrabbit .oak .spi .blob .MemoryBlobStore ;
4348import org .apache .jackrabbit .oak .stats .StatisticsProvider ;
4449import org .slf4j .Logger ;
4550import org .slf4j .LoggerFactory ;
@@ -207,7 +212,9 @@ private void sortAndSaveBatch(NodeStateEntryBatch nseb) throws Exception {
207212 int valueLength = buffer .getInt ();
208213 String value = new String (buffer .array (), buffer .arrayOffset () + buffer .position (), valueLength , StandardCharsets .UTF_8 );
209214 textSize += entry .getPath ().length () + value .length () + 2 ;
210- treeStore .putNode (entry .getPath (), value );
215+ String path = entry .getPath ();
216+ value = removePropertiesOfBundledNodes (path , value );
217+ treeStore .putNode (path , value );
211218 }
212219 session .checkpoint ();
213220 unmergedRoots ++;
@@ -230,4 +237,68 @@ private void sortAndSaveBatch(NodeStateEntryBatch nseb) throws Exception {
230237 }
231238 }
232239
240+ /**
241+ * If there are any, remove properties of bundled nodes (jcr:content/...) from the JSON-encoded node.
242+ *
243+ * @param path the path
244+ * @param value the JSON-encoded node
245+ * @return the cleaned JSON
246+ */
247+ public static String removePropertiesOfBundledNodes (String path , String value ) {
248+ if (value .indexOf ("\" jcr:content/" ) < 0 ) {
249+ return value ;
250+ }
251+ // possibly the node contains a bundled property, but we are not sure
252+ // try to de-serialize
253+ NodeStateEntryReader nodeReader = new NodeStateEntryReader (new MemoryBlobStore ());
254+ try {
255+ // the following line will throw an exception if de-serialization fails
256+ nodeReader .read (path + "|" + value );
257+ // ok it did not: it was a false positive
258+ return value ;
259+ } catch (Exception e ) {
260+ LOG .warn ("Unable to de-serialize due to presence of bundled properties: {} = {}" , path , value );
261+ JsopReader reader = new JsopTokenizer (value );
262+ JsopBuilder writer = new JsopBuilder ();
263+ reader .read ('{' );
264+ writer .object ();
265+ if (!reader .matches ('}' )) {
266+ do {
267+ String key = reader .readString ();
268+ reader .read (':' );
269+ // skip properties that contain "/"
270+ boolean skip = key .indexOf ('/' ) >= 0 ;
271+ if (!skip ) {
272+ writer .key (key );
273+ }
274+ if (reader .matches ('[' )) {
275+ if (!skip ) {
276+ writer .array ();
277+ }
278+ do {
279+ String raw = reader .readRawValue ();
280+ if (!skip ) {
281+ writer .encodedValue (raw );
282+ }
283+ } while (reader .matches (',' ));
284+ reader .read (']' );
285+ if (!skip ) {
286+ writer .endArray ();
287+ }
288+ } else {
289+ String raw = reader .readRawValue ();
290+ if (!skip ) {
291+ writer .encodedValue (raw );
292+ }
293+ }
294+ } while (reader .matches (',' ));
295+ }
296+ reader .read ('}' );
297+ writer .endObject ();
298+ String result = writer .toString ();
299+ LOG .warn ("Cleaned bundled properties: {} = {}" , path , result );
300+ return result ;
301+ }
302+ }
303+
233304}
0 commit comments