Skip to content

Commit 6ca71c7

Browse files
authored
OAK-11444 [full-gc] Save document id and empty properties names before deletion (#2038)
1 parent 71875f1 commit 6ca71c7

16 files changed

+569
-15
lines changed

oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java

+9
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ private static class RevisionsOptions extends Utils.NodeStoreOptions {
147147
final OptionSpec<Boolean> dryRun;
148148
final OptionSpec<Boolean> embeddedVerification;
149149
final OptionSpec<Integer> fullGcMode;
150+
final OptionSpec<Boolean> fullGCAuditLoggingEnabled;
150151

151152
RevisionsOptions(String usage) {
152153
super(usage);
@@ -208,6 +209,8 @@ private static class RevisionsOptions extends Utils.NodeStoreOptions {
208209
"to be considered for Full GC i.e. Version Garbage Collector (Full GC) logic will only consider those " +
209210
"nodes for Full GC which are not accessed recently (currentTime - lastModifiedTime > fullGcMaxAge). Default: 86400 (one day)")
210211
.withOptionalArg().ofType(Long.class).defaultsTo(TimeUnit.DAYS.toSeconds(1));
212+
fullGCAuditLoggingEnabled = parser.accepts("fullGCAuditLoggingEnabled", "Enable audit logging for Full GC")
213+
.withOptionalArg().ofType(Boolean.class).defaultsTo(FALSE);
211214
}
212215

213216
public RevisionsOptions parse(String[] args) {
@@ -306,6 +309,10 @@ boolean isEntireRepo() {
306309
boolean doCompaction() {
307310
return options.has(compact);
308311
}
312+
313+
Boolean isFullGCAuditLoggingEnabled() {
314+
return options.has(fullGCAuditLoggingEnabled);
315+
}
309316
}
310317

311318
@Override
@@ -375,6 +382,7 @@ private VersionGarbageCollector bootstrapVGC(RevisionsOptions options, Closer cl
375382
builder.setFullGCBatchSize(options.getFullGcBatchSize());
376383
builder.setFullGCProgressSize(options.getFullGcProgressSize());
377384
builder.setFullGcMaxAgeMillis(SECONDS.toMillis(options.getFullGcMaxAge()));
385+
builder.setFullGCAuditLoggingEnabled(options.isFullGCAuditLoggingEnabled());
378386

379387
// create a VersionGCSupport while builder is read-write
380388
VersionGCSupport gcSupport = builder.createVersionGCSupport();
@@ -408,6 +416,7 @@ private VersionGarbageCollector bootstrapVGC(RevisionsOptions options, Closer cl
408416
System.out.println("FullGcProgressSize is : " + options.getFullGcProgressSize());
409417
System.out.println("FullGcMaxAgeInSecs is : " + options.getFullGcMaxAge());
410418
System.out.println("FullGcMaxAgeMillis is : " + builder.getFullGcMaxAgeMillis());
419+
System.out.println("FullGCAuditLoggingEnabled is : " + options.isFullGCAuditLoggingEnabled());
411420
VersionGarbageCollector gc = createVersionGC(builder.build(), gcSupport, options.isDryRun(), builder);
412421

413422
VersionGCOptions gcOptions = gc.getOptions();

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java

+5
Original file line numberDiff line numberDiff line change
@@ -400,4 +400,9 @@
400400
name = "Invisible for discovery",
401401
description = "Boolean value indicating whether the instance should be discoverable by the cluster. The default value is " + DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY)
402402
boolean invisibleForDiscovery() default DocumentNodeStoreService.DEFAULT_INVISIBLE_FOR_DISCOVERY;
403+
404+
/**
 * Configuration attribute that switches persistent audit logging for Full GC
 * on or off. Disabled ({@code false}) unless configured otherwise.
 */
@AttributeDefinition(
405+
name = "Enable Full GC Persistent Audit Logging",
406+
description = "This parameter will enable/disable the saving of deleted document IDs and properties during FullGC into a persistent storage, e.g Mongo collection")
407+
boolean fullGCAuditLoggingEnabled() default false;
403408
}

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java

+10
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ public class DocumentNodeStoreBuilder<T extends DocumentNodeStoreBuilder<T>> {
185185
private int fullGCBatchSize = DocumentNodeStoreService.DEFAULT_FGC_BATCH_SIZE;
186186
private int fullGCProgressSize = DocumentNodeStoreService.DEFAULT_FGC_PROGRESS_SIZE;
187187
private double fullGCDelayFactor = DocumentNodeStoreService.DEFAULT_FGC_DELAY_FACTOR;
188+
private boolean fullGCAuditLoggingEnabled;
188189
private long suspendTimeoutMillis = DEFAULT_SUSPEND_TIMEOUT;
189190

190191
/**
@@ -317,6 +318,15 @@ public boolean isFullGCEnabled() {
317318
return this.fullGCEnabled;
318319
}
319320

321+
public T setFullGCAuditLoggingEnabled(boolean b) {
322+
this.fullGCAuditLoggingEnabled = b;
323+
return thisBuilder();
324+
}
325+
326+
public boolean isFullGCAuditLoggingEnabled() {
327+
return this.fullGCAuditLoggingEnabled;
328+
}
329+
320330
public T setFullGCIncludePaths(@Nullable String[] includePaths) {
321331
if (isNull(includePaths) || includePaths.length == 0 || Arrays.equals(includePaths, new String[]{"/"})) {
322332
this.fullGCIncludePaths = Set.of();

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java

+1
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ private void configureBuilder(DocumentNodeStoreBuilder<?> builder) {
534534
setFullGCBatchSize(config.fullGCBatchSize()).
535535
setFullGCProgressSize(config.fullGCProgressSize()).
536536
setFullGCDelayFactor(config.fullGCDelayFactor()).
537+
setFullGCAuditLoggingEnabled(config.fullGCAuditLoggingEnabled()).
537538
setSuspendTimeoutMillis(config.suspendTimeoutMillis()).
538539
setClusterIdReuseDelayAfterRecovery(config.clusterIdReuseDelayAfterRecoveryMillis()).
539540
setRecoveryDelayMillis(config.recoveryDelayMillis()).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.jackrabbit.oak.plugins.document;
19+
20+
import java.util.List;
21+
import java.util.Map;
22+
/**
23+
* This class is as a wrapper around DocumentStore that expose two methods used to clean garbage from NODES collection
24+
* public int remove(Map<String, Long> orphanOrDeletedRemovalMap)
25+
* public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList)
26+
* When enabled
27+
* Each method saves the document ID or empty properties names (that will be deleted) to a separate _bin collection as a BinDocument then delegates deletion to DocumentStore
28+
*
29+
* When disabled (default)
30+
* Each method delegates directly to DocumentStore
31+
*/
32+
public interface FullGcNodeBin {
33+
34+
static FullGcNodeBin noBin(DocumentStore store) {
35+
return new FullGcNodeBin() {
36+
@Override
37+
public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
38+
return store.remove(Collection.NODES, orphanOrDeletedRemovalMap);
39+
}
40+
41+
@Override
42+
public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) {
43+
return store.findAndUpdate(Collection.NODES, updateOpList);
44+
}
45+
46+
@Override
47+
public void setEnabled(boolean value) {
48+
// no-op
49+
}
50+
};
51+
}
52+
53+
/**
54+
* Remove orphaned or deleted documents from the NODES collection
55+
* If bin is enabled, the document IDs are saved to the SETTINGS collection with ID prefixed with '/bin/'
56+
* If document ID cannot be saved then the removal of the document fails
57+
* If the bin is disabled, the document IDs are directly removed from the NODES collection
58+
*
59+
* @param orphanOrDeletedRemovalMap the keys of the documents to remove with the corresponding timestamps
60+
* @return the number of documents removed
61+
* @see DocumentStore#remove(Collection, Map)
62+
*/
63+
int remove(Map<String, Long> orphanOrDeletedRemovalMap);
64+
65+
/**
66+
* Performs a conditional update
67+
* If the bin is enabled, the removed properties are saved to the SETTINGS collection with ID prefixed with '/bin/' and empty value
68+
* If the document ID and properties cannot be saved then the removal of the property fails
69+
* If bin is disabled, the removed properties are directly removed from the NODES collection
70+
*
71+
* @param updateOpList the update operation List
72+
* @return the list containing old documents
73+
* @see DocumentStore#findAndUpdate(Collection, List)
74+
*/
75+
List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList);
76+
77+
/**
78+
* Enable or disable the bin
79+
* @param value true to enable, false to disable
80+
*/
81+
void setEnabled(boolean value);
82+
}

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java

+4
Original file line numberDiff line numberDiff line change
@@ -307,4 +307,8 @@ protected static boolean isDefaultNoBranchSplitNewerThan(NodeDocument doc,
307307
Revision r = IterableUtils.getFirst(doc.getAllChanges(), null);
308308
return r != null && sweepRevs.isRevisionNewer(r);
309309
}
310+
311+
public FullGcNodeBin getFullGCBin() {
312+
return FullGcNodeBin.noBin(store);
313+
}
310314
}

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java

+3-9
Original file line numberDiff line numberDiff line change
@@ -1947,15 +1947,9 @@ public void removeGarbage(final VersionGCStats stats) {
19471947
}
19481948
if (!isFullGCDryRun) {
19491949
// only delete these in case it is not a dryRun
1950-
19511950
if (!orphanOrDeletedRemovalMap.isEmpty()) {
1952-
// use remove() with the modified check to rule
1953-
// out any further race-condition where this removal
1954-
// races with a un-orphan/re-creation as a result of which
1955-
// the node should now not be removed. The modified check
1956-
// ensures a node would then not be removed
1957-
// (and as a result the removedSize != map.size())
1958-
final int removedSize = ds.remove(NODES, orphanOrDeletedRemovalMap);
1951+
1952+
final int removedSize = versionStore.getFullGCBin().remove(orphanOrDeletedRemovalMap);
19591953
stats.updatedFullGCDocsCount += removedSize;
19601954
stats.deletedDocGCCount += removedSize;
19611955
stats.deletedOrphanNodesCount += removedSize;
@@ -1973,7 +1967,7 @@ public void removeGarbage(final VersionGCStats stats) {
19731967
}
19741968

19751969
if (!updateOpList.isEmpty()) {
1976-
List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, updateOpList);
1970+
List<NodeDocument> oldDocs = versionStore.getFullGCBin().findAndUpdate(updateOpList);
19771971

19781972

19791973
int deletedProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum();

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderBase.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ public T setCollectionCompressionType(String compressionType) {
172172
public VersionGCSupport createVersionGCSupport() {
173173
DocumentStore store = getDocumentStore();
174174
if (store instanceof MongoDocumentStore) {
175-
return new MongoVersionGCSupport((MongoDocumentStore) store);
175+
return new MongoVersionGCSupport((MongoDocumentStore) store, isFullGCAuditLoggingEnabled());
176176
} else {
177177
return super.createVersionGCSupport();
178178
}

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java

+17
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import java.util.stream.StreamSupport;
4343

4444
import org.apache.commons.io.IOUtils;
45+
import com.mongodb.client.model.IndexOptions;
4546
import org.apache.jackrabbit.guava.common.base.Stopwatch;
4647
import org.apache.jackrabbit.guava.common.collect.Iterators;
4748
import org.apache.jackrabbit.guava.common.util.concurrent.AtomicDouble;
@@ -166,6 +167,8 @@ public class MongoDocumentStore implements DocumentStore {
166167
* which we block any data modification operation when system has been throttled.
167168
*/
168169
public static final long DEFAULT_THROTTLING_TIME_MS = Long.getLong("oak.mongo.throttlingTime", 20);
170+
171+
private static final @NotNull String BIN_COLLECTION = "bin";
169172
/**
170173
* nodeNameLimit for node name based on Mongo Version
171174
*/
@@ -348,6 +351,9 @@ public MongoDocumentStore(MongoClient connection, MongoDatabase db,
348351

349352
if (!readOnly) {
350353
ensureIndexes(db, status);
354+
if (builder.isFullGCAuditLoggingEnabled()) {
355+
ensureFullGcTTLIndex();
356+
}
351357
}
352358

353359
this.nodeLocks = new StripedNodeDocumentLocks();
@@ -465,6 +471,13 @@ private void ensureIndexes(@NotNull MongoDatabase db, @NotNull MongoStatus mongo
465471
createIndex(journal, JournalEntry.MODIFIED, true, false, false);
466472
}
467473

474+
private void ensureFullGcTTLIndex() {
475+
//TTL index for full GC bin documents to expire after 90 days
476+
//see https://issues.apache.org/jira/browse/OAK-11444
477+
IndexOptions indexOptions = new IndexOptions().expireAfter(TimeUnit.DAYS.toSeconds(90), TimeUnit.SECONDS);
478+
connection.getCollection(BIN_COLLECTION).createIndex(new org.bson.Document(MongoFullGcNodeBin.GC_COLLECTED_AT, 1), indexOptions);
479+
}
480+
468481
private void createCollection(MongoDatabase db, String collectionName, MongoStatus mongoStatus) {
469482
CreateCollectionOptions options = new CreateCollectionOptions();
470483

@@ -2011,6 +2024,10 @@ <T extends Document> MongoCollection<BasicDBObject> getDBCollection(Collection<T
20112024
return getDBCollection(collection).withReadPreference(readPreference);
20122025
}
20132026

2027+
<T extends Document> MongoCollection<BasicDBObject> getBinCollection() {
2028+
return this.connection.getCollection(BIN_COLLECTION);
2029+
}
2030+
20142031
MongoDatabase getDatabase() {
20152032
return connection.getDatabase();
20162033
}

0 commit comments

Comments
 (0)