@@ -60,6 +60,8 @@ protected boolean removeEldestEntry (Map.Entry eldest) {
6060
6161 public static final Set <String > vcToCleanUp = Collections
6262 .synchronizedSet (new HashSet <>());
63+
64+ public static volatile boolean isCleaning = false ;
6365
6466
6567 public VirtualCorpusCache () {
@@ -86,13 +88,11 @@ private static boolean isVcIdValid (String vcId) {
8688
8789 String [] parts = vcId .split ("/" );
8890 if (parts .length > 2 ) {
89- vcToCleanUp .remove (vcId );
9091 return false ;
9192 }
9293
9394 String vcName = parts .length == 2 ? parts [1 ] : parts [0 ];
9495 if (!vcNamePattern .matcher (vcName ).matches ()) {
95- vcToCleanUp .remove (vcId );
9696 return false ;
9797 }
9898
@@ -163,6 +163,11 @@ public static void store (String vcId, KrillIndex index) {
163163 }
164164
165165
166+ /** Retrieve a VC from the cache, either from the memory map or disk.
167+ *
168+ * @param vcId
169+ * @return a map of index leaves and DocBits, otherwise null if not found.
170+ */
166171 public static Map <String , DocBits > retrieve (String vcId ) {
167172 Map <String , DocBits > vcData = map .get (vcId );
168173 if (vcData != null ) {
@@ -218,8 +223,11 @@ public static void delete (String vcId) {
218223 return ;
219224 }
220225
221- vcToCleanUp .remove (vcId );
222226 map .remove (vcId );
227+ if (!isCleaning ) {
228+ vcToCleanUp .remove (vcId );
229+ }
230+
223231 File vc = new File (CACHE_LOCATION + "/" + vcId );
224232 if (vc .exists ()) {
225233 for (File f : vc .listFiles ()) {
@@ -262,14 +270,20 @@ public static void reset () {
262270 */
263271 public static void setIndexInfo (IndexInfo indexInfo ) {
264272 VirtualCorpusCache .indexInfo = indexInfo ;
265- synchronized (vcToCleanUp ) {
266- if (!vcToCleanUp .isEmpty ()) {
273+ //synchronized (vcToCleanUp) {
274+ if (!vcToCleanUp .isEmpty () && !isCleaning ) {
275+ isCleaning = true ;
267276 cleanup ();
277+ isCleaning = false ;
268278 }
269- }
279+ // }
270280 }
271281
272282
283+ /** Remove out-dated leaves that are not used anymore due to index update
284+ * (i.e., by sending a close-index-reader-API request)
285+ *
286+ */
273287 private static void cleanup () {
274288 final Set <String > currentLeafFingerprints = indexInfo
275289 .getAllLeafFingerprints ();
@@ -314,18 +328,32 @@ public static DocBits getDocBits (String vcId, String leafFingerprint,
314328 Supplier <DocBits > calculateDocBits ) {
315329 DocBits docBits = null ;
316330 Map <String , DocBits > leafToDocBitMap = retrieve (vcId );
331+ // if the VC is found neither in the memory map nor on disk,
332+ // create an empty entry for it in the memory map
317333 if (leafToDocBitMap == null ) {
318334 leafToDocBitMap = Collections
319335 .synchronizedMap (new HashMap <String , DocBits >());
320336 map .put (vcId , leafToDocBitMap );
321337 }
322338 else {
323339 docBits = leafToDocBitMap .get (leafFingerprint );
324- if (docBits == null ) {
340+ // VC-id is in the cache but there is no data for the leaf
341+ if (docBits == null && !isCleaning ) {
325342 vcToCleanUp .add (vcId );
326343 }
327344 }
328345 if (docBits == null ) {
346+ /* Calculating docBits and storing in the cache
347+ *
348+ * This process is triggered when finding a JSON-LD file at
349+ * the named-vc folder that doesn't exist in the cache.
350+ *
351+ * It should only happen at server start-up, or on an index update
352+ * for a small number of new leaves.
353+ *
354+ * New named VCs should *not* be added to a running instance, as
355+ * it would trigger this process.
356+ */
329357 docBits = calculateDocBits .get ();
330358 leafToDocBitMap .put (leafFingerprint , docBits );
331359 storeOnDisk (vcId , leafFingerprint , docBits );
0 commit comments