diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java index 803188b5a29..e0c6fbe5774 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java @@ -690,25 +690,28 @@ private void lookupTablet(ClientContext context, Text row, LockCheckerSession lc metadataRow.append(row.getBytes(), 0, row.getLength()); CachedTablet ptl = parent.findTablet(context, metadataRow, false, LocationNeed.REQUIRED); - if (ptl != null) { - // Only allow a single lookup at time per parent tablet. For example if a tables tablets are - // all stored in three metadata tablets, then that table could have up to three concurrent - // metadata lookups. - Timer timer = Timer.startNew(); - try (var unused = lookupLocks.lock(ptl.getExtent())) { - // See if entry was added to cache by another thread while we were waiting on the lock - var cached = findTabletInCache(row); - if (cached != null && cached.getCreationTimer().startedAfter(timer)) { - // This cache entry was added after we started waiting on the lock so lets use it and not - // go to the metadata table. This means another thread was holding the lock and doing - // metadata lookups when we requested the lock. - return; - } - // Lookup tablets in metadata table and update cache. Also updating the cache while holding - // the lock is important as it ensures other threads that are waiting on the lock will see - // what this thread found and may be able to avoid metadata lookups. - lookupTablet(context, lcSession, ptl, metadataRow); + if (ptl == null) { + return; + } + + CachedTablet now = findTabletInCache(row); + if (now != null) { + if (now.getTserverLocation().isPresent() && lcSession.checkLock(now) != null) { + return; + } + } + // Only allow a single lookup at time per parent tablet. For example if a tables tablets are + // all stored in three metadata tablets, then that table could have up to three concurrent + // metadata lookups. + try (var unused = lookupLocks.lock(ptl.getExtent())) { + now = findTabletInCache(row); + if (now != null && now.getTserverLocation().isPresent() && lcSession.checkLock(now) != null) { + return; } + // Lookup tablets in metadata table and update cache. Also updating the cache while holding + // the lock is important as it ensures other threads that are waiting on the lock will see + // what this thread found and may be able to avoid metadata lookups. + lookupTablet(context, lcSession, ptl, metadataRow); } } diff --git a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java index 87855d2e12c..372af800838 100644 --- a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java +++ b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java @@ -19,6 +19,7 @@ package org.apache.accumulo.core.clientImpl; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.accumulo.core.util.LazySingletons.RANDOM; import static org.easymock.EasyMock.replay; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -1986,45 +1987,56 @@ public void testMultithreadedLookups() throws Exception { setLocation(tservers, "tserver3", mte2, ke3, "tserver9"); var executor = Executors.newCachedThreadPool(); - List> futures = new ArrayList<>(); + final int lookupCount = 128; + final int roundCount = 8; - // start 64 threads all trying to lookup data in the cache, should see only two threads do a - // concurrent lookup in the metadata table and no more or less. - List rowsToLookup = new ArrayList<>(); + List> futures = new ArrayList<>(roundCount * lookupCount); - for (int i = 0; i < 64; i++) { - String lookup = (char) ('a' + (i % 26)) + ""; - rowsToLookup.add(lookup); - } + // multiple rounds to increase the chance of contention + for (int round = 0; round < roundCount; round++) { - Collections.shuffle(rowsToLookup); - - for (var lookup : rowsToLookup) { - var future = executor.submit(() -> { - var loc = metaCache.findTablet(context, new Text(lookup), false, LocationNeed.REQUIRED); - if (lookup.compareTo("m") <= 0) { - assertEquals("tserver7", loc.getTserverLocation().orElseThrow()); - } else if (lookup.compareTo("q") <= 0) { - assertEquals("tserver8", loc.getTserverLocation().orElseThrow()); - } else { - assertEquals("tserver9", loc.getTserverLocation().orElseThrow()); - } - return loc; - }); - futures.add(future); + // start a bunch of threads all trying to lookup data in the cache + // should see exactly 2 threads doing metadata lookups at a time + List rowsToLookup = new ArrayList<>(lookupCount); + + for (int i = 0; i < lookupCount; i++) { + String lookup = (char) ('a' + (i % 26)) + ""; + rowsToLookup.add(lookup); + } + + Collections.shuffle(rowsToLookup, RANDOM.get()); + + for (var lookup : rowsToLookup) { + var future = executor.submit(() -> { + if (RANDOM.get().nextInt(10) < 3) { + Thread.yield(); + } + var loc = metaCache.findTablet(context, new Text(lookup), false, LocationNeed.REQUIRED); + if (lookup.compareTo("m") <= 0) { + assertEquals("tserver7", loc.getTserverLocation().orElseThrow()); + } else if (lookup.compareTo("q") <= 0) { + assertEquals("tserver8", loc.getTserverLocation().orElseThrow()); + } else { + assertEquals("tserver9", loc.getTserverLocation().orElseThrow()); + } + return loc; + }); + futures.add(future); + } } for (var future : futures) { assertNotNull(future.get()); } - assertTrue(sawTwoActive.get()); + assertTrue(sawTwoActive.get(), "Expected to see exactly two lookups."); // The second metadata tablet (mte2) contains two user tablets (ke2 and ke3). Depending on which // of these two user tablets is looked up in the metadata table first will see a total of 2 or 3 // lookups. If the location of ke2 is looked up first then it will get the locations of ke2 and // ke3 from mte2 and put them in the cache. If the location of ke3 is looked up first then it // will only get the location of ke3 from mte2 and not ke2. - assertTrue(lookups.size() == 2 || lookups.size() == 3, lookups::toString); + assertTrue(lookups.size() == 2 || lookups.size() == 3, + "Expected 2 or 3 lookups, got " + lookups.size() + " : " + lookups); assertEquals(1, lookups.stream().filter(metadataExtent -> metadataExtent.equals(mte1)).count(), lookups::toString); var mte2Lookups =