Skip to content

Commit 140dcc1

Browse files
committed
Fail query runner when nodes do not come up
1 parent 003d86a commit 140dcc1

File tree

1 file changed

+31 -14 lines changed

1 file changed

+31 -14 lines changed

presto-tests/src/main/java/com/facebook/presto/tests/DistributedQueryRunner.java

Lines changed: 31 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@
7676
import java.util.Optional;
7777
import java.util.Set;
7878
import java.util.concurrent.ThreadLocalRandom;
79+
import java.util.concurrent.TimeoutException;
7980
import java.util.concurrent.atomic.AtomicReference;
8081
import java.util.concurrent.locks.Lock;
8182
import java.util.concurrent.locks.ReadWriteLock;
@@ -363,9 +364,12 @@ private DistributedQueryRunner(
363364
prestoClients = prestoClientsBuilder.build();
364365

365366
long start = nanoTime();
366-
while (!allNodesGloballyVisible()) {
367-
Assertions.assertLessThan(nanosSince(start), new Duration(100, SECONDS));
368-
MILLISECONDS.sleep(10);
367+
try {
368+
waitForAllNodesGloballyVisible(start);
369+
}
370+
catch (TimeoutException e) {
371+
closer.close();
372+
throw e;
369373
}
370374
log.info("Announced servers in %s", nanosSince(start).convertToMostSuccinctTimeUnit());
371375

@@ -517,22 +521,35 @@ else if (coordinatorSidecar) {
517521
return server;
518522
}
519523

520-
private boolean allNodesGloballyVisible()
524+
private void waitForAllNodesGloballyVisible(long startTimeInMs)
525+
throws Exception
521526
{
522-
int expectedActiveNodesForRm = externalWorkers.size() + servers.size();
523-
int expectedActiveNodesForCoordinator = externalWorkers.size() + servers.size();
527+
int expectedActiveNodes = externalWorkers.size() + servers.size();
528+
Duration timeout = new Duration(100, SECONDS);
524529

525-
for (TestingPrestoServer server : servers) {
526-
AllNodes allNodes = server.refreshNodes();
527-
int activeNodeCount = allNodes.getActiveNodes().size();
530+
while (true) {
531+
for (TestingPrestoServer server : servers) {
532+
AllNodes allNodes = server.refreshNodes();
533+
int activeNodeCount = allNodes.getActiveNodes().size();
528534

529-
if (!allNodes.getInactiveNodes().isEmpty() ||
530-
(server.isCoordinator() && activeNodeCount != expectedActiveNodesForCoordinator) ||
531-
(server.isResourceManager() && activeNodeCount != expectedActiveNodesForRm)) {
532-
return false;
535+
if (!allNodes.getInactiveNodes().isEmpty()) {
536+
if (nanosSince(startTimeInMs).compareTo(timeout) >= 0) {
537+
throw new TimeoutException(format("Timed out waiting for all nodes to be globally visible. Inactive nodes: %s", allNodes.getInactiveNodes()));
538+
}
539+
break;
540+
}
541+
else if ((server.isCoordinator() || server.isResourceManager()) && activeNodeCount != expectedActiveNodes) {
542+
if (nanosSince(startTimeInMs).compareTo(timeout) >= 0) {
543+
throw new TimeoutException(format(
544+
"Timed out waiting for all nodes to be globally visible. Node count: %s, expected: %s",
545+
activeNodeCount, expectedActiveNodes));
546+
}
547+
break;
548+
}
549+
return;
533550
}
551+
MILLISECONDS.sleep(10);
534552
}
535-
return true;
536553
}
537554

538555
public TestingPrestoClient getRandomClient()

0 commit comments

Comments
 (0)