Skip to content

Commit d986163

Browse files
bigelephant29copybara-github
authored andcommitted
Throw an exception when the local resources will never be available.
PiperOrigin-RevId: 755749640 Change-Id: I9216a472bc443c92c2a341044e96ff90296f8ada
1 parent a2a55f7 commit d986163

File tree

4 files changed

+68
-27
lines changed

4 files changed

+68
-27
lines changed

src/main/java/com/google/devtools/build/lib/actions/ResourceManager.java

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,15 @@ ResourceRequest getRequest() {
112112

113113
/** Closing the ResourceHandle releases the resources associated with it. */
114114
@Override
115-
public void close() throws IOException, InterruptedException {
115+
public void close() throws IOException, InterruptedException, UserExecException {
116116
manager.releaseResources(request, worker);
117117
Profiler.instance()
118118
.completeTask(
119119
resourceAcquiredTime, ProfilerTask.LOCAL_ACTION_COUNTS, "Resources acquired");
120120
}
121121

122-
public void invalidateAndClose(@Nullable Exception e) throws IOException, InterruptedException {
122+
public void invalidateAndClose(@Nullable Exception e)
123+
throws IOException, InterruptedException, UserExecException {
123124
// If there is an exception, we need to set the kill cause before invalidating the object.
124125
// This ensures that the worker implementation updates their worker metrics accordingly
125126
// if/when it destroys itself.
@@ -279,14 +280,14 @@ public WindowUpdateRunner(String name) {
279280
public void run() {
280281
try {
281282
windowUpdate();
282-
} catch (IOException | InterruptedException e) {
283+
} catch (IOException | InterruptedException | UserExecException e) {
283284
logger.atWarning().withCause(e).log(
284285
"Exception while updating window of locally scheduled action: %s", e);
285286
}
286287
}
287288
}
288289

289-
synchronized void windowUpdate() throws IOException, InterruptedException {
290+
synchronized void windowUpdate() throws IOException, InterruptedException, UserExecException {
290291
windowRequestIds.clear();
291292
windowEstimationCpu = 0.0;
292293
processAllWaitingRequests();
@@ -468,7 +469,7 @@ public boolean threadHasResources() {
468469
* @throws java.io.IOException if could not return worker to the workerPool
469470
*/
470471
void releaseResources(ResourceRequest request, @Nullable Worker worker)
471-
throws IOException, InterruptedException {
472+
throws IOException, InterruptedException, UserExecException {
472473
Preconditions.checkNotNull(
473474
request.getResourceSet(),
474475
"releaseResources called with resources == NULL during %s",
@@ -500,7 +501,7 @@ public void acquireResourceOwnership() {
500501
* wait.
501502
*/
502503
private synchronized ResourceLatch acquire(ResourceRequest request)
503-
throws IOException, InterruptedException {
504+
throws IOException, InterruptedException, UserExecException {
504505
if (areResourcesAvailable(request.getResourceSet())) {
505506
Worker worker = incrementResources(request);
506507
return new ResourceLatch(/* latch= */ null, worker);
@@ -523,7 +524,7 @@ private synchronized ResourceLatch acquire(ResourceRequest request)
523524

524525
/** Release resources and process the queues of waiting threads. */
525526
private synchronized void release(ResourceRequest request, @Nullable Worker worker)
526-
throws IOException, InterruptedException {
527+
throws IOException, InterruptedException, UserExecException {
527528
if (worker != null) {
528529
this.workerPool.returnWorker(worker.getWorkerKey(), worker);
529530
}
@@ -555,14 +556,15 @@ private synchronized void release(ResourceRequest request, @Nullable Worker work
555556
processAllWaitingRequests();
556557
}
557558

558-
private synchronized void processAllWaitingRequests() throws IOException, InterruptedException {
559+
private synchronized void processAllWaitingRequests()
560+
throws IOException, InterruptedException, UserExecException {
559561
processWaitingRequests(localRequests);
560562
processWaitingRequests(dynamicWorkerRequests);
561563
processWaitingRequests(dynamicStandaloneRequests);
562564
}
563565

564566
private synchronized void processWaitingRequests(Deque<WaitingRequest> requests)
565-
throws IOException, InterruptedException {
567+
throws IOException, InterruptedException, UserExecException {
566568
if (requests.isEmpty()) {
567569
return;
568570
}
@@ -607,10 +609,29 @@ private void assertResourcesTracked(ResourceSet resources) throws ExecException
607609
}
608610
}
609611

610-
private <T extends Number> boolean isAvailable(T available, T used, T requested) {
612+
private <T extends Number> boolean isAvailable(
613+
T available, T used, T requested, String resourceName) throws UserExecException {
614+
if (!allowOneActionOnResourceUnavailable
615+
&& available.doubleValue() + used.doubleValue() < requested.doubleValue()) {
616+
throw new UserExecException(
617+
FailureDetails.FailureDetail.newBuilder()
618+
.setMessage(
619+
String.format(
620+
"The `%s` resources are not enough to fulfill the request. To allow Bazel to"
621+
+ " bypass this limitation, please adjust the --local_resources flag or"
622+
+ " specify --allow_one_action_on_resource_unavailable in your Bazel"
623+
+ " command.",
624+
resourceName))
625+
.setLocalExecution(
626+
FailureDetails.LocalExecution.newBuilder()
627+
.setCode(FailureDetails.LocalExecution.Code.NOT_ENOUGH_LOCAL_RESOURCE)
628+
.build())
629+
.build());
630+
}
611631
// Resources are considered available if any one of the conditions below is true:
612632
// 1) If resource is not requested at all, it is available.
613-
// 2) If resource is not used at the moment, it is considered to be
633+
// 2) If resource is not used at the moment and the flag
634+
// "allow_one_action_on_resource_unavailable" is enabled, it is considered to be
614635
// available regardless of how much is requested. This is necessary to
615636
// ensure that at any given time, at least one thread is able to acquire
616637
// resources even if it requests more than available.
@@ -622,7 +643,7 @@ private <T extends Number> boolean isAvailable(T available, T used, T requested)
622643

623644
// Method will return true if all requested resources are considered to be available.
624645
@VisibleForTesting
625-
synchronized boolean areResourcesAvailable(ResourceSet resources) {
646+
synchronized boolean areResourcesAvailable(ResourceSet resources) throws UserExecException {
626647
Preconditions.checkNotNull(availableResources);
627648
// Comparison below is robust, since any calculation errors will be fixed
628649
// by the release() method.
@@ -640,7 +661,11 @@ synchronized boolean areResourcesAvailable(ResourceSet resources) {
640661
}
641662

642663
int availableLocalTestCount = availableResources.getLocalTestCount();
643-
if (!isAvailable(availableLocalTestCount, usedLocalTestCount, resources.getLocalTestCount())) {
664+
if (!isAvailable(
665+
availableLocalTestCount,
666+
usedLocalTestCount,
667+
resources.getLocalTestCount(),
668+
"local_test_count")) {
644669
return false;
645670
}
646671

@@ -662,14 +687,14 @@ synchronized boolean areResourcesAvailable(ResourceSet resources) {
662687
resource.getValue() * MIN_NECESSARY_RATIO.getOrDefault(key, DEFAULT_MIN_NECESSARY_RATIO);
663688
double used = usedResources.getOrDefault(key, 0.0);
664689
double available = availableResources.get(key);
665-
if (!isAvailable(available, used, requested)) {
690+
if (!isAvailable(available, used, requested, key)) {
666691
return false;
667692
}
668693
}
669694
return true;
670695
}
671696

672-
synchronized boolean isCpuAvailable(Map.Entry<String, Double> resource) {
697+
synchronized boolean isCpuAvailable(Map.Entry<String, Double> resource) throws UserExecException {
673698
String key = resource.getKey();
674699

675700
double requested =
@@ -684,10 +709,10 @@ synchronized boolean isCpuAvailable(Map.Entry<String, Double> resource) {
684709
if (runningActions >= MAX_ACTIONS_PER_CPU * availableResources.get(ResourceSet.CPU)) {
685710
return false;
686711
}
687-
return isAvailable(available, windowEstimation + currentUsage, requested);
712+
return isAvailable(available, windowEstimation + currentUsage, requested, key);
688713
}
689714

690-
return isAvailable(available, used, requested);
715+
return isAvailable(available, used, requested, key);
691716
}
692717

693718
@VisibleForTesting

src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -645,15 +645,18 @@ private void finishWorkAsync(
645645

646646
w = null;
647647

648-
} catch (IOException | InterruptedException e2) {
648+
} catch (IOException | InterruptedException | UserExecException e2) {
649649
// The reaper thread can't do anything useful about this.
650650
}
651651
} finally {
652652
if (w != null) {
653653
try {
654654
resourceHandle.close();
655-
} catch (IOException | InterruptedException | IllegalStateException e) {
656-
// Error while returning worker to the pool. Could not do anythinng.
655+
} catch (IOException
656+
| InterruptedException
657+
| IllegalStateException
658+
| UserExecException e) {
659+
// Error while returning worker to the pool. Could not do anything.
657660
}
658661
}
659662
}

src/main/protobuf/failure_details.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,7 @@ message LocalExecution {
778778
LOCAL_EXECUTION_UNKNOWN = 0 [(metadata) = { exit_code: 37 }];
779779
LOCKFREE_OUTPUT_PREREQ_UNMET = 1 [(metadata) = { exit_code: 2 }];
780780
UNTRACKED_RESOURCE = 2 [(metadata) = { exit_code: 1 }];
781+
NOT_ENOUGH_LOCAL_RESOURCE = 3 [(metadata) = { exit_code: 1 }];
781782
}
782783

783784
Code code = 1;

src/test/java/com/google/devtools/build/lib/actions/ResourceManagerTest.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ private ResourceHandle acquire(
137137
return acquire(ram, cpu, extraResources, tests, ResourcePriority.LOCAL);
138138
}
139139

140-
private void release(ResourceHandle resourceHandle) throws IOException, InterruptedException {
140+
private void release(ResourceHandle resourceHandle)
141+
throws IOException, InterruptedException, UserExecException {
141142
manager.releaseResources(resourceHandle.getRequest(), /* worker= */ null);
142143
}
143144

@@ -167,8 +168,8 @@ public void allowOneActionOnResourceUnavailable_success() throws Exception {
167168
// TODO: b/405364605 - Add a test for false case after we start throwing an exception.
168169
manager.setAllowOneActionOnResourceUnavailable(true);
169170

170-
// When nothing is consuming RAM,
171-
// Then Resource Manager will successfully acquire an over-budget request for RAM:
171+
// When nothing is consuming RAM, then ResourceManager will successfully acquire an over-budget
172+
// request for RAM if allow_one_action_on_resource_unavailable is set to true.
172173
double bigRam = 10000.0;
173174
ResourceHandle bigRamHandle = acquire(bigRam, 0, 0);
174175
// When RAM is consumed,
@@ -202,6 +203,16 @@ public void allowOneActionOnResourceUnavailable_success() throws Exception {
202203
assertThat(manager.inUse()).isFalse();
203204
}
204205

206+
@Test
207+
public void noAllowOneActionOnResourceUnavailable_exception() throws Exception {
208+
assertThat(manager.inUse()).isFalse();
209+
210+
// When nothing is consuming RAM, then ResourceManager should fail to acquire an over-budget
211+
// request for RAM, when allow_one_action_on_resource_unavailable is not explicitly set.
212+
double bigRam = 10000.0;
213+
assertThrows(UserExecException.class, () -> acquire(bigRam, 0, 0));
214+
}
215+
205216
@Test
206217
public void testThatCpuCanBeOverallocated() throws Exception {
207218
assertThat(manager.inUse()).isFalse();
@@ -395,14 +406,14 @@ public void testInterruptedAcquisitionClearsResources() throws Exception {
395406
assertThat(manager.inUse()).isFalse();
396407
// Acquire a small amount of resources so that future requests can block (the initial request
397408
// always succeeds even if it's for too much).
398-
TestThread smallThread = new TestThread(() -> acquire(1, 0, 0));
409+
TestThread smallThread = new TestThread(() -> acquire(400, 0, 0));
399410
smallThread.start();
400411
smallThread.joinAndAssertState(TestUtils.WAIT_TIMEOUT_MILLISECONDS);
401412
TestThread thread1 =
402413
new TestThread(
403414
() -> {
404415
Thread.currentThread().interrupt();
405-
assertThrows(InterruptedException.class, () -> acquire(1999, 0, 0));
416+
assertThrows(InterruptedException.class, () -> acquire(700, 0, 0));
406417
});
407418
thread1.start();
408419
thread1.joinAndAssertState(TestUtils.WAIT_TIMEOUT_MILLISECONDS);
@@ -413,7 +424,7 @@ public void testInterruptedAcquisitionClearsResources() throws Exception {
413424
TestThread thread2 =
414425
new TestThread(
415426
() -> {
416-
ResourceHandle handle = acquire(1999, 0, 0);
427+
ResourceHandle handle = acquire(700, 0, 0);
417428
release(handle);
418429
});
419430
thread2.start();
@@ -817,7 +828,8 @@ public void testCPULoadScheduling_cantAcquireX3Cpu() throws Exception {
817828
assertThat(e).hasCauseThat().hasMessageThat().contains("is still alive");
818829
}
819830

820-
synchronized boolean isAvailable(ResourceManager rm, double ram, double cpu, int localTestCount) {
831+
synchronized boolean isAvailable(ResourceManager rm, double ram, double cpu, int localTestCount)
832+
throws UserExecException {
821833
return rm.areResourcesAvailable(ResourceSet.create(ram, cpu, localTestCount));
822834
}
823835

0 commit comments

Comments
 (0)