Skip to content

Commit 0d72896

Browse files
authored
YARN-11753. Ensure NM is marked unhealthy if the ProcessBuilder reports an issue with the container-executor (#7290)
1 parent 9bf5e38 commit 0d72896

File tree

2 files changed

+38
-30
lines changed

2 files changed

+38
-30
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java

+4-2
Original file line number | Diff line number | Diff line change
@@ -466,10 +466,12 @@ public void startLocalizer(LocalizerStartContext ctx)
466466
Throwable cause = e.getCause() != null ? e.getCause() : e;
467467
if (cause instanceof IOException) {
468468
IOException io = (IOException) cause;
469-
if (io.getMessage().contains("No such file or directory")) {
469+
String containerExecutorPath = getContainerExecutorExecutablePath(conf);
470+
if (io.getMessage() != null && io.getMessage().contains("Cannot run program \"" +
471+
containerExecutorPath + "\"")) {
470472
throw new ConfigurationException("Application " + appId + " initialization failed" +
471473
"(exitCode=" + exitCode + "). Container executor not found at "
472-
+ getContainerExecutorExecutablePath(conf), e);
474+
+ containerExecutorPath, e);
473475
}
474476
}
475477

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java

+34-28
Original file line number | Diff line number | Diff line change
@@ -628,15 +628,17 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
628628
when(context.getEnvironment()).thenReturn(env);
629629
Path workDir = new Path("/tmp");
630630

631+
LocalizerStartContext lsc = new LocalizerStartContext.Builder()
632+
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
633+
.setNmAddr(address)
634+
.setUser(appSubmitter)
635+
.setAppId(appId.toString())
636+
.setLocId("12345")
637+
.setDirsHandler(dirService)
638+
.build();
639+
631640
try {
632-
lce.startLocalizer(new LocalizerStartContext.Builder()
633-
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
634-
.setNmAddr(address)
635-
.setUser(appSubmitter)
636-
.setAppId(appId.toString())
637-
.setLocId("12345")
638-
.setDirsHandler(dirService)
639-
.build());
641+
lce.startLocalizer(lsc);
640642
Assert.fail("startLocalizer should have thrown an exception");
641643
} catch (IOException e) {
642644
assertTrue("Unexpected exception " + e,
@@ -648,50 +650,54 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
648650
LinuxContainerExecutor.ExitCode.INVALID_CONFIG_FILE.getExitCode(),
649651
};
650652

651-
for (int i = 0; i < exitCodesToThrow.length; i++) {
652-
int exitCode = exitCodesToThrow[i];
653+
for (int exitCode : exitCodesToThrow) {
653654
doThrow(new PrivilegedOperationException("invalid config", exitCode, null, null))
654655
.when(spyPrivilegedExecutor).executePrivilegedOperation(
655656
any(), any(PrivilegedOperation.class),
656657
any(), any(), anyBoolean(), anyBoolean());
657658

658659
try {
659-
lce.startLocalizer(new LocalizerStartContext.Builder()
660-
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
661-
.setNmAddr(address)
662-
.setUser(appSubmitter)
663-
.setAppId(appId.toString())
664-
.setLocId("12345")
665-
.setDirsHandler(dirService)
666-
.build());
660+
lce.startLocalizer(lsc);
667661
Assert.fail("startLocalizer should have thrown a ConfigurationException");
668662
} catch (ConfigurationException e) {
669663
assertTrue("Unexpected exception " + e,
670664
e.getMessage().contains("exitCode=" + exitCode));
671665
}
672666
}
673667

668+
// Assert that we do catch an IOException thrown by the ProcessBuilder.start
669+
// method as a misconfiguration
670+
String containerExecutorPath = lce.getContainerExecutorExecutablePath(conf);
674671
doThrow(new PrivilegedOperationException("IO error",
675-
new IOException("No such file or directory")))
672+
new IOException("Cannot run program \""+ containerExecutorPath + "\"")))
676673
.when(spyPrivilegedExecutor).executePrivilegedOperation(
677674
any(), any(PrivilegedOperation.class),
678675
any(), any(), anyBoolean(), anyBoolean());
679676

680677
try {
681-
lce.startLocalizer(new LocalizerStartContext.Builder()
682-
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
683-
.setNmAddr(address)
684-
.setUser(appSubmitter)
685-
.setAppId(appId.toString())
686-
.setLocId("12345")
687-
.setDirsHandler(dirService)
688-
.build());
689-
Assert.fail("startLocalizer should have thrown a ConfigurationException");
678+
lce.startLocalizer(lsc);
679+
Assert.fail("startLocalizer should have thrown an ConfigurationException");
690680
} catch (ConfigurationException e) {
691681
assertTrue("Unexpected exception " + e,
692682
e.getMessage().contains("Container executor not found"));
693683
}
694684

685+
// Assert that we do not catch every IOException as a misconfiguration
686+
doThrow(new PrivilegedOperationException("IO error",
687+
new IOException("No such file or directory")))
688+
.when(spyPrivilegedExecutor).executePrivilegedOperation(
689+
any(), any(PrivilegedOperation.class),
690+
any(), any(), anyBoolean(), anyBoolean());
691+
692+
try {
693+
lce.startLocalizer(lsc);
694+
Assert.fail("startLocalizer should have thrown an IOException");
695+
} catch (ConfigurationException e) {
696+
Assert.fail("startLocalizer should not have thrown a ConfigurationException");
697+
} catch (IOException e) {
698+
assertTrue("Unexpected exception " + e,
699+
e.getMessage().contains("exitCode"));
700+
}
695701

696702
doThrow(new PrivilegedOperationException("interrupted"))
697703
.when(spyPrivilegedExecutor).executePrivilegedOperation(

0 commit comments

Comments
 (0)