Skip to content

Commit 41b3ef1

Browse files
authored
Handle pods that have been evicted via the eviction API. (#1560)
1 parent 234076a commit 41b3ef1

File tree

5 files changed

+104
-37
lines changed

5 files changed

+104
-37
lines changed

kind.sh

+2 -2
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,12 @@ cd $(dirname $0)
55
export PATH=$WORKSPACE_TMP:$PATH
66
if [ \! -x "$WORKSPACE_TMP/kind" ]
77
then
8-
curl -sLo "$WORKSPACE_TMP/kind" https://github.com/kubernetes-sigs/kind/releases/download/v0.17.0/kind-$(uname | tr '[:upper:]' '[:lower:]')-amd64
8+
curl -sLo "$WORKSPACE_TMP/kind" https://github.com/kubernetes-sigs/kind/releases/download/v0.23.0/kind-$(uname | tr '[:upper:]' '[:lower:]')-amd64
99
chmod +x "$WORKSPACE_TMP/kind"
1010
fi
1111
if [ \! -x "$WORKSPACE_TMP/kubectl" ]
1212
then
13-
curl -sLo "$WORKSPACE_TMP/kubectl" https://storage.googleapis.com/kubernetes-release/release/v1.25.4/bin/$(uname | tr '[:upper:]' '[:lower:]')/amd64/kubectl
13+
curl -sLo "$WORKSPACE_TMP/kubectl" https://storage.googleapis.com/kubernetes-release/release/v1.30.1/bin/$(uname | tr '[:upper:]' '[:lower:]')/amd64/kubectl
1414
chmod +x "$WORKSPACE_TMP/kubectl"
1515
fi
1616

src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java

+70 -29
Original file line number | Diff line number | Diff line change
@@ -437,31 +437,39 @@ public void onEvent(
437437

438438
List<ContainerStatus> terminatedContainers = PodUtils.getTerminatedContainers(pod);
439439
if (!terminatedContainers.isEmpty()) {
440-
String ns = pod.getMetadata().getNamespace();
441-
String name = pod.getMetadata().getName();
442-
TaskListener runListener = node.getRunListener();
443440
List<String> containers = new ArrayList<>();
444441
terminatedContainers.forEach(c -> {
445442
ContainerStateTerminated t = c.getState().getTerminated();
446443
String containerName = c.getName();
447444
containers.add(containerName);
448-
LOGGER.info(() -> ns + "/" + name + " Container " + containerName
449-
+ " was just terminated, so removing the corresponding Jenkins agent");
450445
String reason = t.getReason();
451-
runListener
452-
.getLogger()
453-
.printf(
454-
"%s/%s Container %s was terminated (Exit Code: %d, Reason: %s)%n",
455-
ns, name, containerName, t.getExitCode(), reason);
456446
if (reason != null) {
457447
terminationReasons.add(reason);
458448
}
459449
});
460-
461-
logLastLinesThenTerminateNode(node, pod, runListener);
462-
PodUtils.cancelQueueItemFor(pod, "ContainerError");
463-
disconnectComputer(
450+
String reason = pod.getStatus().getReason();
451+
String message = pod.getStatus().getMessage();
452+
var sb = new StringBuilder()
453+
.append(pod.getMetadata().getNamespace())
454+
.append("/")
455+
.append(pod.getMetadata().getName());
456+
if (containers.size() > 1) {
457+
sb.append(" Containers ")
458+
.append(String.join(",", containers))
459+
.append(" were terminated.");
460+
} else {
461+
sb.append(" Container ")
462+
.append(String.join(",", containers))
463+
.append(" was terminated.");
464+
}
465+
logAndCleanUp(
464466
node,
467+
pod,
468+
terminationReasons,
469+
reason,
470+
message,
471+
sb,
472+
node.getRunListener(),
465473
new PodOfflineCause(Messages._PodOfflineCause_ContainerFailed("ContainerError", containers)));
466474
}
467475
}
@@ -481,27 +489,60 @@ public void onEvent(
481489
}
482490

483491
if ("Failed".equals(pod.getStatus().getPhase())) {
484-
String ns = pod.getMetadata().getNamespace();
485-
String name = pod.getMetadata().getName();
486-
TaskListener runListener = node.getRunListener();
487492
String reason = pod.getStatus().getReason();
488493
String message = pod.getStatus().getMessage();
489-
LOGGER.info(
490-
() -> ns + "/" + name + " Pod just failed. Removing the corresponding Jenkins agent. Reason: "
491-
+ reason + ", Message: " + message);
492-
runListener
493-
.getLogger()
494-
.printf("%s/%s Pod just failed (Reason: %s, Message: %s)%n", ns, name, reason, message);
495-
if (reason != null) {
496-
terminationReasons.add(reason);
497-
}
498-
499-
logLastLinesThenTerminateNode(node, pod, runListener);
500-
disconnectComputer(node, new PodOfflineCause(Messages._PodOfflineCause_PodFailed(reason, message)));
494+
logAndCleanUp(
495+
node,
496+
pod,
497+
terminationReasons,
498+
reason,
499+
message,
500+
new StringBuilder()
501+
.append(pod.getMetadata().getNamespace())
502+
.append("/")
503+
.append(pod.getMetadata().getName())
504+
.append(" Pod just failed."),
505+
node.getRunListener(),
506+
new PodOfflineCause(Messages._PodOfflineCause_PodFailed(reason, message)));
501507
}
502508
}
503509
}
504510

511+
private static void logAndCleanUp(
512+
KubernetesSlave node,
513+
Pod pod,
514+
Set<String> terminationReasons,
515+
String reason,
516+
String message,
517+
StringBuilder sb,
518+
TaskListener runListener,
519+
PodOfflineCause cause)
520+
throws IOException, InterruptedException {
521+
List<String> details = new ArrayList<>();
522+
if (reason != null) {
523+
details.add("Reason: " + reason);
524+
terminationReasons.add(reason);
525+
}
526+
if (message != null) {
527+
details.add("Message: " + message);
528+
}
529+
if (!details.isEmpty()) {
530+
sb.append(" ").append(String.join(", ", details)).append(".");
531+
}
532+
var evictionCondition = pod.getStatus().getConditions().stream()
533+
.filter(c -> "EvictionByEvictionAPI".equals(c.getReason()))
534+
.findFirst();
535+
if (evictionCondition.isPresent()) {
536+
sb.append(" Pod was evicted by the Kubernetes Eviction API.");
537+
terminationReasons.add(evictionCondition.get().getReason());
538+
}
539+
LOGGER.info(() -> sb + " Removing corresponding node " + node.getNodeName() + " from Jenkins.");
540+
runListener.getLogger().println(sb);
541+
logLastLinesThenTerminateNode(node, pod, runListener);
542+
PodUtils.cancelQueueItemFor(pod, "PodFailure");
543+
disconnectComputer(node, cause);
544+
}
545+
505546
private static void logLastLinesThenTerminateNode(KubernetesSlave node, Pod pod, TaskListener runListener)
506547
throws IOException, InterruptedException {
507548
try {

src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java

+21 -6
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,8 @@
107107
public class KubernetesPipelineTest extends AbstractKubernetesPipelineTest {
108108

109109
private static final Logger LOGGER = Logger.getLogger(KubernetesPipelineTest.class.getName());
110+
public static final String POD_DEADLINE_EXCEEDED_MESSAGE =
111+
"Pod just failed. Reason: DeadlineExceeded, Message: Pod was active on the node longer than the specified deadline.";
110112

111113
@Rule
112114
public TemporaryFolder tmp = new TemporaryFolder();
@@ -575,9 +577,7 @@ public void errorPod() throws Exception {
575577
@Test
576578
public void podDeadlineExceeded() throws Exception {
577579
r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
578-
r.waitForMessage(
579-
"Pod just failed (Reason: DeadlineExceeded, Message: Pod was active on the node longer than the specified deadline)",
580-
b);
580+
r.waitForMessage(POD_DEADLINE_EXCEEDED_MESSAGE, b);
581581
}
582582

583583
@Test
@@ -587,9 +587,7 @@ public void podDeadlineExceededGlobalTemplate() throws Exception {
587587
podTemplate.setActiveDeadlineSeconds(30);
588588
cloud.addTemplate(podTemplate);
589589
r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
590-
r.waitForMessage(
591-
"Pod just failed (Reason: DeadlineExceeded, Message: Pod was active on the node longer than the specified deadline)",
592-
b);
590+
r.waitForMessage(POD_DEADLINE_EXCEEDED_MESSAGE, b);
593591
r.waitForMessage("---Logs---", b);
594592
}
595593

@@ -913,4 +911,21 @@ public void garbageCollection() throws Exception {
913911
await().timeout(1, TimeUnit.MINUTES)
914912
.until(() -> client.resource(finalPod).get() == null);
915913
}
914+
915+
@Test
916+
public void handleEviction() throws Exception {
917+
SemaphoreStep.waitForStart("pod/1", b);
918+
var client = cloud.connect();
919+
var pod = client.pods()
920+
.withLabels(getLabels(cloud, this, name))
921+
.list()
922+
.getItems()
923+
.get(0);
924+
client.pods().resource(pod).evict();
925+
r.waitForMessage("Pod was evicted by the Kubernetes Eviction API", b);
926+
SemaphoreStep.success("pod/1", null);
927+
SemaphoreStep.waitForStart("pod/2", b);
928+
SemaphoreStep.success("pod/2", null);
929+
r.assertBuildStatusSuccess(r.waitForCompletion(b));
930+
}
916931
}

src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/handleEviction.groovy

+8
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
podTemplate {
2+
retry(count: 2, conditions: [kubernetesAgent()]) {
3+
node(POD_LABEL) {
4+
semaphore 'pod'
5+
sh 'sleep 1'
6+
}
7+
}
8+
}

test-in-k8s.yaml

+3
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,9 @@ rules:
6464
- apiGroups: [""]
6565
resources: ["pods/log"]
6666
verbs: ["get","list","watch"]
67+
- apiGroups: [""]
68+
resources: ["pods/eviction"]
69+
verbs: ["create"]
6770
- apiGroups: [""]
6871
resources: ["events"]
6972
verbs: ["watch"]

0 commit comments

Comments
 (0)