Skip to content

Commit b211060

Browse files
authored
Handle Pod decorator failure by aborting the pending queue item. (#1570)
1 parent 28c157c commit b211060

File tree

9 files changed

+112
-12
lines changed

9 files changed

+112
-12
lines changed

src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java

+25-6
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
3333
import hudson.Functions;
3434
import hudson.model.Descriptor;
35+
import hudson.model.Run;
3536
import hudson.model.TaskListener;
3637
import hudson.slaves.ComputerLauncher;
3738
import hudson.slaves.JNLPLauncher;
@@ -54,6 +55,7 @@
5455
import java.util.stream.Collectors;
5556
import jenkins.metrics.api.Metrics;
5657
import org.apache.commons.lang.StringUtils;
58+
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecoratorException;
5759
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.Reaper;
5860
import org.kohsuke.stapler.DataBoundConstructor;
5961

@@ -117,7 +119,20 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
117119
PodTemplate template = node.getTemplate();
118120
KubernetesCloud cloud = node.getKubernetesCloud();
119121
KubernetesClient client = cloud.connect();
120-
Pod pod = template.build(node);
122+
Pod pod;
123+
try {
124+
pod = template.build(node);
125+
} catch (PodDecoratorException e) {
126+
Run<?, ?> run = template.getRun();
127+
if (run != null) {
128+
template.getListener().getLogger().println("Failed to build pod definition : " + e.getMessage());
129+
PodUtils.cancelQueueItemFor(run.getUrl(), template.getLabel(), e.getMessage(), null);
130+
}
131+
e.printStackTrace(listener.fatalError("Failed to build pod definition"));
132+
setProblem(e);
133+
terminateOrLog(node);
134+
return;
135+
}
121136
node.assignPod(pod);
122137

123138
String podName = pod.getMetadata().getName();
@@ -305,15 +320,19 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
305320
String.format("Error in provisioning; agent=%s, template=%s", node, node.getTemplateId()),
306321
ex);
307322
LOGGER.log(Level.FINER, "Removing Jenkins node: {0}", node.getNodeName());
308-
try {
309-
node.terminate();
310-
} catch (IOException | InterruptedException e) {
311-
LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e);
312-
}
323+
terminateOrLog(node);
313324
throw new RuntimeException(ex);
314325
}
315326
}
316327

328+
private static void terminateOrLog(KubernetesSlave node) {
329+
try {
330+
node.terminate();
331+
} catch (IOException | InterruptedException e) {
332+
LOGGER.log(Level.WARNING, "Unable to remove Jenkins node", e);
333+
}
334+
}
335+
317336
private void checkTerminatedContainers(
318337
List<ContainerStatus> terminatedContainers,
319338
String podId,

src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import edu.umd.cs.findbugs.annotations.CheckForNull;
66
import edu.umd.cs.findbugs.annotations.NonNull;
7+
import edu.umd.cs.findbugs.annotations.Nullable;
78
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
89
import hudson.Extension;
910
import hudson.FilePath;
@@ -221,7 +222,7 @@ public void setNamespace(@NonNull String namespace) {
221222
this.namespace = namespace;
222223
}
223224

224-
@NonNull
225+
@Nullable
225226
public String getNamespace() {
226227
return namespace;
227228
}
@@ -423,7 +424,10 @@ name, getPodRetention(cloud)
423424
listener.getLogger().println(msg);
424425
}
425426

426-
private void deleteSlavePod(TaskListener listener, KubernetesClient client) throws IOException {
427+
private void deleteSlavePod(TaskListener listener, KubernetesClient client) {
428+
if (getNamespace() == null) {
429+
return;
430+
}
427431
try {
428432
boolean deleted = client.pods()
429433
.inNamespace(getNamespace())

src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodTemplate.java

+17
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import hudson.model.DescriptorVisibilityFilter;
1111
import hudson.model.Label;
1212
import hudson.model.Node;
13+
import hudson.model.Run;
1314
import hudson.model.Saveable;
1415
import hudson.model.TaskListener;
1516
import hudson.model.labels.LabelAtom;
@@ -80,6 +81,13 @@ public class PodTemplate extends AbstractDescribableImpl<PodTemplate> implements
8081
public static final String JENKINS_LABEL = "jenkins/label";
8182
public static final String JENKINS_LABEL_DIGEST = "jenkins/label-digest";
8283

84+
/**
85+
* The run this pod template is associated with.
86+
* Only applicable to pod templates defined by the `podTemplate` step.
87+
*/
88+
@CheckForNull
89+
private transient Run<?, ?> run;
90+
8391
/**
8492
* Digest function that is used to compute the kubernetes label "jenkins/label-digest"
8593
* Not used for security.
@@ -1059,6 +1067,15 @@ public void setReadonlyFromUi(boolean readonlyFromUi) {
10591067
this.readonlyFromUi = readonlyFromUi;
10601068
}
10611069

1070+
public void setRun(Run<?, ?> run) {
1071+
this.run = run;
1072+
}
1073+
1074+
@CheckForNull
1075+
public Run<?, ?> getRun() {
1076+
return run;
1077+
}
1078+
10621079
@Extension
10631080
public static class DescriptorImpl extends Descriptor<PodTemplate> {
10641081

src/main/java/org/csanchez/jenkins/plugins/kubernetes/PodUtils.java

+14-3
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,20 @@ public static void cancelQueueItemFor(Pod pod, String reason) {
102102
LOGGER.log(Level.FINE, () -> "Pod " + podDisplayName + " .metadata.labels is null");
103103
return;
104104
}
105-
var jenkinsLabel = labels.get(PodTemplate.JENKINS_LABEL);
105+
cancelQueueItemFor(runUrl, labels.get(PodTemplate.JENKINS_LABEL), reason, podDisplayName);
106+
}
107+
108+
public static void cancelQueueItemFor(
109+
@NonNull String runUrl,
110+
@NonNull String label,
111+
@CheckForNull String reason,
112+
@CheckForNull String podDisplayName) {
106113
var queue = Jenkins.get().getQueue();
107114
Arrays.stream(queue.getItems())
108115
.filter(item -> item.getTask().getUrl().equals(runUrl))
109116
.filter(item -> Optional.ofNullable(item.getAssignedLabel())
110117
.map(Label::getName)
111-
.map(name -> PodTemplateUtils.sanitizeLabel(name).equals(jenkinsLabel))
118+
.map(name -> PodTemplateUtils.sanitizeLabel(name).equals(label))
112119
.orElse(false))
113120
.findFirst()
114121
.ifPresentOrElse(
@@ -119,7 +126,11 @@ public static void cancelQueueItemFor(Pod pod, String reason) {
119126
+ (!StringUtils.isBlank(reason) ? "due to " + reason : ""));
120127
queue.cancel(item);
121128
},
122-
() -> LOGGER.log(Level.FINE, () -> "No queue item found for pod " + podDisplayName));
129+
() -> {
130+
if (podDisplayName != null) {
131+
LOGGER.log(Level.FINE, () -> "No queue item found for pod " + podDisplayName);
132+
}
133+
});
123134
}
124135

125136
@CheckForNull

src/main/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/PodTemplateStepExecution.java

+2
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ public boolean start() throws Exception {
132132
newTemplate.getAnnotations().add(new PodAnnotation(POD_ANNOTATION_BUILD_URL, url + run.getUrl()));
133133
newTemplate.getAnnotations().add(new PodAnnotation(POD_ANNOTATION_RUN_URL, run.getUrl()));
134134
}
135+
newTemplate.setRun(run);
135136
}
136137
newTemplate.setImagePullSecrets(step.getImagePullSecrets().stream()
137138
.map(x -> new PodImagePullSecret(x))
@@ -240,6 +241,7 @@ public void onResume() {
240241
KubernetesCloud cloud = resolveCloud(cloudName);
241242
TaskListener listener = getContext().get(TaskListener.class);
242243
newTemplate.setListener(listener);
244+
newTemplate.setRun(getContext().get(Run.class));
243245
LOGGER.log(Level.FINE, "Re-registering template with id=" + newTemplate.getId() + " after resume");
244246
if (VERBOSE) {
245247
listener.getLogger()

src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/decorator/PodDecorator.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,15 @@
1111
*/
1212
public interface PodDecorator extends ExtensionPoint {
1313

14+
/**
15+
* Goes through all the {@link PodDecorator} extensions and decorates the pod.
16+
* @param kubernetesCloud The cloud this pod will be scheduled as context.
17+
* @param pod the initial pod definition before decoration.
18+
* @return The modified pod definition.
19+
* @throws PodDecoratorException Should any of the decorators fail to decorate the pod.
20+
*/
1421
@NonNull
15-
static Pod decorateAll(@NonNull KubernetesCloud kubernetesCloud, @NonNull Pod pod) {
22+
static Pod decorateAll(@NonNull KubernetesCloud kubernetesCloud, @NonNull Pod pod) throws PodDecoratorException {
1623
for (PodDecorator decorator : ExtensionList.lookup(PodDecorator.class)) {
1724
pod = decorator.decorate(kubernetesCloud, pod);
1825
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package org.csanchez.jenkins.plugins.kubernetes.pod.decorator;
2+
3+
/**
4+
* A fatal exception raised by a {@link PodDecorator} implementation.
5+
*/
6+
public class PodDecoratorException extends RuntimeException {
7+
public PodDecoratorException(String message) {
8+
super(message);
9+
}
10+
11+
public PodDecoratorException(String message, Throwable cause) {
12+
super(message, cause);
13+
}
14+
}

src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java

+21
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,16 @@
7373
import jenkins.metrics.api.Metrics;
7474
import jenkins.model.Jenkins;
7575
import org.csanchez.jenkins.plugins.kubernetes.GarbageCollection;
76+
import org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud;
7677
import org.csanchez.jenkins.plugins.kubernetes.KubernetesComputer;
7778
import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave;
7879
import org.csanchez.jenkins.plugins.kubernetes.KubernetesTestUtil;
7980
import org.csanchez.jenkins.plugins.kubernetes.MetricNames;
8081
import org.csanchez.jenkins.plugins.kubernetes.PodAnnotation;
8182
import org.csanchez.jenkins.plugins.kubernetes.PodTemplate;
8283
import org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils;
84+
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecorator;
85+
import org.csanchez.jenkins.plugins.kubernetes.pod.decorator.PodDecoratorException;
8386
import org.hamcrest.MatcherAssert;
8487
import org.htmlunit.html.DomNodeUtil;
8588
import org.htmlunit.html.HtmlElement;
@@ -90,6 +93,7 @@
9093
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
9194
import org.jenkinsci.plugins.workflow.steps.durable_task.DurableTaskStep;
9295
import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep;
96+
import org.jetbrains.annotations.NotNull;
9397
import org.junit.After;
9498
import org.junit.Assert;
9599
import org.junit.Before;
@@ -103,6 +107,7 @@
103107
import org.jvnet.hudson.test.JenkinsRuleNonLocalhost;
104108
import org.jvnet.hudson.test.LoggerRule;
105109
import org.jvnet.hudson.test.MockAuthorizationStrategy;
110+
import org.jvnet.hudson.test.TestExtension;
106111

107112
public class KubernetesPipelineTest extends AbstractKubernetesPipelineTest {
108113

@@ -928,4 +933,20 @@ public void handleEviction() throws Exception {
928933
SemaphoreStep.success("pod/2", null);
929934
r.assertBuildStatusSuccess(r.waitForCompletion(b));
930935
}
936+
937+
@Test
938+
public void decoratorFailure() throws Exception {
939+
r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
940+
r.assertLogContains("I always fail", b);
941+
assertThat("Node should have been removed", r.jenkins.getNodes(), empty());
942+
}
943+
944+
@TestExtension("decoratorFailure")
945+
public static class DecoratorImpl implements PodDecorator {
946+
@NotNull
947+
@Override
948+
public Pod decorate(@NotNull KubernetesCloud kubernetesCloud, @NotNull Pod pod) {
949+
throw new PodDecoratorException("I always fail");
950+
}
951+
}
931952
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
podTemplate {
2+
node(POD_LABEL) {
3+
sh 'true'
4+
}
5+
}

0 commit comments

Comments
 (0)