Skip to content

Commit e8ea2ff

Browse files
authored
When a pending item lost its executor, it should be rescheduled (#10756)
2 parents ea2f421 + fd8ca81 commit e8ea2ff

File tree

3 files changed

+76
-1
lines changed

3 files changed

+76
-1
lines changed

core/src/main/java/hudson/model/Executor.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import static java.util.logging.Level.SEVERE;
3131
import static java.util.logging.Level.WARNING;
3232

33+
import com.google.common.annotations.VisibleForTesting;
3334
import edu.umd.cs.findbugs.annotations.CheckForNull;
3435
import edu.umd.cs.findbugs.annotations.NonNull;
3536
import hudson.FilePath;
@@ -330,6 +331,11 @@ private void resetWorkUnit(String reason) {
330331
}
331332
}
332333

334+
@VisibleForTesting
335+
long getStartTime() {
336+
return startTime;
337+
}
338+
333339
@Override
334340
public void run() {
335341
if (!(owner instanceof Jenkins.MasterComputer)) {

core/src/main/java/hudson/model/Queue.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1532,7 +1532,11 @@ public void maintain() {
15321532
}
15331533
p.isPending = false;
15341534
pendings.remove(p);
1535-
makeBuildable(p); // TODO whatever this is for, the return value is being ignored, so this does nothing at all
1535+
var r = makeBuildable(p);
1536+
if (r != null) {
1537+
LOGGER.fine(() -> "Executing lost runnable " + p.task.getFullDisplayName());
1538+
r.run();
1539+
}
15361540
}
15371541
}
15381542

test/src/test/java/hudson/model/QueueTest.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package hudson.model;
2626

27+
import static org.awaitility.Awaitility.await;
2728
import static org.hamcrest.MatcherAssert.assertThat;
2829
import static org.hamcrest.Matchers.contains;
2930
import static org.hamcrest.Matchers.containsString;
@@ -71,8 +72,11 @@
7172
import hudson.security.Permission;
7273
import hudson.security.ProjectMatrixAuthorizationStrategy;
7374
import hudson.security.SparseACL;
75+
import hudson.slaves.ComputerLauncher;
7476
import hudson.slaves.DumbSlave;
7577
import hudson.slaves.OfflineCause;
78+
import hudson.slaves.RetentionStrategy;
79+
import hudson.slaves.SlaveComputer;
7680
import hudson.tasks.BatchFile;
7781
import hudson.tasks.BuildTrigger;
7882
import hudson.tasks.Shell;
@@ -107,6 +111,7 @@
107111
import jenkins.model.Jenkins;
108112
import jenkins.model.queue.QueueIdStrategy;
109113
import jenkins.security.QueueItemAuthenticatorConfiguration;
114+
import jenkins.util.Timer;
110115
import org.acegisecurity.acls.sid.PrincipalSid;
111116
import org.htmlunit.HttpMethod;
112117
import org.htmlunit.Page;
@@ -1329,4 +1334,64 @@ public void run() {
13291334
execute(new BuildExecution());
13301335
}
13311336
}
1337+
1338+
private static class SlowSlave extends Slave {
1339+
SlowSlave(String name, File remoteFS, ComputerLauncher launcher) throws Descriptor.FormException, IOException {
1340+
super(name, remoteFS.getAbsolutePath(), launcher);
1341+
}
1342+
1343+
@Override public Computer createComputer() {
1344+
return new SlowComputer(this);
1345+
}
1346+
}
1347+
1348+
private static class SlowComputer extends SlaveComputer {
1349+
SlowComputer(SlowSlave slave) {
1350+
super(slave);
1351+
}
1352+
1353+
@Override
1354+
public boolean isOffline() {
1355+
try {
1356+
// This delay is just big enough to allow the test to simulate a computer failure at the time we expect.
1357+
Thread.sleep(100);
1358+
} catch (InterruptedException e) {
1359+
// ignore
1360+
}
1361+
return super.isOffline();
1362+
}
1363+
}
1364+
1365+
@Test
1366+
void computerFailsJustAfterCreatingExecutor() throws Throwable {
1367+
r.jenkins.setNumExecutors(0);
1368+
var p = r.createFreeStyleProject();
1369+
p.setAssignedLabel(r.jenkins.getLabel("agent"));
1370+
Slave onlineSlave = new SlowSlave("special", new File(r.jenkins.getRootDir(), "agent-work-dirs/special"), r.createComputerLauncher(null));
1371+
onlineSlave.setLabelString("agent");
1372+
onlineSlave.setRetentionStrategy(RetentionStrategy.NOOP);
1373+
r.jenkins.addNode(onlineSlave);
1374+
r.waitOnline(onlineSlave);
1375+
1376+
var computer = onlineSlave.toComputer();
1377+
Timer.get().execute(() -> {
1378+
// Simulate a computer failure just after the executor is created
1379+
while (computer.getExecutors().get(0).getStartTime() == 0) {
1380+
try {
1381+
Thread.sleep(10);
1382+
} catch (InterruptedException e) {
1383+
// ignore
1384+
}
1385+
}
1386+
computer.disconnect(new OfflineCause.ChannelTermination(new IllegalStateException()));
1387+
});
1388+
var f = p.scheduleBuild2(0);
1389+
await().until(computer::isOffline);
1390+
Thread.sleep(1000);
1391+
assertFalse(r.jenkins.getQueue().isEmpty(), "Queue item should be back as the executor got killed before it could be picked up");
1392+
// Put the computer back online
1393+
r.waitOnline(onlineSlave);
1394+
r.assertBuildStatusSuccess(f);
1395+
assertTrue(r.jenkins.getQueue().isEmpty());
1396+
}
13321397
}

0 commit comments

Comments
 (0)