Skip to content

Commit 8aeebbd

Browse files
authored
Merge pull request #489 from Artmorse/issue-381
Terminate the instance when a 404 occurred.
2 parents 005441d + 181e799 commit 8aeebbd

File tree

4 files changed

+62
-15
lines changed

4 files changed

+62
-15
lines changed

src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package com.google.jenkins.plugins.computeengine;
1818

19+
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
1920
import com.google.api.services.compute.model.AccessConfig;
2021
import com.google.api.services.compute.model.Instance;
2122
import com.google.api.services.compute.model.NetworkInterface;
@@ -40,6 +41,7 @@
4041
import java.io.PrintStream;
4142
import java.net.InetSocketAddress;
4243
import java.net.Proxy;
44+
import java.net.SocketTimeoutException;
4345
import java.util.Base64;
4446
import java.util.Optional;
4547
import java.util.logging.Level;
@@ -154,14 +156,16 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
154156
}
155157
if (opError != null) {
156158
LOGGER.info(String.format(
157-
"Launch failed while waiting for operation %s to complete. Operation error was %s",
159+
"Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance.",
158160
insertOperationId, opError.getErrors().get(0).getMessage()));
161+
terminateNode(computer, listener);
159162
return;
160163
}
161164
} catch (InterruptedException e) {
162165
LOGGER.info(String.format(
163-
"Launch failed while waiting for operation %s to complete. Operation error was %s",
166+
"Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance",
164167
insertOperationId, opError.getErrors().get(0).getMessage()));
168+
terminateNode(computer, listener);
165169
return;
166170
}
167171

@@ -214,19 +218,26 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
214218
launch(computer, listener);
215219
} catch (IOException ioe) {
216220
ioe.printStackTrace(listener.error(ioe.getMessage()));
217-
node = (ComputeEngineInstance) slaveComputer.getNode();
218-
if (node != null) {
219-
try {
220-
node.terminate();
221-
} catch (Exception e) {
222-
listener.error(String.format("Failed to terminate node %s", node.getDisplayName()));
223-
}
224-
}
221+
terminateNode(slaveComputer, listener);
225222
} catch (InterruptedException ie) {
226223

227224
}
228225
}
229226

227+
private static void terminateNode(SlaveComputer slaveComputer, TaskListener listener) {
228+
ComputeEngineInstance node = (ComputeEngineInstance) slaveComputer.getNode();
229+
if (node != null) {
230+
try {
231+
node.terminate();
232+
} catch (Exception e) {
233+
listener.error(String.format("Failed to terminate node %s", node.getDisplayName()));
234+
}
235+
} else {
236+
LOGGER.fine(
237+
String.format("Tried to terminate unknown node from computer %s", slaveComputer.getDisplayName()));
238+
}
239+
}
240+
230241
private boolean testCommand(
231242
ComputeEngineComputer computer,
232243
Connection conn,
@@ -343,6 +354,10 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
343354
+ ")");
344355
}
345356
Instance instance = computer.refreshInstance();
357+
// the instance will be null when the node is terminated
358+
if (instance == null) {
359+
return null;
360+
}
346361

347362
String host = "";
348363

@@ -410,10 +425,25 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
410425
SSH_TIMEOUT_MILLIS);
411426
logInfo(computer, listener, "Connected via SSH.");
412427
return conn;
413-
} catch (IOException e) {
428+
} catch (GoogleJsonResponseException e) {
429+
if (e.getStatusCode() == 404) {
430+
log(
431+
LOGGER,
432+
Level.SEVERE,
433+
listener,
434+
String.format("Instance %s not found. Terminating instance.", computer.getName()));
435+
terminateNode(computer, listener);
436+
}
437+
} catch (SocketTimeoutException e) {
414438
// keep retrying until SSH comes up
415-
logInfo(computer, listener, "Failed to connect via ssh: " + e.getMessage());
416-
logInfo(computer, listener, "Waiting for SSH to come up. Sleeping 5.");
439+
logInfo(computer, listener, String.format("Failed to connect via ssh: %s", e.getMessage()));
440+
logInfo(
441+
computer,
442+
listener,
443+
String.format("Waiting for SSH to come up. Sleeping %d.", SSH_SLEEP_MILLIS / 1000));
444+
Thread.sleep(SSH_SLEEP_MILLIS);
445+
} catch (IOException e) {
446+
logWarning(computer, listener, String.format("An error occured: %s", e.getMessage()));
417447
Thread.sleep(SSH_SLEEP_MILLIS);
418448
}
419449
}

src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@
1616

1717
package com.google.jenkins.plugins.computeengine;
1818

19+
import static com.google.jenkins.plugins.computeengine.ComputeEngineCloud.CLOUD_ID_LABEL_KEY;
20+
1921
import com.google.cloud.graphite.platforms.plugin.client.ComputeClient.OperationException;
2022
import com.google.common.base.Strings;
23+
import com.google.common.collect.ImmutableMap;
2124
import com.google.jenkins.plugins.computeengine.ssh.GoogleKeyCredential;
2225
import edu.umd.cs.findbugs.annotations.Nullable;
2326
import hudson.Extension;
@@ -30,6 +33,7 @@
3033
import hudson.slaves.RetentionStrategy;
3134
import java.io.IOException;
3235
import java.util.Collections;
36+
import java.util.Map;
3337
import java.util.Optional;
3438
import java.util.logging.Level;
3539
import java.util.logging.Logger;
@@ -130,9 +134,16 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted
130134
.createSnapshotSync(cloud.getProjectId(), this.zone, this.getNodeName(), createSnapshotTimeout);
131135
}
132136

133-
// If the instance is running, attempt to terminate it. This is an async call and we
137+
Map<String, String> filterLabel = ImmutableMap.of(CLOUD_ID_LABEL_KEY, cloud.getInstanceId());
138+
var instanceExistsInCloud =
139+
cloud.getClient().listInstancesWithLabel(cloud.getProjectId(), filterLabel).stream()
140+
.anyMatch(instance -> instance.getName().equals(name));
141+
142+
// If the instance exists in the cloud, attempt to terminate it. This is an async call and we
134143
// return immediately, hoping for the best.
135-
cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), zone, name);
144+
if (instanceExistsInCloud) {
145+
cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), zone, name);
146+
}
136147
} catch (CloudNotFoundException cnfe) {
137148
listener.error(cnfe.getMessage());
138149
} catch (OperationException oe) {

src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineLinuxLauncher.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ private Optional<Connection> bootstrap(
8585
logInfo(computer, listener, "Authenticating as " + node.getSshUser());
8686
try {
8787
bootstrapConn = connectToSsh(computer, listener);
88+
if (bootstrapConn == null) {
89+
break;
90+
}
8891
isAuthenticated = bootstrapConn.authenticateWithPublicKey(
8992
node.getSshUser(),
9093
Secret.toString(keyCred.getPrivateKey()).toCharArray(),

src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ private Optional<Connection> bootstrap(ComputeEngineComputer computer, TaskListe
9292
logInfo(computer, listener, "Authenticating as " + node.getSshUser());
9393
try {
9494
bootstrapConn = connectToSsh(computer, listener);
95+
if (bootstrapConn == null) {
96+
break;
97+
}
9598
isAuthenticated = authenticateSSH(node.getSshUser(), windowsConfig, bootstrapConn, listener);
9699
} catch (IOException e) {
97100
logException(computer, listener, "Exception trying to authenticate", e);

0 commit comments

Comments
 (0)