Skip to content

Commit 7fd205d

Browse files
authored
Retry SSH connect with configurable max attempts; update dependencies (#70)
1 parent 6087c2b commit 7fd205d

File tree

4 files changed

+109
-33
lines changed

4 files changed

+109
-33
lines changed

Cilicon.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved

Lines changed: 33 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cilicon/Config/Config.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ struct Config: Codable {
1212
autoTransferImageVolume: String? = nil,
1313
retryDelay: Int,
1414
sshCredentials: SSHCredentials,
15+
sshConnectMaxRetries: Int,
1516
preRun: String? = nil,
1617
postRun: String? = nil,
1718
consoleDevices: [String] = []
@@ -25,6 +26,7 @@ struct Config: Codable {
2526
self.runnerName = runnerName
2627
self.retryDelay = retryDelay
2728
self.sshCredentials = sshCredentials
29+
self.sshConnectMaxRetries = sshConnectMaxRetries
2830
self.preRun = preRun
2931
self.postRun = postRun
3032
self.consoleDevices = consoleDevices
@@ -50,6 +52,8 @@ struct Config: Codable {
5052
let retryDelay: Int
5153
/// Credentials to be used when connecting via SSH.
5254
let sshCredentials: SSHCredentials
55+
/// Maximum number of SSH connection attempts to make before giving up. Defaults to 10 when omitted from the configuration.
56+
let sshConnectMaxRetries: Int
5357
/// A command to run before the provisioning commands are run.
5458
let preRun: String?
5559
/// A command to run after the provisioning commands are run.
@@ -67,6 +71,7 @@ struct Config: Codable {
6771
case runnerName
6872
case retryDelay
6973
case sshCredentials
74+
case sshConnectMaxRetries
7075
case preRun
7176
case postRun
7277
case consoleDevices
@@ -85,6 +90,7 @@ struct Config: Codable {
8590
self.runnerName = try container.decodeIfPresent(String.self, forKey: .runnerName)
8691
self.retryDelay = try container.decodeIfPresent(Int.self, forKey: .retryDelay) ?? 5
8792
self.sshCredentials = try container.decodeIfPresent(SSHCredentials.self, forKey: .sshCredentials) ?? .default
93+
self.sshConnectMaxRetries = try container.decodeIfPresent(Int.self, forKey: .sshConnectMaxRetries) ?? 10
8894
self.preRun = try container.decodeIfPresent(String.self, forKey: .preRun)
8995
self.postRun = try container.decodeIfPresent(String.self, forKey: .postRun)
9096
self.consoleDevices = try container.decodeIfPresent([String].self, forKey: .consoleDevices) ?? []

Cilicon/VMManager.swift

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,8 @@ class VMManager: NSObject, ObservableObject {
115115
vmState = .running(virtualMachine)
116116
self.ip = try await fetchIP(macAddress: clonedBundle.configuration.macAddress.string)
117117

118-
let client = try await SSHClient.connect(
119-
host: ip,
120-
authenticationMethod: .passwordBased(username: config.sshCredentials.username, password: config.sshCredentials.password),
121-
hostKeyValidator: .acceptAnything(),
122-
reconnect: .always
123-
)
118+
// Wait until the VM has fully booted and can execute SSH commands before proceeding
119+
let client = try await createAndConnectSSHClient(ip: ip)
124120

125121
if let preRun = config.preRun {
126122
let streamOutput = try await client.executeCommandStream(preRun, inShell: true)
@@ -161,7 +157,59 @@ class VMManager: NSObject, ObservableObject {
161157
}
162158
}
163159

164-
func provisionVM() async throws { }
160+
/// Connects an SSH client to the guest at `ip`, retrying until the guest can run a command.
///
/// Each attempt opens a connection (5 second connect timeout) and runs `echo ssh-connected`
/// to confirm that the guest actually executes commands. At most
/// `config.sshConnectMaxRetries` attempts are made, with a 5 second pause between attempts.
///
/// - Parameter ip: Address of the guest to connect to.
/// - Returns: A connected client that has successfully run the probe command.
/// - Throws: `VMManagerError.sshConnectTimeout` when every attempt failed, or whatever
///   `Task.sleep` throws while waiting between attempts.
@MainActor
private func createAndConnectSSHClient(ip: String) async throws -> SSHClient {
    SSHLogger.shared.log(string: "Waiting for VM to boot and SSH to be available...\n")
    let maxRetries = config.sshConnectMaxRetries
    var tries = 0

    while tries < maxRetries {
        // Count every attempt up front. Previously only thrown errors were counted, so a probe
        // that completed without printing the token repeated forever without any delay.
        tries += 1

        do {
            let client = try await SSHClient.connect(
                host: ip,
                authenticationMethod: .passwordBased(
                    username: config.sshCredentials.username,
                    password: config.sshCredentials.password
                ),
                hostKeyValidator: .acceptAnything(),
                reconnect: .never,
                connectTimeout: .seconds(5)
            )

            do {
                // Test if we can execute a simple command
                let token = "ssh-connected"
                let streamOutput = try await client.executeCommandStream("echo \(token)", inShell: true)
                var commandSuccessful = false

                for try await blob in streamOutput {
                    switch blob {
                    case let .stdout(stdout):
                        if String(buffer: stdout).contains(token) {
                            commandSuccessful = true
                        }
                    case .stderr:
                        break
                    }
                }

                if commandSuccessful {
                    SSHLogger.shared.log(string: "VM fully booted and SSH available\n")
                    return client
                }
            } catch {
                // The probe command failed. Fall through so the connection is still closed below
                // instead of being leaked on every retry.
            }

            // The connection is not usable; closing is best-effort because we retry anyway.
            try? await client.close()
        } catch {
            // Connecting failed: SSH is not ready yet.
        }

        // Do not sleep after the final attempt; report the failure right away.
        guard tries < maxRetries else {
            SSHLogger.shared.log(string: "SSH connect \(tries)/\(maxRetries): SSH not ready, giving up\n")
            break
        }

        SSHLogger.shared.log(string: "SSH connect \(tries)/\(maxRetries): SSH not ready, waiting 5s...\n")
        try await Task.sleep(for: .seconds(5))
    }

    throw VMManagerError.sshConnectTimeout
}
165213

166214
func isBundleComplete() throws -> Bool {
167215
let filesExist = [
@@ -313,6 +361,7 @@ extension VMManager {
313361
enum VMManagerError: Error {
314362
case masterBundleNotFound(path: String)
315363
case failedToCreateDiskFile
364+
case sshConnectTimeout
316365
}
317366

318367
extension VMManagerError: LocalizedError {
@@ -322,6 +371,8 @@ extension VMManagerError: LocalizedError {
322371
return "Could not found bundle at \(path)"
323372
case .failedToCreateDiskFile:
324373
return "Failed to create Disk File"
374+
case .sshConnectTimeout:
375+
return "SSH Connect timeout"
325376
}
326377
}
327378
}

README.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ provisioner:
105105
downloadURL: <DOWNLOAD_URL> # defaults to GitLab official S3 bucket
106106
tomlPath: <PATH_TO_TOML> # defaults to `nil`. If set, it ignores the other runner related variables and passes the specified path to the runner executable
107107
consoleDevices:
108-
- tart-version-cilicon
108+
- tart-version-2
109+
sshConnectMaxRetries: <SSH_CONNECT_MAX_RETRIES> # defaults to 10
109110
```
110111
111112
#### Buildkite Agent
@@ -142,7 +143,16 @@ To add console devices, use the `consoleDevices` field in your configuration:
142143

143144
```yml
144145
consoleDevices:
145-
- tart-version-cilicon
146+
- tart-version-2
147+
```
148+
149+
#### SSH Connect Retries
150+
After the VM starts, Cilicon connects to the guest over SSH to run any preRun/postRun commands and the provisioner. Some images may take a while before the SSH service is ready. Use `sshConnectMaxRetries` to control how many connection attempts Cilicon will make before giving up (default: 10). Cilicon waits 5 seconds between attempts, so a higher value gives slow-booting images more time.
151+
152+
Example:
153+
154+
```yml
155+
sshConnectMaxRetries: 20
146156
```
147157

148158
### 🔨 Setting Up the Host OS

0 commit comments

Comments
 (0)