Skip to content

Commit ea037e3

Browse files
committed
Add port machine restart primitive
Expose `port_runtime::restart_machine` and a matching CLI subcommand that stops and immediately relaunches a Port-managed machine under the same runtime root. The rootfs overlay (and therefore in-VM state — etcd, kubelet/containerd state, pulled images) is preserved across the swap, so a guest-image change between stop and relaunch is picked up without losing state. This is the building block for rolling guest-image upgrades — for example, K3s patch bumps where the binary lives in the read-only base image and the data dirs live in the per-machine overlay. Higher layers (infra) drive cordon/drain orchestration on top of this primitive.
1 parent 10f4c9f commit ea037e3

2 files changed

Lines changed: 128 additions & 0 deletions

File tree

crates/port-cli/src/lib.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,19 @@ pub enum MachineCommand {
425425
#[arg(long, default_value_t = 3)]
426426
wait_secs: u64,
427427
},
428+
#[command(
429+
about = "Restart a Port-managed machine in place, preserving the rootfs overlay so in-VM state survives the swap"
430+
)]
431+
Restart {
432+
#[arg(long)]
433+
machine: String,
434+
#[arg(long, default_value = "runtime")]
435+
runtime_root: PathBuf,
436+
#[arg(long, default_value_t = 3)]
437+
stop_wait_secs: u64,
438+
#[arg(long, default_value_t = 3)]
439+
boot_wait_secs: u64,
440+
},
428441
}
429442

430443
#[derive(Debug, Subcommand)]
@@ -2230,6 +2243,44 @@ fn run_machine(command: MachineCommand, config_path: Option<PathBuf>) -> Result<
22302243
}
22312244
println!("detail: {}", result.detail);
22322245
}
2246+
MachineCommand::Restart {
2247+
machine,
2248+
runtime_root,
2249+
stop_wait_secs,
2250+
boot_wait_secs,
2251+
} => {
2252+
let ssh_context = ssh_machine_route_context(&config, &machine)?;
2253+
let result = port_runtime::restart_machine(
2254+
&config,
2255+
&runtime_root,
2256+
&machine,
2257+
Duration::from_secs(stop_wait_secs),
2258+
Duration::from_secs(boot_wait_secs),
2259+
)?;
2260+
println!("machine: {}", result.launch.machine_name);
2261+
if let Some(context) = ssh_context.as_ref() {
2262+
print_ssh_machine_route_context(context, "restart route");
2263+
}
2264+
println!("previous state: {}", result.stop.previous_state);
2265+
println!("relaunched pid: {}", result.launch.pid);
2266+
println!("runtime dir: {}", result.launch.runtime_dir.display());
2267+
println!("config path: {}", result.launch.config_path.display());
2268+
println!(
2269+
"hypervisor binary: {}",
2270+
result.launch.firecracker_binary.display()
2271+
);
2272+
println!("hypervisor log: {}", result.launch.log_path.display());
2273+
println!("console stdout: {}", result.launch.stdout_path.display());
2274+
println!("console stderr: {}", result.launch.stderr_path.display());
2275+
println!("manifest: {}", result.launch.manifest_path.display());
2276+
output_runtime_class(&result.launch.runtime_class);
2277+
if !result.launch.attached_volumes.is_empty() {
2278+
print!(
2279+
"{}",
2280+
format_attached_volumes(&result.launch.attached_volumes)
2281+
);
2282+
}
2283+
}
22332284
}
22342285

22352286
Ok(())
@@ -4446,6 +4497,52 @@ mod tests {
44464497
other => panic!("unexpected command: {other:?}"),
44474498
}
44484499

4500+
let restart = Cli::parse_from([
4501+
"port",
4502+
"machine",
4503+
"restart",
4504+
"--machine",
4505+
"demo",
4506+
"--runtime-root",
4507+
"/tmp/runtime",
4508+
"--stop-wait-secs",
4509+
"5",
4510+
"--boot-wait-secs",
4511+
"7",
4512+
]);
4513+
4514+
match restart.command {
4515+
Command::Machine(MachineCommand::Restart {
4516+
machine,
4517+
runtime_root,
4518+
stop_wait_secs,
4519+
boot_wait_secs,
4520+
}) => {
4521+
assert_eq!(machine, "demo");
4522+
assert_eq!(runtime_root, std::path::Path::new("/tmp/runtime"));
4523+
assert_eq!(stop_wait_secs, 5);
4524+
assert_eq!(boot_wait_secs, 7);
4525+
}
4526+
other => panic!("unexpected command: {other:?}"),
4527+
}
4528+
4529+
let restart_defaults = Cli::parse_from(["port", "machine", "restart", "--machine", "demo"]);
4530+
4531+
match restart_defaults.command {
4532+
Command::Machine(MachineCommand::Restart {
4533+
machine,
4534+
runtime_root,
4535+
stop_wait_secs,
4536+
boot_wait_secs,
4537+
}) => {
4538+
assert_eq!(machine, "demo");
4539+
assert_eq!(runtime_root, std::path::Path::new("runtime"));
4540+
assert_eq!(stop_wait_secs, 3);
4541+
assert_eq!(boot_wait_secs, 3);
4542+
}
4543+
other => panic!("unexpected command: {other:?}"),
4544+
}
4545+
44494546
let monitor = Cli::parse_from([
44504547
"port",
44514548
"machine",

crates/port-runtime/src/lib.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6708,6 +6708,37 @@ pub fn stop_machine(
67086708
firecracker_local_stop_machine(runtime_root, machine_name, timeout)
67096709
}
67106710

6711+
#[derive(Debug, Clone)]
6712+
pub struct RestartResult {
6713+
pub stop: StopResult,
6714+
pub launch: LaunchMetadata,
6715+
}
6716+
6717+
/// Stop a Port-managed machine and immediately relaunch it under the same
6718+
/// runtime root, preserving the rootfs overlay (and therefore in-VM state).
6719+
/// The relaunch picks up any change in the machine's guest image — making
6720+
/// this the primitive that supports rolling guest-image upgrades (e.g. K3s
6721+
/// patch bumps) without losing etcd, kubelet state, or pulled container
6722+
/// images held in the overlay.
6723+
pub fn restart_machine(
6724+
config: &PortConfig,
6725+
runtime_root: &Path,
6726+
machine_name: &str,
6727+
stop_timeout: Duration,
6728+
boot_wait: Duration,
6729+
) -> Result<RestartResult> {
6730+
let stop = stop_machine(config, runtime_root, machine_name, stop_timeout)?;
6731+
let launch = launch_local_machine(
6732+
config,
6733+
&LaunchRequest {
6734+
machine_name,
6735+
runtime_root,
6736+
boot_wait,
6737+
},
6738+
)?;
6739+
Ok(RestartResult { stop, launch })
6740+
}
6741+
67116742
fn firecracker_local_stop_machine(
67126743
runtime_root: &Path,
67136744
machine_name: &str,

0 commit comments

Comments
 (0)