Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pkg/container/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package container

import (
"crypto/ed25519"
"os"
"time"

apko_types "chainguard.dev/apko/pkg/build/types"
Expand Down Expand Up @@ -73,7 +74,7 @@ type Config struct {
SSHBuildClient *ssh.Client // SSH client for the build environment, may not have privileges
SSHControlBuildClient *ssh.Client // SSH client for control operations in the build environment, has privileges
SSHControlClient *ssh.Client // SSH client for unrestricted control environment, has privileges
QemuPID int
QemuProcess *os.Process // QEMU process handle (not just PID, to avoid PID reuse issues)
RunAsGID string

// Virtiofs-related fields for cache directory
Expand Down
56 changes: 49 additions & 7 deletions pkg/container/qemu_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
_ "embed"
"encoding/base64"
"encoding/pem"
"errors"
"fmt"
"io"
"io/fs"
Expand Down Expand Up @@ -394,6 +395,24 @@ func (bw *qemu) StartPod(ctx context.Context, cfg *Config) error {
return createMicroVM(ctx, cfg)
}

// waitForProcessExit polls proc until it has exited or timeout has elapsed.
//
// It returns true when the process is known to be gone (or proc is nil) and
// false when the process is still present once the deadline has passed. The
// process is always probed at least once, so a zero or negative timeout
// performs a single non-blocking check, and a final probe is made at the
// deadline so that an exit during the last polling interval is not missed.
//
// NOTE(review): on Unix a child that has exited but has not yet been reaped
// (a zombie) still accepts signal 0, so this only reports an exit once the
// process has been waited on — confirm that something calls Wait on the
// QEMU command.
func waitForProcessExit(proc *os.Process, timeout time.Duration) bool {
	if proc == nil {
		return true
	}

	const pollInterval = 100 * time.Millisecond
	deadline := time.Now().Add(timeout)

	for {
		// Signal 0 performs existence and permission checks without
		// actually delivering a signal.
		err := proc.Signal(syscall.Signal(0))
		// EPERM means the process exists but we may not signal it, so it
		// must not be mistaken for an exit. Any other error (for example
		// os.ErrProcessDone) is treated as "process is gone".
		if err != nil && !errors.Is(err, syscall.EPERM) {
			return true
		}

		remaining := time.Until(deadline)
		if remaining <= 0 {
			return false
		}

		// Never sleep past the deadline.
		pause := pollInterval
		if remaining < pause {
			pause = remaining
		}
		time.Sleep(pause)
	}
}

// TerminatePod terminates a pod if necessary. For Qemu runners, shuts
// down the guest VM.
func (bw *qemu) TerminatePod(ctx context.Context, cfg *Config) error {
Expand All @@ -405,7 +424,8 @@ func (bw *qemu) TerminatePod(ctx context.Context, cfg *Config) error {
defer secureDelete(ctx, cfg.InitramfsPath)
defer stopVirtiofsd(ctx, cfg)

clog.FromContext(ctx).Info("qemu: sending shutdown signal")
log := clog.FromContext(ctx)
log.Info("qemu: sending shutdown signal")
err := sendSSHCommand(ctx,
cfg.SSHControlClient,
cfg,
Expand All @@ -416,12 +436,29 @@ func (bw *qemu) TerminatePod(ctx context.Context, cfg *Config) error {
[]string{"sh", "-c", "echo s > /proc/sysrq-trigger && echo o > /proc/sysrq-trigger&"},
)
if err != nil {
clog.FromContext(ctx).Warnf("failed to gracefully shutdown vm, killing it: %v", err)
// in case of graceful shutdown failure, axe it with pkill
return syscall.Kill(cfg.QemuPID, syscall.SIGKILL)
// ExitMissingError is expected when the VM powers off abruptly before
// the SSH channel can return a clean exit status. Don't log this as an error.
var missingErr *ssh.ExitMissingError
if !errors.As(err, &missingErr) {
log.Warnf("qemu: graceful shutdown command failed: %v", err)
}
}

return nil
// Wait up to 5 seconds for the VM process to exit
if waitForProcessExit(cfg.QemuProcess, 5*time.Second) {
return nil
}

// VM didn't exit, try SIGTERM and wait another 5 seconds
log.Warn("qemu: VM did not exit after shutdown signal, sending SIGTERM")
_ = cfg.QemuProcess.Signal(syscall.SIGTERM)
if waitForProcessExit(cfg.QemuProcess, 5*time.Second) {
return nil
}

// VM still didn't exit, send SIGKILL
log.Warn("qemu: VM did not exit after SIGTERM, sending SIGKILL")
return cfg.QemuProcess.Signal(syscall.SIGKILL)
}

// WorkspaceTar implements Runner
Expand Down Expand Up @@ -1118,7 +1155,7 @@ func createMicroVM(ctx context.Context, cfg *Config) error {
// don't fail the build because of this.
}

cfg.QemuPID = qemuCmd.Process.Pid
cfg.QemuProcess = qemuCmd.Process
return nil
}

Expand Down Expand Up @@ -1599,7 +1636,12 @@ func sendSSHCommand(ctx context.Context, client *ssh.Client,
clog.FromContext(ctx).Debugf("running (%d) %v", len(command), cmd)
err = session.Run(cmd)
if err != nil {
clog.FromContext(ctx).Errorf("Failed to run command %q: %v", cmd, err)
// ExitMissingError is expected when the SSH channel closes abruptly
// (e.g., when the VM powers off). Don't log it as an error.
var missingErr *ssh.ExitMissingError
if !errors.As(err, &missingErr) {
clog.FromContext(ctx).Errorf("Failed to run command %q: %v", cmd, err)
}
return err
}

Expand Down
Loading