Skip to content

Add support for executing a binary before saving and after restoring. #11697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions pkg/sentry/control/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package control
import (
"errors"
"fmt"
"time"

"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
Expand All @@ -25,6 +26,18 @@ import (
"gvisor.dev/gvisor/pkg/urpc"
)

// Metadata keys through which a Save request configures the save/restore
// binary.
const (
	// SaveRestoreBinPathKey names the metadata entry that holds the path to
	// the save/restore binary.
	SaveRestoreBinPathKey = "save-restore-bin-path"

	// SaveRestoreBinTimeoutKey names the metadata entry that holds the
	// timeout applied to the save/restore binary.
	SaveRestoreBinTimeoutKey = "save-restore-bin-timeout"
)

// defaultSaveRestoreBinTimeout is the timeout applied to the save/restore
// binary when the metadata does not carry an explicit one.
const defaultSaveRestoreBinTimeout time.Duration = 10 * time.Second

// ErrInvalidFiles is the error returned when the urpc call to Save does not
// carry an appropriate file payload, for example when no output file is
// supplied.
var ErrInvalidFiles = errors.New("exactly one file must be provided")
Expand Down Expand Up @@ -97,5 +110,20 @@ func (s *State) Save(o *SaveOpts, _ *struct{}) error {
}
defer saveOpts.PagesFile.Close()
}
if saveRestoreBinPath, ok := o.Metadata[SaveRestoreBinPathKey]; ok {
saveRestoreBinTimeout := defaultSaveRestoreBinTimeout
if saveRestoreBinTimeoutString, ok := o.Metadata[SaveRestoreBinTimeoutKey]; ok {
var err error
saveRestoreBinTimeout, err = time.ParseDuration(saveRestoreBinTimeoutString)
if err != nil {
return fmt.Errorf("failed to parse save/restore bin timeout: %w", err)
}
}
s.Kernel.SaveRestoreBinPath = saveRestoreBinPath
s.Kernel.SaveRestoreBinTimeout = saveRestoreBinTimeout
if _, err := s.Kernel.ExecSaveRestoreBin(kernel.SaveRestoreBinSave); err != nil {
return fmt.Errorf("failed to exec save/restore binary: %w", err)
}
}
return saveOpts.Save(s.Kernel.SupervisorContext(), s.Kernel, s.Watchdog)
}
102 changes: 102 additions & 0 deletions pkg/sentry/kernel/kernel.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ import (
// allow easy access everywhere.
var IOUringEnabled = false

// SaveRestoreBinMode selects the phase the save/restore binary is being run
// for. Its string value is what the binary receives as its argument.
type SaveRestoreBinMode string

// Modes understood by the save/restore binary.
const (
	// SaveRestoreBinSave asks the binary to run in save mode.
	SaveRestoreBinSave = SaveRestoreBinMode("save")

	// SaveRestoreBinRestore asks the binary to run in restore mode.
	SaveRestoreBinRestore = SaveRestoreBinMode("restore")
)

// UserCounters is a set of user counters.
//
// +stateify savable
Expand Down Expand Up @@ -370,6 +380,16 @@ type Kernel struct {

// UnixSocketOpts stores configuration options for management of unix sockets.
UnixSocketOpts transport.UnixSocketOpts

// SaveRestoreBinPath is the path to the save/restore binary. It is executed
// with the argument "save" before the kernel is saved and "restore" after
// the kernel is restored and restarted.
SaveRestoreBinPath string

// SaveRestoreBinTimeout is the timeout for the save/restore binary. If the
// binary fails to exit within this timeout the save/restore operation will
// fail.
SaveRestoreBinTimeout time.Duration
}

// InitKernelArgs holds arguments to Init.
Expand Down Expand Up @@ -2072,3 +2092,85 @@ func (k *Kernel) ContainerName(cid string) string {
defer k.extMu.Unlock()
return k.containerNames[cid]
}

// ExecSaveRestoreBin creates a new process that executes the save/restore
// binary at k.SaveRestoreBinPath with mode as its only argument. The process
// is created in the container of the global init thread group's leader and
// uses that leader's mount namespace, FD table and credentials, together with
// the kernel's root PID, UTS and IPC namespaces.
//
// If no save/restore binary is configured, or the leader's mount namespace is
// no longer available, nothing is executed and (nil, nil) is returned.
//
// If the kernel has been started, the process is started immediately and the
// method waits for it to exit; the result is then (nil, err), where err comes
// from WaitForSaveRestoreBin. Otherwise the newly created thread group is
// returned and the caller is responsible for starting and waiting for it.
func (k *Kernel) ExecSaveRestoreBin(mode SaveRestoreBinMode) (*ThreadGroup, error) {
	if k.SaveRestoreBinPath == "" {
		return nil, nil
	}
	sctx := k.SupervisorContext()
	leader := k.GlobalInit().Leader()
	contID := leader.ContainerID()
	mntns := leader.MountNamespace()
	if mntns == nil || !mntns.TryIncRef() {
		log.Warningf("PID %d in container %q has exited, skipping save/restore binary (mode %q)", leader.ThreadGroup().ID(), contID, mode)
		return nil, nil
	}

	// Every reference taken below for local use is released through cu when
	// this method returns, on both the success and the error paths.
	root := mntns.Root(sctx)
	cu := cleanup.Make(func() {
		root.DecRef(sctx)
	})
	defer cu.Clean()
	ctx := vfs.WithRoot(sctx, root)
	cu.Add(func() {
		mntns.DecRef(ctx)
	})

	// Hold one reference on the leader's FD table for the duration of the
	// call. The pointer is captured once so the IncRef and the DecRef are
	// guaranteed to act on the same table.
	fdTable := leader.FDTable()
	fdTable.IncRef()
	cu.Add(func() {
		fdTable.DecRef(ctx)
	})

	argv := []string{k.SaveRestoreBinPath, string(mode)}

	// This extra reference is handed to CreateProcess along with the
	// namespace; the one taken by TryIncRef above is dropped by cu.
	// NOTE(review): presumably CreateProcess consumes this reference; confirm
	// against the CreateProcessArgs.MountNamespace contract.
	mntns.IncRef()
	args := CreateProcessArgs{
		Filename:       argv[0],
		Argv:           argv,
		ContainerID:    contID,
		MountNamespace: mntns,
		PIDNamespace:   k.RootPIDNamespace(),
		UTSNamespace:   k.RootUTSNamespace(),
		IPCNamespace:   k.RootIPCNamespace(),
		Credentials:    leader.Credentials(),
		Umask:          0022,
		Limits:         limits.NewLimitSet(),
		FDTable:        fdTable,
		Origin:         OriginExec,
	}
	tg, _, err := k.CreateProcess(args)
	if err != nil {
		return nil, fmt.Errorf("failed to create process: %w", err)
	}
	if k.started {
		k.StartProcess(tg)
		return nil, k.WaitForSaveRestoreBin(tg)
	}
	return tg, nil
}

// WaitForSaveRestoreBin waits for the save/restore binary running in
// saveRestoreTg to exit.
//
// It returns nil if the thread group exits with status 0 within
// k.SaveRestoreBinTimeout, and an error naming k.SaveRestoreBinPath if it
// exits with a non-zero status. If the timeout elapses first, SIGKILL is sent
// to the thread group and an error is returned; a failure to deliver the
// signal is wrapped into that error.
func (k *Kernel) WaitForSaveRestoreBin(saveRestoreTg *ThreadGroup) error {
	// The waiter signals completion by closing the channel rather than
	// sending on it. A send would block forever once the timeout branch has
	// returned, because nothing would be left to receive it.
	exited := make(chan struct{})
	go func() {
		defer close(exited)
		saveRestoreTg.WaitExited()
	}()

	// An explicit timer lets us release it as soon as the wait finishes.
	timer := time.NewTimer(k.SaveRestoreBinTimeout)
	defer timer.Stop()

	select {
	case <-exited:
		if status := saveRestoreTg.ExitStatus(); status != 0 {
			return fmt.Errorf("%v exited with non-zero status %d", k.SaveRestoreBinPath, status)
		}
		return nil
	case <-timer.C:
		if err := saveRestoreTg.SendSignal(&linux.SignalInfo{Signo: int32(linux.SIGKILL)}); err != nil {
			return fmt.Errorf("%s timed out after %v and could not be killed: %w", k.SaveRestoreBinPath, k.SaveRestoreBinTimeout, err)
		}
		return fmt.Errorf("%s timed out after %v", k.SaveRestoreBinPath, k.SaveRestoreBinTimeout)
	}
}
5 changes: 5 additions & 0 deletions runsc/boot/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,11 @@ func (r *restorer) restore(l *Loader) error {
if err := l.k.LoadFrom(ctx, r.stateFile, r.asyncMFLoader == nil, nil, oldInetStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}, l.saveRestoreNet); err != nil {
return fmt.Errorf("failed to load kernel: %w", err)
}
// The kernel should already have been started at this point, so we can
// immediately wait for the save/restore binary to be ready.
if _, err := l.k.ExecSaveRestoreBin(kernel.SaveRestoreBinRestore); err != nil {
return fmt.Errorf("failed to wait for save/restore binary: %w", err)
}

if r.asyncMFLoader != nil {
if r.background {
Expand Down