Skip to content

Commit 6c6bb5b

Browse files
committed
guestagent,hostagent,limactl: add memory ballooning and auto-pause
Add adaptive memory ballooning and auto-pause for macOS VZ driver VMs. The balloon controller dynamically adjusts guest memory between a configured minimum and the VM's maximum based on guest PSI pressure metrics, host memory pressure (Mach vm_stat), and workload signals. Key components: Guest agent memory metrics: Collect /proc/meminfo, /proc/pressure/memory, /proc/vmstat, and Docker container stats via gRPC streaming. Export PSI some/full avg10 and avg60, AnonPages, swap usage, page fault rates, and container count. Balloon controller state machine: Four states (Bootstrap → Learning Descend → Steady → OOM Recovery) with PSI-driven grow/shrink decisions. Configurable thresholds, cooldowns, settle windows, and step sizes. Learned floor tracking with OOM circuit breaker (3 OOMs in 10min locks at max). Host pressure monitor (macOS): Poll Mach vm_stat for free/inactive/speculative pages. Classify Normal/Warning/Critical with hysteresis. Critical triggers immediate grow; Warning halves shrink steps. Auto-pause manager: Pause the VM after configurable idle timeout using multi-signal idle detection (SSH connections, CPU, disk I/O, socket proxy activity). Resume on incoming connections with socket proxy that queues traffic during pause. Manual pause/resume via limactl commands. VZ driver integration: Implement Ballooner and Pausable interfaces. SetMemorySize via Virtualization.framework, pause/resume via VZ API. CLI additions: - limactl pause/resume: manual VM suspend and resume - limactl ls: show physical memory footprint column Template: Alpine Docker VZ template with memory optimization (swap, zswap, cgroup tuning, PSI boot script, compaction cron). Signed-off-by: Jason W. Ehrlich <jwehrlich@outlook.com>
1 parent 508d1d3 commit 6c6bb5b

53 files changed

Lines changed: 7324 additions & 99 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cmd/lima-guestagent/daemon_linux.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
"github.com/lima-vm/lima/v2/pkg/guestagent"
1919
"github.com/lima-vm/lima/v2/pkg/guestagent/api/server"
20+
"github.com/lima-vm/lima/v2/pkg/guestagent/metrics"
2021
"github.com/lima-vm/lima/v2/pkg/guestagent/serialport"
2122
"github.com/lima-vm/lima/v2/pkg/guestagent/ticker"
2223
"github.com/lima-vm/lima/v2/pkg/portfwdserver"
@@ -145,5 +146,8 @@ func daemonAction(cmd *cobra.Command, _ []string) error {
145146
logrus.Infof("serving the guest agent on %q", socket)
146147
}
147148
defer logrus.Debug("exiting lima-guestagent daemon")
148-
return server.StartServer(ctx, l, &server.GuestServer{Agent: agent, TunnelS: portfwdserver.NewTunnelServer()})
149+
dockerSocket := "/var/run/docker.sock"
150+
collector := metrics.NewCollector(&dockerSocket)
151+
defer collector.Close()
152+
return server.StartServer(ctx, l, &server.GuestServer{Agent: agent, TunnelS: portfwdserver.NewTunnelServer(), Collector: collector})
149153
}

cmd/limactl/disk.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ func diskUnlockAction(cmd *cobra.Command, args []string) error {
345345
diskName, disk.Instance, inst.Errors)
346346
continue
347347
}
348-
if inst.Status == limatype.StatusRunning {
348+
if inst.Status == limatype.StatusRunning || inst.Status == limatype.StatusPaused {
349349
logrus.Warnf("Cannot unlock disk %q used by running instance %q", diskName, disk.Instance)
350350
continue
351351
}
@@ -403,7 +403,7 @@ func diskResizeAction(cmd *cobra.Command, args []string) error {
403403
if disk.Instance != "" {
404404
inst, err := store.Inspect(ctx, disk.Instance)
405405
if err == nil {
406-
if inst.Status == limatype.StatusRunning {
406+
if inst.Status == limatype.StatusRunning || inst.Status == limatype.StatusPaused {
407407
return fmt.Errorf("cannot resize disk %q used by running instance %q. Please stop the VM instance", diskName, disk.Instance)
408408
}
409409
}

cmd/limactl/edit.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func editAction(cmd *cobra.Command, args []string) error {
6464
}
6565
return err
6666
}
67-
if inst.Status == limatype.StatusRunning {
67+
if inst.Status == limatype.StatusRunning || inst.Status == limatype.StatusPaused {
6868
return errors.New("cannot edit a running instance")
6969
}
7070
filePath = filepath.Join(inst.Dir, filenames.LimaYAML)

cmd/limactl/guest-install.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ func guestInstallAction(cmd *cobra.Command, args []string) error {
6464
if err != nil {
6565
return err
6666
}
67-
if inst.Status == limatype.StatusStopped {
68-
return fmt.Errorf("instance %q is stopped, run `limactl start %s` to start the instance", instName, instName)
67+
if inst.Status == limatype.StatusStopped || inst.Status == limatype.StatusPaused {
68+
return fmt.Errorf("instance %q is not running (status: %s), run `limactl start %s` to start the instance", instName, inst.Status, instName)
6969
}
7070

7171
ctx := cmd.Context()

cmd/limactl/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ func newApp() *cobra.Command {
181181
newCreateCommand(),
182182
newStartCommand(),
183183
newStopCommand(),
184+
newPauseCommand(),
185+
newResumeCommand(),
184186
newShellCommand(),
185187
newCopyCommand(),
186188
newListCommand(),

cmd/limactl/pause.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// SPDX-FileCopyrightText: Copyright The Lima Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package main
5+
6+
import (
7+
"fmt"
8+
"path/filepath"
9+
10+
"github.com/spf13/cobra"
11+
12+
hostagentclient "github.com/lima-vm/lima/v2/pkg/hostagent/api/client"
13+
"github.com/lima-vm/lima/v2/pkg/limatype"
14+
"github.com/lima-vm/lima/v2/pkg/limatype/filenames"
15+
"github.com/lima-vm/lima/v2/pkg/store"
16+
)
17+
18+
func newPauseCommand() *cobra.Command {
19+
return &cobra.Command{
20+
Use: "pause INSTANCE",
21+
Short: "Pause a running instance",
22+
Long: "Pause a running instance immediately. Requires auto-pause to be enabled in the instance configuration. The instance can be resumed with 'limactl resume' or automatically when a client connects to a forwarded socket.",
23+
Args: WrapArgsError(cobra.MaximumNArgs(1)),
24+
RunE: pauseAction,
25+
ValidArgsFunction: pauseBashComplete,
26+
GroupID: basicCommand,
27+
}
28+
}
29+
30+
func pauseAction(cmd *cobra.Command, args []string) error {
31+
ctx := cmd.Context()
32+
instName := DefaultInstanceName
33+
if len(args) > 0 {
34+
instName = args[0]
35+
}
36+
37+
inst, err := store.Inspect(ctx, instName)
38+
if err != nil {
39+
return err
40+
}
41+
42+
if inst.Status == limatype.StatusPaused {
43+
return fmt.Errorf("instance %q is already paused", instName)
44+
}
45+
if inst.Status != limatype.StatusRunning {
46+
return fmt.Errorf("instance %q is not running (status: %s)", instName, inst.Status)
47+
}
48+
49+
haSock := filepath.Join(inst.Dir, filenames.HostAgentSock)
50+
haClient, err := hostagentclient.NewHostAgentClient(haSock)
51+
if err != nil {
52+
return fmt.Errorf("failed to connect to host agent: %w", err)
53+
}
54+
55+
if err := haClient.Pause(ctx); err != nil {
56+
return fmt.Errorf("failed to pause instance %q: %w", instName, err)
57+
}
58+
59+
fmt.Fprintf(cmd.OutOrStdout(), "Paused instance %q\n", instName)
60+
return nil
61+
}
62+
63+
func pauseBashComplete(cmd *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) {
64+
return bashCompleteInstanceNames(cmd)
65+
}

cmd/limactl/resume.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// SPDX-FileCopyrightText: Copyright The Lima Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package main
5+
6+
import (
7+
"fmt"
8+
"path/filepath"
9+
10+
"github.com/spf13/cobra"
11+
12+
hostagentclient "github.com/lima-vm/lima/v2/pkg/hostagent/api/client"
13+
"github.com/lima-vm/lima/v2/pkg/limatype"
14+
"github.com/lima-vm/lima/v2/pkg/limatype/filenames"
15+
"github.com/lima-vm/lima/v2/pkg/store"
16+
)
17+
18+
func newResumeCommand() *cobra.Command {
19+
return &cobra.Command{
20+
Use: "resume INSTANCE",
21+
Short: "Resume a paused instance",
22+
Long: "Resume a paused instance. If the instance is already running, this is a no-op.",
23+
Args: WrapArgsError(cobra.MaximumNArgs(1)),
24+
RunE: resumeAction,
25+
ValidArgsFunction: resumeBashComplete,
26+
GroupID: basicCommand,
27+
}
28+
}
29+
30+
func resumeAction(cmd *cobra.Command, args []string) error {
31+
ctx := cmd.Context()
32+
instName := DefaultInstanceName
33+
if len(args) > 0 {
34+
instName = args[0]
35+
}
36+
37+
inst, err := store.Inspect(ctx, instName)
38+
if err != nil {
39+
return err
40+
}
41+
42+
if inst.Status == limatype.StatusRunning {
43+
fmt.Fprintf(cmd.OutOrStdout(), "Instance %q is already running\n", instName)
44+
return nil
45+
}
46+
if inst.Status != limatype.StatusPaused {
47+
return fmt.Errorf("instance %q is not paused (status: %s)", instName, inst.Status)
48+
}
49+
50+
haSock := filepath.Join(inst.Dir, filenames.HostAgentSock)
51+
haClient, err := hostagentclient.NewHostAgentClient(haSock)
52+
if err != nil {
53+
return fmt.Errorf("failed to connect to host agent: %w", err)
54+
}
55+
56+
triggered, err := haClient.Resume(ctx)
57+
if err != nil {
58+
return fmt.Errorf("failed to resume instance %q: %w", instName, err)
59+
}
60+
if !triggered {
61+
fmt.Fprintf(cmd.OutOrStdout(), "Instance %q is already running\n", instName)
62+
return nil
63+
}
64+
65+
fmt.Fprintf(cmd.OutOrStdout(), "Resumed instance %q\n", instName)
66+
return nil
67+
}
68+
69+
func resumeBashComplete(cmd *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) {
70+
return bashCompleteInstanceNames(cmd)
71+
}

cmd/limactl/shell.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ func shellAction(cmd *cobra.Command, args []string) error {
135135
if inst.Config == nil {
136136
return fmt.Errorf("instance %q has no configuration", instName)
137137
}
138-
if inst.Status == limatype.StatusStopped {
138+
if inst.Status == limatype.StatusStopped || inst.Status == limatype.StatusPaused {
139139
startNow, err := flags.GetBool("start")
140140
if err != nil {
141141
return err
@@ -149,7 +149,7 @@ func shellAction(cmd *cobra.Command, args []string) error {
149149
}
150150

151151
if !startNow {
152-
return fmt.Errorf("instance %q is stopped, run `limactl start %s` to start the instance", instName, instName)
152+
return fmt.Errorf("instance %q is not running (status: %s), run `limactl start %s` to start the instance", instName, inst.Status, instName)
153153
}
154154

155155
// Network reconciliation will be performed by the process launched by the autostart manager

cmd/limactl/start.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,9 @@ func startAction(cmd *cobra.Command, args []string) error {
581581
inst.Name, instance.LimactlShellCmd(inst.Name))
582582
// Not an error
583583
return nil
584+
case limatype.StatusPaused:
585+
logrus.Infof("The instance %q is paused and will be resumed by the host agent on activity.", inst.Name)
586+
return nil
584587
case limatype.StatusStopped:
585588
// NOP
586589
default:

cmd/limactl/tunnel.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ func tunnelAction(cmd *cobra.Command, args []string) error {
7373
}
7474
return err
7575
}
76-
if inst.Status == limatype.StatusStopped {
77-
return fmt.Errorf("instance %q is stopped, run `limactl start %s` to start the instance", instName, instName)
76+
if inst.Status == limatype.StatusStopped || inst.Status == limatype.StatusPaused {
77+
return fmt.Errorf("instance %q is not running (status: %s), run `limactl start %s` to start the instance", instName, inst.Status, instName)
7878
}
7979

8080
if port == 0 {

0 commit comments

Comments
 (0)