Skip to content

Commit e3ffe7a

Browse files
JAORMXclaude
andauthored
Prune stale firmware cache versions after download (#199)
* Prune stale firmware cache versions after download Each go-microvm version bump creates a new firmware cache directory at ~/.cache/broodbox/firmware/<version>/, but only the pinned version is ever used. Old version dirs (tens of MB each) were never deleted, so the cache grew unbounded. After a successful fresh download (with the firmware lock held), scan sibling version dirs under the cache root and remove any that don't match the version just downloaded. Pruning runs only on a fresh download, not on a cache hit, and never on transient temp entries (.firmware.lock, firmware-*.tar.gz, firmware-extract-*). Failures are logged at debug level but never fail the download — the firmware is already cached. Also add a `task firmware-clean` target for manual cache removal. Fixes #12 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01VP887qH8BMW4PMUXBuEqGc * Address review: macOS cache path, temp prefix, log level - firmware-clean Taskfile target now mirrors xdg.CacheHome resolution (XDG_CACHE_HOME → Darwin /var/home/jaosorior/Library/Caches → /var/home/jaosorior/.cache) so macOS no longer removes the wrong path while firmware accumulates. - Hoist the transient-entry "firmware-" prefix into firmwareTempPrefix, referenced by both os.CreateTemp/os.MkdirTemp patterns and the prune skip-guard, so the coupling is explicit. - Bump unexpected os.RemoveAll failures in pruneStaleFirmwareVersions to Warn (only "not exist" stays at Debug) so a persistently-failing removal doesn't silently refill the cache disk. --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent f1f96a5 commit e3ffe7a

3 files changed

Lines changed: 141 additions & 2 deletions

File tree

Taskfile.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,22 @@ tasks:
236236
- rm -f internal/infra/vm/runtimebin/VERSION internal/infra/vm/runtimebin/LICENSE-GPL
237237
- rm -f internal/infra/vm/runtimebin/sha256sums.txt
238238

239+
firmware-clean:
240+
desc: Remove the entire firmware download cache
241+
cmds:
242+
- rm -rf "{{.BBOX_CACHE_HOME}}/broodbox/firmware"
243+
vars:
244+
BBOX_CACHE_HOME:
245+
sh: |
246+
base="${XDG_CACHE_HOME:-}"
247+
if [ -z "$base" ]; then
248+
case "$(uname -s)" in
249+
Darwin) base="$HOME/Library/Caches" ;;
250+
*) base="$HOME/.cache" ;;
251+
esac
252+
fi
253+
echo "$base"
254+
239255
# --- OCI guest images ---
240256

241257
image-base:

internal/infra/vm/firmware.go

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ const (
3434
maxFirmwareExtractSize = 128 << 20
3535
// maxFirmwareEntries caps the number of tar entries to prevent inode exhaustion.
3636
maxFirmwareEntries = 1000
37+
// firmwareTempPrefix is the shared prefix of transient download entries
38+
// created directly under cacheRoot (firmware-*.tar.gz archives and
39+
// firmware-extract-* dirs). pruneStaleFirmwareVersions skips any entry
40+
// with this prefix so leftover temps from a crashed run are never
41+
// mistaken for version directories. Both os.CreateTemp and os.MkdirTemp
42+
// patterns below reference it to keep the coupling explicit.
43+
firmwareTempPrefix = "firmware-"
3744
)
3845

3946
type FirmwareResolution struct {
@@ -193,7 +200,7 @@ func downloadFirmware(ctx context.Context, cacheRoot, version, osName, arch stri
193200
}
194201
url := firmwareURL(version, osName, candidate)
195202

196-
tmpArchive, err := os.CreateTemp(cacheRoot, "firmware-*.tar.gz")
203+
tmpArchive, err := os.CreateTemp(cacheRoot, firmwareTempPrefix+"*.tar.gz")
197204
if err != nil {
198205
return FirmwareResolution{}, fmt.Errorf("create firmware temp archive: %w", err)
199206
}
@@ -220,7 +227,7 @@ func downloadFirmware(ctx context.Context, cacheRoot, version, osName, arch stri
220227
continue
221228
}
222229

223-
tmpDir, err := os.MkdirTemp(cacheRoot, "firmware-extract-")
230+
tmpDir, err := os.MkdirTemp(cacheRoot, firmwareTempPrefix+"extract-")
224231
if err != nil {
225232
cleanupArchive()
226233
return FirmwareResolution{}, fmt.Errorf("create firmware temp dir: %w", err)
@@ -276,6 +283,11 @@ func downloadFirmware(ctx context.Context, cacheRoot, version, osName, arch stri
276283
return FirmwareResolution{}, err
277284
}
278285

286+
// Fresh download succeeded — reclaim cache dirs left by older
287+
// firmware versions. Only the version pinned in go.mod is ever
288+
// used, so previous version dirs (tens of MB each) are dead weight.
289+
pruneStaleFirmwareVersions(ctx, cacheRoot, version)
290+
279291
slog.DebugContext(ctx, "firmware downloaded", "dir", filepath.Dir(finalFwPath), "version", version, "arch", candidate)
280292
return FirmwareResolution{
281293
Dir: filepath.Dir(finalFwPath),
@@ -294,6 +306,54 @@ func downloadFirmware(ctx context.Context, cacheRoot, version, osName, arch stri
294306
return FirmwareResolution{}, lastErr
295307
}
296308

309+
// pruneStaleFirmwareVersions removes firmware cache version directories under
310+
// cacheRoot that don't match keepVersion. Each go-microvm version bump creates
311+
// a new <cacheRoot>/<version>/ directory, but only the pinned version is ever
312+
// used, so older ones accumulate unbounded.
313+
//
314+
// It must be called only after a successful fresh download, with the firmware
315+
// lock held (no concurrent writers). Failures are logged and never propagated:
316+
// the firmware is already cached successfully, so pruning is strictly
317+
// best-effort cleanup. The lock file and transient temp entries
318+
// (firmware-*.tar.gz archives, firmware-extract-* dirs) are left untouched.
319+
func pruneStaleFirmwareVersions(ctx context.Context, cacheRoot, keepVersion string) {
320+
entries, err := os.ReadDir(cacheRoot)
321+
if err != nil {
322+
slog.DebugContext(ctx, "firmware prune: cannot scan cache root", "root", cacheRoot, "error", err)
323+
return
324+
}
325+
326+
for _, entry := range entries {
327+
// Only version directories are pruned; this skips .firmware.lock
328+
// and any leftover firmware-*.tar.gz temp archives (plain files).
329+
if !entry.IsDir() {
330+
continue
331+
}
332+
name := entry.Name()
333+
if name == keepVersion {
334+
continue
335+
}
336+
// Skip transient temp dirs created in cacheRoot during download.
337+
if strings.HasPrefix(name, firmwareTempPrefix) {
338+
continue
339+
}
340+
341+
stalePath := filepath.Join(cacheRoot, name)
342+
if err := os.RemoveAll(stalePath); err != nil {
343+
// "not exist" is legitimately quiet; unexpected removal failures
344+
// (permissions drift, read-only mount) are surfaced at Warn so
345+
// they don't silently fill the cache disk over many version bumps.
346+
if !errors.Is(err, os.ErrNotExist) {
347+
slog.WarnContext(ctx, "firmware prune: failed to remove stale version",
348+
"path", stalePath, "error", err)
349+
}
350+
continue
351+
}
352+
slog.DebugContext(ctx, "firmware prune: removed stale version",
353+
"path", stalePath, "version", name)
354+
}
355+
}
356+
297357
func findSystemFirmware(version, osName, arch string) (FirmwareResolution, error) {
298358
path, err := findFirmwareInDirs(systemFirmwareDirs(), firmwareLibNames(osName))
299359
if err != nil {

internal/infra/vm/firmware_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package vm
66
import (
77
"archive/tar"
88
"compress/gzip"
9+
"context"
910
"crypto/sha256"
1011
"encoding/hex"
1112
"encoding/json"
@@ -897,3 +898,65 @@ func TestDownloadToFile_HTTPError(t *testing.T) {
897898
require.Error(t, err)
898899
assert.Contains(t, err.Error(), "unexpected status")
899900
}
901+
902+
func TestPruneStaleFirmwareVersions(t *testing.T) {
903+
t.Parallel()
904+
905+
cacheRoot := t.TempDir()
906+
keep := "v0.0.8"
907+
908+
// Two stale version dirs, the current one, plus entries that must be
909+
// preserved: the lock file and transient download temp entries.
910+
staleA := filepath.Join(cacheRoot, "v0.0.6", "linux-amd64")
911+
staleB := filepath.Join(cacheRoot, "v0.0.7", "linux-amd64")
912+
current := filepath.Join(cacheRoot, keep, "linux-amd64")
913+
for _, d := range []string{staleA, staleB, current} {
914+
require.NoError(t, os.MkdirAll(d, 0o755))
915+
require.NoError(t, os.WriteFile(filepath.Join(d, "firmware.json"), []byte("{}"), 0o600))
916+
}
917+
lockPath := filepath.Join(cacheRoot, ".firmware.lock")
918+
require.NoError(t, os.WriteFile(lockPath, nil, 0o600))
919+
tmpArchive := filepath.Join(cacheRoot, "firmware-123.tar.gz")
920+
require.NoError(t, os.WriteFile(tmpArchive, []byte("x"), 0o600))
921+
tmpExtract := filepath.Join(cacheRoot, "firmware-extract-456")
922+
require.NoError(t, os.MkdirAll(tmpExtract, 0o755))
923+
924+
pruneStaleFirmwareVersions(context.Background(), cacheRoot, keep)
925+
926+
// Stale version dirs removed.
927+
for _, d := range []string{filepath.Join(cacheRoot, "v0.0.6"), filepath.Join(cacheRoot, "v0.0.7")} {
928+
_, err := os.Stat(d)
929+
assert.True(t, os.IsNotExist(err), "stale version %s should be removed", filepath.Base(d))
930+
}
931+
// Current version and non-version entries preserved.
932+
_, err := os.Stat(current)
933+
assert.NoError(t, err, "current version dir must be preserved")
934+
_, err = os.Stat(lockPath)
935+
assert.NoError(t, err, "lock file must be preserved")
936+
_, err = os.Stat(tmpArchive)
937+
assert.NoError(t, err, "transient temp archive must be preserved")
938+
_, err = os.Stat(tmpExtract)
939+
assert.NoError(t, err, "transient extract dir must be preserved")
940+
}
941+
942+
func TestPruneStaleFirmwareVersions_NoStaleVersions(t *testing.T) {
943+
t.Parallel()
944+
945+
cacheRoot := t.TempDir()
946+
keep := "v0.0.8"
947+
current := filepath.Join(cacheRoot, keep, "linux-amd64")
948+
require.NoError(t, os.MkdirAll(current, 0o755))
949+
950+
// Must be a no-op when only the current version is present.
951+
pruneStaleFirmwareVersions(context.Background(), cacheRoot, keep)
952+
953+
_, err := os.Stat(current)
954+
assert.NoError(t, err, "current version dir must be preserved")
955+
}
956+
957+
func TestPruneStaleFirmwareVersions_MissingCacheRoot(t *testing.T) {
958+
t.Parallel()
959+
960+
// Must not panic when the cache root does not exist.
961+
pruneStaleFirmwareVersions(context.Background(), filepath.Join(t.TempDir(), "nope"), "v0.0.8")
962+
}

0 commit comments

Comments
 (0)