Skip to content

Commit 945b99c

Browse files
committed
fix(spec-044): autostart nil result not cached (gemini P1)
Gemini cross-review found a tray-core boot race: if the core process starts milliseconds before the tray writes tray-autostart.json, the first Read() sees fs.ErrNotExist and (before this fix) marked cachedOnce=true, locking autostart_enabled to null for the whole 1h TTL window — even after the tray subsequently wrote `true`. Fix: on ErrNotExist and other transient I/O errors, do NOT mark cachedOnce. Return nil this call but allow the next Read() to re-probe. Successful reads (including malformed-JSON "known unknowable") still cache for the full TTL. Adds TestReadAutostart_BootRaceDoesNotPoisonCache covering the race scenario: first read on absent file -> nil; tray writes sidecar; second read within TTL -> picks up the new value. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1f8dcd3 commit 945b99c

2 files changed

Lines changed: 38 additions & 5 deletions

File tree

internal/telemetry/autostart.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,18 @@ func (r *AutostartReader) Read() *bool {
102102

103103
data, err := os.ReadFile(r.Path)
104104
if err != nil {
105-
// File absent → tray not running (or never launched). Cache nil.
105+
// File absent → tray not running (or hasn't written the sidecar yet).
106+
// Tray-core boot race: if core starts slightly before the tray, the
107+
// first Read() sees ErrNotExist and would poison the 1h TTL with nil,
108+
// causing AutostartEnabled to stay null for the whole first hour even
109+
// after the tray publishes `true`. Do NOT mark cachedOnce so the next
110+
// Read() re-probes. Gemini P1 cross-review.
106111
if errors.Is(err, fs.ErrNotExist) {
107112
r.cached = nil
108-
r.cachedAt = now
109-
r.cachedOnce = true
110113
return nil
111114
}
112-
// Other errors (permissions, etc.): cache nil but don't poison the
113-
// long TTL — use a short retry by not marking cachedOnce.
115+
// Other errors (permissions, etc.): same short-retry behaviour — do
116+
// not mark cachedOnce so the next heartbeat tries again.
114117
r.cached = nil
115118
return nil
116119
}

internal/telemetry/autostart_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,36 @@ func TestReadAutostart_CachedOneHour(t *testing.T) {
114114
}
115115
}
116116

117+
// TestReadAutostart_BootRaceDoesNotPoisonCache: gemini P1 — if the core's
118+
// first Read() happens BEFORE the tray writes the sidecar, a missing file
119+
// must NOT poison the 1h TTL. The next Read() should re-probe and see the
120+
// value the tray has since written.
121+
func TestReadAutostart_BootRaceDoesNotPoisonCache(t *testing.T) {
122+
dir := t.TempDir()
123+
path := filepath.Join(dir, "tray-autostart.json")
124+
// File deliberately absent at first read — simulates core-starts-before-tray.
125+
126+
r := &AutostartReader{Path: path}
127+
if got := r.Read(); got != nil {
128+
t.Fatalf("first Read() = %v, want nil (file absent)", *got)
129+
}
130+
131+
// Tray now writes the sidecar (the race has closed).
132+
if err := os.WriteFile(path, []byte(`{"enabled": true}`), 0o644); err != nil {
133+
t.Fatal(err)
134+
}
135+
136+
// Second Read() within the 1h TTL must re-probe and see the new value.
137+
// Before the fix, this returned nil (poisoned cache) for up to an hour.
138+
got := r.Read()
139+
if got == nil {
140+
t.Fatalf("second Read() = nil, want *true (tray wrote sidecar after first read)")
141+
}
142+
if *got != true {
143+
t.Fatalf("second Read() = %v, want true", *got)
144+
}
145+
}
146+
117147
// TestDefaultAutostartReader_PathResolution: the default constructor returns
118148
// a reader that does not panic on a missing sidecar.
119149
func TestDefaultAutostartReader_PathResolution(t *testing.T) {

0 commit comments

Comments
 (0)