Skip to content

Commit 40bfad5

Browse files
Merge pull request #702 from gastownhall/hotfix/v0.14.0-copyfiles-fingerprint
hotfix: backport #687 and #695 to v0.14.0
2 parents 1b74f73 + 8d59e43 commit 40bfad5

File tree

9 files changed

+329
-9
lines changed

9 files changed

+329
-9
lines changed

cmd/gc/cmd_start.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,9 @@ func stageHookFiles(copyFiles []runtime.CopyEntry, cityPath, workDir string) []r
734734

735735
// workDir-based hooks: gemini, codex, opencode, copilot, pi, omp.
736736
// Use path.Join for RelDst (container-target, always forward slashes).
737+
// These paths live inside the agent worktree and are populated by
738+
// pre_start staging (e.g. worktree-setup.sh --sync), so hashing their
739+
// contents would drift across reconciler cycles. See issue #682.
737740
for _, rel := range []string{
738741
path.Join(".gemini", "settings.json"),
739742
path.Join(".codex", "hooks.json"),
@@ -747,7 +750,7 @@ func stageHookFiles(copyFiles []runtime.CopyEntry, cityPath, workDir string) []r
747750
if _, err := os.Stat(abs); err == nil {
748751
copyFiles = append(copyFiles, runtime.CopyEntry{
749752
Src: abs, RelDst: path.Join(relWorkDir, rel),
750-
Probed: true, ContentHash: runtime.HashPathContent(abs),
753+
Probed: true, SkipFingerprint: true,
751754
})
752755
}
753756
}
@@ -756,7 +759,7 @@ func stageHookFiles(copyFiles []runtime.CopyEntry, cityPath, workDir string) []r
756759
if info, err := os.Stat(skillsDir); err == nil && info.IsDir() {
757760
copyFiles = append(copyFiles, runtime.CopyEntry{
758761
Src: skillsDir, RelDst: path.Join(relWorkDir, ".claude", "skills"),
759-
Probed: true, ContentHash: runtime.HashPathContent(skillsDir),
762+
Probed: true, SkipFingerprint: true,
760763
})
761764
}
762765
// cityDir-based hooks: claude (.gc/settings.json).

cmd/gc/cmd_start_test.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -284,13 +284,15 @@ func TestStageHookFilesIncludesCodexAndCopilotExecutableHooks(t *testing.T) {
284284
t.Errorf("stageHookFiles() missing %q (got %v)", want, rels)
285285
}
286286
}
287-
// All filesystem-probed entries must be marked Probed with a ContentHash.
288287
for _, entry := range got {
289288
if !entry.Probed {
290289
t.Errorf("stageHookFiles() entry %q not marked Probed", entry.RelDst)
291290
}
292-
if entry.ContentHash == "" {
293-
t.Errorf("stageHookFiles() entry %q has empty ContentHash", entry.RelDst)
291+
if !entry.SkipFingerprint {
292+
t.Errorf("stageHookFiles() workDir entry %q must have SkipFingerprint=true (#682)", entry.RelDst)
293+
}
294+
if entry.ContentHash != "" {
295+
t.Errorf("stageHookFiles() workDir entry %q should skip HashPathContent, got %q (#682)", entry.RelDst, entry.ContentHash)
294296
}
295297
}
296298
}
@@ -319,6 +321,10 @@ func TestStageHookFilesIncludesCanonicalClaudeHook(t *testing.T) {
319321
if entry.ContentHash == "" {
320322
t.Fatal("stageHookFiles() .gc/settings.json has empty ContentHash")
321323
}
324+
// City-level settings MUST still drive config drift — never skip.
325+
if entry.SkipFingerprint {
326+
t.Fatal("stageHookFiles() .gc/settings.json must not have SkipFingerprint (cityDir fallback must drive drain on real edits)")
327+
}
322328
return
323329
}
324330
}
@@ -348,6 +354,10 @@ func TestStageHookFilesFallsBackToLegacyClaudeHook(t *testing.T) {
348354
if entry.ContentHash == "" {
349355
t.Fatal("stageHookFiles() hooks/claude.json has empty ContentHash")
350356
}
357+
// City-level legacy fallback must still drive drain.
358+
if entry.SkipFingerprint {
359+
t.Fatal("stageHookFiles() hooks/claude.json must not have SkipFingerprint (cityDir fallback must drive drain)")
360+
}
351361
return
352362
}
353363
}

cmd/gc/config_hash.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,15 @@ func canonicalConfigHash(params TemplateParams, overlay map[string]string) strin
102102
h.Write([]byte(params.Hints.OverlayDir)) //nolint:errcheck
103103
h.Write([]byte{0}) //nolint:errcheck
104104

105-
// CopyFiles.
105+
// CopyFiles. Mirrors runtime.hashCoreFields only in excluding probed
106+
// workDir entries marked SkipFingerprint, so this hash stays stable if
107+
// that dormant path is ever wired into drift detection (#682). Unlike
108+
// runtime hashing, this canonical hash still fingerprints CopyFiles by
109+
// Src and RelDst here; it does not use ContentHash or a sentinel.
106110
for _, cf := range params.Hints.CopyFiles {
111+
if cf.Probed && cf.SkipFingerprint {
112+
continue
113+
}
107114
h.Write([]byte(cf.Src)) //nolint:errcheck
108115
h.Write([]byte{0}) //nolint:errcheck
109116
h.Write([]byte(cf.RelDst)) //nolint:errcheck

cmd/gc/providers.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,17 @@ func displayProviderName(name string) string {
245245
// Priority: GC_BEADS env var → city.toml [beads].provider → "bd" default.
246246
// This is the unmodified config value; use beadsProvider() for lifecycle
247247
// routing which remaps "bd" → exec:.
248+
//
249+
// If the ambient GC_BEADS points at the city-managed gc-beads-bd lifecycle
250+
// wrapper, we normalize it back to "bd". The wrapper exits 2 for data ops
251+
// (see #647), so inheriting it from a contaminated parent would reintroduce
252+
// the crash in nested agent sessions. Genuine user exec: overrides are
253+
// preserved — only the well-known lifecycle wrapper path is stripped.
248254
func rawBeadsProvider(cityPath string) string {
249255
if v := os.Getenv("GC_BEADS"); v != "" {
256+
if isLifecycleWrapperPath(v) {
257+
return "bd"
258+
}
250259
return v
251260
}
252261
// Try to read provider from city.toml.
@@ -257,10 +266,26 @@ func rawBeadsProvider(cityPath string) string {
257266
return "bd"
258267
}
259268

269+
// isLifecycleWrapperPath reports whether v is the city-managed gc-beads-bd
270+
// lifecycle wrapper (i.e., `exec:<anything>/.gc/system/bin/gc-beads-bd`).
271+
func isLifecycleWrapperPath(v string) bool {
272+
if !strings.HasPrefix(v, "exec:") {
273+
return false
274+
}
275+
return strings.HasSuffix(v, string(filepath.Separator)+citylayout.SystemBinRoot+string(filepath.Separator)+"gc-beads-bd")
276+
}
277+
260278
// beadsProvider returns the bead store provider name for lifecycle operations.
261279
// Maps "bd" → "exec:<cityPath>/.gc/system/bin/gc-beads-bd" so all lifecycle operations
262280
// route through the exec: protocol. Other providers pass through unchanged.
263281
//
282+
// This is for lifecycle operations ONLY (start/stop/health/ensure-ready/init).
283+
// gc-beads-bd exits 2 for data operations (get/list/create/update/close); it
284+
// is not a full exec-beads protocol implementation. Data-path callers — in
285+
// particular agent-session environments (see template_resolve.go) — must use
286+
// rawBeadsProvider() so they route through BdStore directly. See #647 for the
287+
// crash that surfaced when this invariant was violated.
288+
//
264289
// Related env vars:
265290
// - GC_DOLT=skip — the gc-beads-bd script checks this and exits 2 for all
266291
// operations. Used by testscript and integration tests.

cmd/gc/template_resolve.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,9 @@ func resolveTemplate(p *agentBuildParams, cfgAgent *config.Agent, qualifiedName
212212
for key, value := range citylayout.CityRuntimeEnvMap(p.cityPath) {
213213
agentEnv[key] = value
214214
}
215-
agentEnv["GC_BEADS"] = beadsProvider(p.cityPath)
215+
// Agent-session data ops must bypass the lifecycle wrapper. See
216+
// beadsProvider() docs and #647.
217+
agentEnv["GC_BEADS"] = rawBeadsProvider(p.cityPath)
216218
if exe, err := os.Executable(); err == nil && exe != "" {
217219
agentEnv["GC_BIN"] = exe
218220
}

cmd/gc/template_resolve_workdir_test.go

Lines changed: 116 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ func TestResolveTemplateRigScopedEnvCarriesRigRoots(t *testing.T) {
214214
}
215215

216216
func TestResolveTemplateUsesCityManagedDoltPort(t *testing.T) {
217+
// Hermetic: ambient GC_BEADS would poison the "bd" assertion below.
218+
t.Setenv("GC_BEADS", "")
217219
cityPath := t.TempDir()
218220
writeTemplateResolveCityConfig(t, cityPath, "")
219221
stateDir := filepath.Join(cityPath, ".gc", "runtime", "packs", "dolt")
@@ -269,8 +271,120 @@ func TestResolveTemplateUsesCityManagedDoltPort(t *testing.T) {
269271
if got := tp.Env["GC_BIN"]; got == "" {
270272
t.Fatalf("GC_BIN = %q, want non-empty", got)
271273
}
272-
if got := tp.Env["GC_BEADS"]; got != "exec:"+filepath.Join(cityPath, ".gc", "system", "bin", "gc-beads-bd") {
273-
t.Fatalf("GC_BEADS = %q, want exec gc-beads-bd provider", got)
274+
// Agent sessions route data ops to raw bd, not the lifecycle wrapper.
275+
// See #647.
276+
if got := tp.Env["GC_BEADS"]; got != "bd" {
277+
t.Fatalf("GC_BEADS = %q, want %q", got, "bd")
278+
}
279+
}
280+
281+
// Regression for #647: agent-session data ops must not route through the
282+
// lifecycle-only gc-beads-bd wrapper.
283+
func TestResolveTemplateRoutesAgentSessionDataOpsToRawBd(t *testing.T) {
284+
cases := []struct {
285+
name string
286+
provider string
287+
want string
288+
}{
289+
{name: "default bd", provider: "", want: "bd"},
290+
{name: "explicit bd", provider: "bd", want: "bd"},
291+
{name: "file passthrough", provider: "file", want: "file"},
292+
{name: "custom exec passthrough", provider: "exec:/custom/gc-beads-br", want: "exec:/custom/gc-beads-br"},
293+
}
294+
for _, tc := range cases {
295+
t.Run(tc.name, func(t *testing.T) {
296+
// Hermetic: the city.toml value is authoritative for this test,
297+
// regardless of what the developer has exported in their shell.
298+
t.Setenv("GC_BEADS", "")
299+
cityPath := t.TempDir()
300+
writeTemplateResolveCityConfig(t, cityPath, tc.provider)
301+
302+
params := &agentBuildParams{
303+
cityName: "city",
304+
cityPath: cityPath,
305+
workspace: &config.Workspace{Provider: "test"},
306+
providers: map[string]config.ProviderSpec{"test": {Command: "echo", PromptMode: "none"}},
307+
lookPath: func(string) (string, error) { return "/bin/echo", nil },
308+
fs: fsys.OSFS{},
309+
beaconTime: time.Unix(0, 0),
310+
beadNames: make(map[string]string),
311+
stderr: io.Discard,
312+
}
313+
314+
agent := &config.Agent{Name: "worker"}
315+
tp, err := resolveTemplate(params, agent, agent.QualifiedName(), nil)
316+
if err != nil {
317+
t.Fatalf("resolveTemplate: %v", err)
318+
}
319+
320+
if got := tp.Env["GC_BEADS"]; got != tc.want {
321+
t.Fatalf("GC_BEADS = %q, want %q", got, tc.want)
322+
}
323+
})
324+
}
325+
}
326+
327+
// Regression for #647: if a parent process leaked GC_BEADS pointing at the
328+
// city-managed lifecycle wrapper, nested agent sessions would re-inherit it
329+
// and recreate the exit-2/empty-JSON crash on data ops. The raw provider
330+
// normalizes that well-known wrapper path back to "bd".
331+
func TestResolveTemplateRawBeadsProviderStripsLifecycleWrapper(t *testing.T) {
332+
cityPath := t.TempDir()
333+
writeTemplateResolveCityConfig(t, cityPath, "")
334+
contaminated := "exec:" + filepath.Join(cityPath, ".gc", "system", "bin", "gc-beads-bd")
335+
t.Setenv("GC_BEADS", contaminated)
336+
337+
params := &agentBuildParams{
338+
cityName: "city",
339+
cityPath: cityPath,
340+
workspace: &config.Workspace{Provider: "test"},
341+
providers: map[string]config.ProviderSpec{"test": {Command: "echo", PromptMode: "none"}},
342+
lookPath: func(string) (string, error) { return "/bin/echo", nil },
343+
fs: fsys.OSFS{},
344+
beaconTime: time.Unix(0, 0),
345+
beadNames: make(map[string]string),
346+
stderr: io.Discard,
347+
}
348+
349+
agent := &config.Agent{Name: "worker"}
350+
tp, err := resolveTemplate(params, agent, agent.QualifiedName(), nil)
351+
if err != nil {
352+
t.Fatalf("resolveTemplate: %v", err)
353+
}
354+
355+
if got := tp.Env["GC_BEADS"]; got != "bd" {
356+
t.Fatalf("GC_BEADS = %q, want %q (ambient wrapper must be normalized)", got, "bd")
357+
}
358+
}
359+
360+
// Genuine user exec: overrides must pass through untouched — only the
361+
// well-known lifecycle wrapper path is stripped.
362+
func TestResolveTemplateRawBeadsProviderPreservesCustomExec(t *testing.T) {
363+
cityPath := t.TempDir()
364+
writeTemplateResolveCityConfig(t, cityPath, "")
365+
custom := "exec:/usr/local/bin/my-beads-backend"
366+
t.Setenv("GC_BEADS", custom)
367+
368+
params := &agentBuildParams{
369+
cityName: "city",
370+
cityPath: cityPath,
371+
workspace: &config.Workspace{Provider: "test"},
372+
providers: map[string]config.ProviderSpec{"test": {Command: "echo", PromptMode: "none"}},
373+
lookPath: func(string) (string, error) { return "/bin/echo", nil },
374+
fs: fsys.OSFS{},
375+
beaconTime: time.Unix(0, 0),
376+
beadNames: make(map[string]string),
377+
stderr: io.Discard,
378+
}
379+
380+
agent := &config.Agent{Name: "worker"}
381+
tp, err := resolveTemplate(params, agent, agent.QualifiedName(), nil)
382+
if err != nil {
383+
t.Fatalf("resolveTemplate: %v", err)
384+
}
385+
386+
if got := tp.Env["GC_BEADS"]; got != custom {
387+
t.Fatalf("GC_BEADS = %q, want %q (custom exec: must pass through)", got, custom)
274388
}
275389
}
276390

internal/runtime/fingerprint.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,13 @@ func hashCoreFields(h hash.Hash, cfg Config) {
164164
// use Src/RelDst paths. When a probed entry has an empty ContentHash
165165
// (transient I/O error), a stable sentinel is used instead of falling
166166
// back to path-based hashing, which would flip fingerprint modes.
167+
// Probed entries with SkipFingerprint are excluded entirely — see
168+
// issue #682. SkipFingerprint is ignored on config-derived entries
169+
// so real config changes always drive drain.
167170
for _, cf := range cfg.CopyFiles {
171+
if cf.Probed && cf.SkipFingerprint {
172+
continue
173+
}
168174
if cf.Probed {
169175
h.Write([]byte(cf.RelDst)) //nolint:errcheck // hash.Write never errors
170176
h.Write([]byte{0}) //nolint:errcheck // hash.Write never errors
@@ -271,6 +277,9 @@ func CoreFingerprintBreakdown(cfg Config) map[string]string {
271277
}),
272278
"CopyFiles": fieldHash(func(h hash.Hash) {
273279
for _, cf := range cfg.CopyFiles {
280+
if cf.Probed && cf.SkipFingerprint {
281+
continue
282+
}
274283
if cf.Probed {
275284
h.Write([]byte(cf.RelDst))
276285
h.Write([]byte{0})
@@ -335,6 +344,9 @@ func LogCoreFingerprintDrift(w io.Writer, name string, storedBreakdown map[strin
335344
fmt.Fprintf(w, " SessionSetupScript len: %d\n", len(current.SessionSetupScript)) //nolint:errcheck // best-effort diag
336345
case "CopyFiles":
337346
for i, cf := range current.CopyFiles {
347+
if cf.Probed && cf.SkipFingerprint {
348+
continue
349+
}
338350
fmt.Fprintf(w, " CopyFiles[%d]: RelDst=%q ContentHash=%q\n", i, cf.RelDst, cf.ContentHash) //nolint:errcheck // best-effort diag
339351
}
340352
}

0 commit comments

Comments
 (0)