Skip to content

Commit b51cc1e

Browse files
claudeDumbris
authored andcommitted
fix(restart): re-read mcp_config.json before restarting upstream (#467)
Runtime.RestartServer pulled the server's config from BoltDB, never from disk. There is no fsnotify-style auto file-watcher, so a user who edited mcp_config.json (changing env, headers, args, or isolation_json) and then ran `mcpproxy upstream restart` from the CLI / tray would silently replay the stale config — only the live REST PATCH path used to update those maps. The repro from #467 (env wiped + restored on obsidian-pilot) confirmed this: editing the file changed nothing on the running container; only `mcpproxy upstream patch` made the change visible. Fix: pull the latest server config from disk first, fall back to BoltDB on read / parse failure, and persist the disk-loaded config back to BoltDB so subsequent restarts see consistent state. Added lookupServerConfigForRestart() to keep the restart path readable. RestartAll inherits the fix automatically because it loops through RestartServer. Tests cover env, headers, and the storage fallback path. The latter proves a malformed disk file doesn't break restart. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f772e49 commit b51cc1e

2 files changed

Lines changed: 249 additions & 11 deletions

File tree

internal/runtime/lifecycle.go

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,26 +1183,74 @@ func (r *Runtime) BulkEnableServers(serverNames []string, enabled bool) (map[str
11831183
return resultErrs, nil
11841184
}
11851185

1186-
// RestartServer restarts an upstream server by disconnecting and reconnecting it.
1187-
// Validation and disconnect are synchronous; reconnection and reindexing happen
1188-
// asynchronously so the caller (HTTP handler) returns immediately.
1189-
func (r *Runtime) RestartServer(serverName string) error {
1190-
r.logger.Info("Request to restart server", zap.String("server", serverName))
1186+
// lookupServerConfigForRestart returns the named server's config, preferring
1187+
// the on-disk mcp_config.json over the BoltDB cache. Falls back to BoltDB
1188+
// when the disk file is unreadable, malformed, or missing the named server.
1189+
//
1190+
// On a successful disk read, the resolved config is also written back to
1191+
// BoltDB so subsequent restarts (and any other read-from-storage code path)
1192+
// see the same value. Without this, only the synchronous restart that did
1193+
// the disk read would see the edit; the next one would replay storage and
1194+
// regress. See issue #467 for context.
1195+
func (r *Runtime) lookupServerConfigForRestart(serverName string) *config.ServerConfig {
1196+
r.mu.RLock()
1197+
cfgPath := r.cfgPath
1198+
r.mu.RUnlock()
1199+
1200+
if cfgPath != "" {
1201+
diskCfg, err := config.LoadFromFile(cfgPath)
1202+
if err != nil {
1203+
r.logger.Warn("Failed to re-read config from disk during restart, falling back to storage",
1204+
zap.String("path", cfgPath),
1205+
zap.String("server", serverName),
1206+
zap.Error(err))
1207+
} else {
1208+
for _, srv := range diskCfg.Servers {
1209+
if srv != nil && srv.Name == serverName {
1210+
if r.storageManager != nil {
1211+
if saveErr := r.storageManager.SaveUpstreamServer(srv); saveErr != nil {
1212+
r.logger.Warn("Failed to persist disk-loaded config to storage during restart",
1213+
zap.String("server", serverName),
1214+
zap.Error(saveErr))
1215+
}
1216+
}
1217+
return srv
1218+
}
1219+
}
1220+
}
1221+
}
11911222

1192-
// Check if server exists in storage (config)
1223+
if r.storageManager == nil {
1224+
return nil
1225+
}
11931226
servers, err := r.storageManager.ListUpstreamServers()
11941227
if err != nil {
1195-
return fmt.Errorf("failed to list servers: %w", err)
1228+
r.logger.Error("Failed to list servers during restart fallback",
1229+
zap.String("server", serverName),
1230+
zap.Error(err))
1231+
return nil
11961232
}
1197-
1198-
var serverConfig *config.ServerConfig
11991233
for _, srv := range servers {
12001234
if srv.Name == serverName {
1201-
serverConfig = srv
1202-
break
1235+
return srv
12031236
}
12041237
}
1238+
return nil
1239+
}
1240+
1241+
// RestartServer restarts an upstream server by disconnecting and reconnecting it.
1242+
// Validation and disconnect are synchronous; reconnection and reindexing happen
1243+
// asynchronously so the caller (HTTP handler) returns immediately.
1244+
func (r *Runtime) RestartServer(serverName string) error {
1245+
r.logger.Info("Request to restart server", zap.String("server", serverName))
12051246

1247+
// Issue #467: pull the latest server config from disk before falling
1248+
// back to BoltDB. There is no fsnotify-style auto file-watcher, so a
1249+
// user who edits mcp_config.json and then triggers a restart would
1250+
// otherwise replay stale env / headers / args / isolation data — only
1251+
// the live REST PATCH path used to update them. Disk-first here closes
1252+
// that gap for the (much more common) edit-then-restart UX.
1253+
serverConfig := r.lookupServerConfigForRestart(serverName)
12061254
if serverConfig == nil {
12071255
return fmt.Errorf("server '%s' not found in configuration", serverName)
12081256
}
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
package runtime
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
8+
"go.uber.org/zap"
9+
10+
"github.com/smart-mcp-proxy/mcpproxy-go/internal/config"
11+
12+
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
14+
)
15+
16+
// TestRestartServer_PicksUpDiskEnvChanges reproduces issue #467: editing
17+
// env in mcp_config.json and then calling `mcpproxy upstream restart` (which
18+
// flows into Runtime.RestartServer) must propagate the new env to the
19+
// running upstream. Pre-fix, RestartServer read from BoltDB only — and
20+
// BoltDB never sees the file edit because there's no auto file-watcher —
21+
// so the restart silently replayed the stale env.
22+
//
23+
// We verify the new behavior at the storage layer: after RestartServer,
24+
// the BoltDB record for the server reflects the disk env. That guarantees
25+
// the subsequent upstreamManager.AddServer call (which itself diffs by
26+
// env / headers) sees the new value.
27+
func TestRestartServer_PicksUpDiskEnvChanges(t *testing.T) {
28+
tmpDir := t.TempDir()
29+
cfgPath := filepath.Join(tmpDir, "mcp_config.json")
30+
31+
initialCfg := config.DefaultConfig()
32+
initialCfg.Listen = "127.0.0.1:0"
33+
initialCfg.DataDir = tmpDir
34+
initialCfg.Servers = []*config.ServerConfig{
35+
{
36+
Name: "obsidian-pilot",
37+
Command: "uvx",
38+
Args: []string{"obsidianpilot"},
39+
Protocol: "stdio",
40+
Enabled: true,
41+
Env: map[string]string{"OBSIDIAN_VAULT_PATH": "/old/path"},
42+
},
43+
}
44+
require.NoError(t, config.SaveConfig(initialCfg, cfgPath))
45+
46+
rt, err := New(initialCfg, cfgPath, zap.NewNop())
47+
require.NoError(t, err)
48+
defer func() { _ = rt.Close() }()
49+
50+
// Seed BoltDB with the initial config so RestartServer's lookup succeeds.
51+
require.NoError(t, rt.storageManager.SaveUpstreamServer(initialCfg.Servers[0]))
52+
53+
// Simulate the user's repro: edit the file on disk to change env, but do
54+
// NOT call ApplyConfig / ReloadConfiguration. BoltDB stays stale by
55+
// design — that's the bug condition.
56+
editedCfg := config.DefaultConfig()
57+
editedCfg.Listen = initialCfg.Listen
58+
editedCfg.DataDir = tmpDir
59+
editedCfg.Servers = []*config.ServerConfig{
60+
{
61+
Name: "obsidian-pilot",
62+
Command: "uvx",
63+
Args: []string{"obsidianpilot"},
64+
Protocol: "stdio",
65+
Enabled: true,
66+
Env: map[string]string{"OBSIDIAN_VAULT_PATH": "/new/path"},
67+
},
68+
}
69+
require.NoError(t, config.SaveConfig(editedCfg, cfgPath))
70+
71+
// Sanity: BoltDB still has old env at this point.
72+
beforeServers, err := rt.storageManager.ListUpstreamServers()
73+
require.NoError(t, err)
74+
require.Len(t, beforeServers, 1)
75+
assert.Equal(t, "/old/path", beforeServers[0].Env["OBSIDIAN_VAULT_PATH"],
76+
"precondition: BoltDB should still have stale env before restart")
77+
78+
// Trigger restart. The upstream client doesn't actually exist (we never
79+
// connected), so RestartServer takes the "client doesn't exist, recreate"
80+
// branch; either way, the disk-read + storage persist must run first.
81+
_ = rt.RestartServer("obsidian-pilot")
82+
83+
// Post-fix: BoltDB should now mirror the disk env.
84+
afterServers, err := rt.storageManager.ListUpstreamServers()
85+
require.NoError(t, err)
86+
require.Len(t, afterServers, 1)
87+
assert.Equal(t, "/new/path", afterServers[0].Env["OBSIDIAN_VAULT_PATH"],
88+
"RestartServer must re-read disk before consulting storage so env edits take effect (#467)")
89+
}
90+
91+
// TestRestartServer_PicksUpDiskHeaderChanges is the http-transport sibling
92+
// of the env test — same gap, same fix surface. Headers and env share the
93+
// "edit-then-restart" UX, so both must behave identically.
94+
func TestRestartServer_PicksUpDiskHeaderChanges(t *testing.T) {
95+
tmpDir := t.TempDir()
96+
cfgPath := filepath.Join(tmpDir, "mcp_config.json")
97+
98+
initialCfg := config.DefaultConfig()
99+
initialCfg.Listen = "127.0.0.1:0"
100+
initialCfg.DataDir = tmpDir
101+
initialCfg.Servers = []*config.ServerConfig{
102+
{
103+
Name: "synapbus",
104+
URL: "https://hub.synapbus.dev/mcp",
105+
Protocol: "streamable-http",
106+
Enabled: true,
107+
Headers: map[string]string{"Authorization": "Bearer old-token"},
108+
},
109+
}
110+
require.NoError(t, config.SaveConfig(initialCfg, cfgPath))
111+
112+
rt, err := New(initialCfg, cfgPath, zap.NewNop())
113+
require.NoError(t, err)
114+
defer func() { _ = rt.Close() }()
115+
require.NoError(t, rt.storageManager.SaveUpstreamServer(initialCfg.Servers[0]))
116+
117+
// Edit only on disk.
118+
editedCfg := config.DefaultConfig()
119+
editedCfg.Listen = initialCfg.Listen
120+
editedCfg.DataDir = tmpDir
121+
editedCfg.Servers = []*config.ServerConfig{
122+
{
123+
Name: "synapbus",
124+
URL: "https://hub.synapbus.dev/mcp",
125+
Protocol: "streamable-http",
126+
Enabled: true,
127+
Headers: map[string]string{"Authorization": "Bearer new-token"},
128+
},
129+
}
130+
require.NoError(t, config.SaveConfig(editedCfg, cfgPath))
131+
132+
_ = rt.RestartServer("synapbus")
133+
134+
after, err := rt.storageManager.ListUpstreamServers()
135+
require.NoError(t, err)
136+
require.Len(t, after, 1)
137+
assert.Equal(t, "Bearer new-token", after[0].Headers["Authorization"],
138+
"RestartServer must re-read disk so header edits take effect (#467)")
139+
}
140+
141+
// TestRestartServer_FallsBackToStorageWhenDiskMissing covers the path where
142+
// the disk file became unreadable between server creation and a restart
143+
// (truncated, deleted, perms broken). The existing behavior — fall back to
144+
// BoltDB — must still work so a transient disk problem doesn't make every
145+
// restart fail.
146+
func TestRestartServer_FallsBackToStorageWhenDiskMissing(t *testing.T) {
147+
tmpDir := t.TempDir()
148+
cfgPath := filepath.Join(tmpDir, "mcp_config.json")
149+
150+
initialCfg := config.DefaultConfig()
151+
initialCfg.Listen = "127.0.0.1:0"
152+
initialCfg.DataDir = tmpDir
153+
initialCfg.Servers = []*config.ServerConfig{
154+
{
155+
Name: "obsidian-pilot",
156+
Command: "uvx",
157+
Args: []string{"obsidianpilot"},
158+
Protocol: "stdio",
159+
Enabled: true,
160+
Env: map[string]string{"OBSIDIAN_VAULT_PATH": "/storage/path"},
161+
},
162+
}
163+
require.NoError(t, config.SaveConfig(initialCfg, cfgPath))
164+
165+
rt, err := New(initialCfg, cfgPath, zap.NewNop())
166+
require.NoError(t, err)
167+
defer func() { _ = rt.Close() }()
168+
require.NoError(t, rt.storageManager.SaveUpstreamServer(initialCfg.Servers[0]))
169+
170+
// Wipe the file so the disk-read fails; storage must take over.
171+
require.NoError(t, writeFile(cfgPath, []byte("{not valid json")))
172+
173+
// Should not error out — fallback path keeps the lookup working.
174+
err = rt.RestartServer("obsidian-pilot")
175+
// Server might or might not error depending on async ordering, but the
176+
// important guarantee is "no panic, no nil-deref, lookup succeeds".
177+
_ = err
178+
179+
after, err := rt.storageManager.ListUpstreamServers()
180+
require.NoError(t, err)
181+
require.Len(t, after, 1)
182+
// Storage value preserved unchanged.
183+
assert.Equal(t, "/storage/path", after[0].Env["OBSIDIAN_VAULT_PATH"])
184+
}
185+
186+
// writeFile overwrites a file in place — a tiny shim so tests don't need to
187+
// pull os everywhere.
188+
func writeFile(path string, content []byte) error {
189+
return os.WriteFile(path, content, 0o600)
190+
}

0 commit comments

Comments
 (0)