Skip to content

Commit 24d7da6

Browse files
committed
Avoid duplicate daemon start on handoff timeout
1 parent 74f7146 commit 24d7da6

File tree

4 files changed

+100
-37
lines changed

4 files changed

+100
-37
lines changed

cmd/roborev/main.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2880,6 +2880,13 @@ func restartDaemonAfterUpdate(binDir string, noRestart bool) {
28802880
fmt.Println("OK")
28812881
return
28822882
}
2883+
// A replacement PID already exists; avoid manually starting
2884+
// another daemon instance while handoff is still warming up.
2885+
fmt.Println(
2886+
"warning: daemon handoff detected but replacement is not ready;" +
2887+
" restart it manually",
2888+
)
2889+
return
28832890
}
28842891
// Treat as unresolved and continue to kill fallback.
28852892
exited = false
@@ -2906,15 +2913,13 @@ func restartDaemonAfterUpdate(binDir string, noRestart bool) {
29062913
fmt.Println("OK")
29072914
return
29082915
}
2909-
// In uncertain stop-failure paths, avoid starting another daemon
2910-
// when a handoff PID already exists but is not ready yet.
2911-
if stopFailed {
2912-
fmt.Println(
2913-
"warning: daemon handoff detected but replacement is not ready;" +
2914-
" restart it manually",
2915-
)
2916-
return
2917-
}
2916+
// A replacement PID already exists; avoid manually starting
2917+
// another daemon instance while handoff is still warming up.
2918+
fmt.Println(
2919+
"warning: daemon handoff detected but replacement is not ready;" +
2920+
" restart it manually",
2921+
)
2922+
return
29182923
}
29192924
if !exitedAfterKill {
29202925
fmt.Printf(

cmd/roborev/main_test.go

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,17 +1345,69 @@ func TestRestartDaemonAfterUpdateManagerHandoffUnresponsiveUsesRuntimePID(t *tes
13451345
restartDaemonAfterUpdate("/tmp/bin", false)
13461346
})
13471347

1348-
if s.killCalls != 1 {
1349-
t.Fatalf("expected kill fallback called once for unresolved handoff, got %d", s.killCalls)
1348+
if s.killCalls != 0 {
1349+
t.Fatalf("expected kill fallback not called when handoff PID already exists, got %d", s.killCalls)
13501350
}
1351-
if s.startCalls != 1 {
1352-
t.Fatalf("expected manual start after unresolved handoff, got %d", s.startCalls)
1351+
if s.startCalls != 0 {
1352+
t.Fatalf("expected startUpdatedDaemon not called for unready handoff, got %d", s.startCalls)
13531353
}
13541354
if strings.Contains(output, "Restarting daemon... OK") {
13551355
t.Fatalf("unexpected handoff success output: %q", output)
13561356
}
1357-
if !strings.Contains(output, "warning: daemon did not become ready after restart; restart it manually") {
1358-
t.Fatalf("expected not-ready warning after manual start, got %q", output)
1357+
if !strings.Contains(output, "warning: daemon handoff detected but replacement is not ready; restart it manually") {
1358+
t.Fatalf("expected not-ready handoff warning, got %q", output)
1359+
}
1360+
}
1361+
1362+
func TestRestartDaemonAfterUpdateManagerHandoffAfterKillNotReadyWarnsNoStart(t *testing.T) {
1363+
s := stubRestartVars(t)
1364+
1365+
var handoffSeen bool
1366+
getAnyRunningDaemon = func() (*daemon.RuntimeInfo, error) {
1367+
if s.killCalls == 0 {
1368+
// Initial probe + first wait loop see only the old daemon,
1369+
// forcing timeout and kill fallback.
1370+
return &daemon.RuntimeInfo{PID: 100, Addr: "127.0.0.1:7373"}, nil
1371+
}
1372+
if !handoffSeen {
1373+
// After kill fallback, handoff PID appears once.
1374+
handoffSeen = true
1375+
return &daemon.RuntimeInfo{PID: 500, Addr: "127.0.0.1:7373"}, nil
1376+
}
1377+
// Replacement remains unresponsive during readiness polling.
1378+
return nil, os.ErrNotExist
1379+
}
1380+
1381+
listAllRuntimes = func() ([]*daemon.RuntimeInfo, error) {
1382+
if s.killCalls == 0 {
1383+
return []*daemon.RuntimeInfo{
1384+
{PID: 100, Addr: "127.0.0.1:7373"},
1385+
}, nil
1386+
}
1387+
return []*daemon.RuntimeInfo{
1388+
{PID: 500, Addr: "127.0.0.1:7373"},
1389+
}, nil
1390+
}
1391+
1392+
isPIDAliveForUpdate = func(pid int) bool {
1393+
return pid == 500
1394+
}
1395+
1396+
output := captureStdout(t, func() {
1397+
restartDaemonAfterUpdate("/tmp/bin", false)
1398+
})
1399+
1400+
if s.killCalls != 1 {
1401+
t.Fatalf("expected kill fallback called once, got %d", s.killCalls)
1402+
}
1403+
if s.startCalls != 0 {
1404+
t.Fatalf("expected startUpdatedDaemon not called for unready handoff, got %d", s.startCalls)
1405+
}
1406+
if !strings.Contains(output, "warning: daemon handoff detected but replacement is not ready; restart it manually") {
1407+
t.Fatalf("expected not-ready handoff warning, got %q", output)
1408+
}
1409+
if strings.Contains(output, "Restarting daemon... OK") {
1410+
t.Fatalf("unexpected success output: %q", output)
13591411
}
13601412
}
13611413

cmd/roborev/processcheck_common.go

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ const (
1414
updatePIDNotRoborev
1515
)
1616

17+
var nonRunDaemonSubcommandsForUpdate = map[string]struct{}{
18+
"status": {},
19+
"start": {},
20+
"stop": {},
21+
"restart": {},
22+
"logs": {},
23+
}
24+
1725
func normalizeCommandLineForUpdate(s string) string {
1826
// Strip NUL bytes first (common with UTF-16LE output and /proc cmdline).
1927
s = strings.ReplaceAll(s, "\x00", " ")
@@ -117,6 +125,10 @@ func isRoborevDaemonCommandForUpdate(cmdStr string) bool {
117125
fields := strings.Fields(cmdLower)
118126
foundDaemon := false
119127
for _, field := range fields {
128+
field = trimCommandTokenQuotesForUpdate(field)
129+
if field == "" {
130+
continue
131+
}
120132
if !foundDaemon {
121133
if field == "daemon" ||
122134
strings.HasSuffix(field, "/daemon") ||
@@ -125,32 +137,16 @@ func isRoborevDaemonCommandForUpdate(cmdStr string) bool {
125137
}
126138
continue
127139
}
128-
if strings.HasPrefix(field, "-") {
129-
continue
140+
if field == "run" {
141+
return true
130142
}
131-
if looksLikeFlagValueForUpdate(field) {
132-
continue
143+
if _, isNonRunSubcommand := nonRunDaemonSubcommandsForUpdate[field]; isNonRunSubcommand {
144+
return false
133145
}
134-
return field == "run"
135146
}
136147
return false
137148
}
138149

139-
func looksLikeFlagValueForUpdate(token string) bool {
140-
if strings.ContainsAny(token, "/\\") {
141-
return true
142-
}
143-
if strings.Contains(token, ":") {
144-
return true
145-
}
146-
if strings.Contains(token, "=") {
147-
return true
148-
}
149-
if len(token) > 0 && token[0] >= '0' && token[0] <= '9' {
150-
return true
151-
}
152-
if strings.Contains(token, ".") {
153-
return true
154-
}
155-
return false
150+
func trimCommandTokenQuotesForUpdate(token string) string {
151+
return strings.Trim(token, `"'`)
156152
}

cmd/roborev/processcheck_common_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,16 @@ func TestIsRoborevDaemonCommandForUpdate(t *testing.T) {
2222
cmdLine: `C:\Tools\roborev.exe daemon run --port 7373`,
2323
want: true,
2424
},
25+
{
26+
name: "quoted windows args daemon run",
27+
cmdLine: `"C:\Tools\roborev.exe" "daemon" "run" "--port" "7373"`,
28+
want: true,
29+
},
30+
{
31+
name: "plain flag value before run",
32+
cmdLine: "roborev daemon --profile prod run",
33+
want: true,
34+
},
2535
{
2636
name: "daemon status command",
2737
cmdLine: "/usr/local/bin/roborev daemon status",

0 commit comments

Comments
 (0)