Skip to content

Commit 18f0f06

Browse files
committed
fix: handle stale terminating states and prevent pipe blocking during process shutdown
extends #450
1 parent 49c6975 commit 18f0f06

3 files changed

Lines changed: 92 additions & 15 deletions

File tree

src/app/process.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,18 @@ func NewProcess(opts ...ProcOpts) *Process {
110110

111111
func (p *Process) run() int {
112112
if p.isState(types.ProcessStateTerminating) {
113-
return 0
113+
select {
114+
case <-p.procRunCtx.Done():
115+
// Concurrent stop in progress — honour it.
116+
return 0
117+
default:
118+
// Stale "Terminating" state from a prior broken shutdown (e.g. zombie
119+
// that was reaped without completing the normal lifecycle). Reset so
120+
// the process can start fresh.
121+
log.Warn().Str("process", p.getName()).
122+
Msg("Resetting stale Terminating state before start")
123+
p.setState(types.ProcessStatePending)
124+
}
114125
}
115126

116127
if err := p.validateProcess(); err != nil {
@@ -202,6 +213,9 @@ func (p *Process) waitForStdOutErr() {
202213
case <-ctx.Done():
203214
log.Debug().Msgf("%s stdout done with timeout", p.getName())
204215
return
216+
case <-p.procRunCtx.Done():
217+
log.Debug().Msgf("%s stdout abandoned: process stopped", p.getName())
218+
return
205219
case <-p.stdOutDone:
206220
log.Debug().Msgf("%s stdout done", p.getName())
207221
}
@@ -212,6 +226,9 @@ func (p *Process) waitForStdOutErr() {
212226
case <-ctx.Done():
213227
log.Debug().Msgf("%s stderr done with timeout", p.getName())
214228
return
229+
case <-p.procRunCtx.Done():
230+
log.Debug().Msgf("%s stderr abandoned: process stopped", p.getName())
231+
return
215232
case <-p.stdErrDone:
216233
log.Debug().Msgf("%s stderr done", p.getName())
217234
}

src/app/project_runner.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -195,20 +195,6 @@ func (p *ProjectRunner) runProcess(config *types.ProcessConfig) {
195195
procLog = pclog.NewLogBuffer(0)
196196
}
197197
procState, _ := p.GetProcessState(config.ReplicaName)
198-
// Recover from stale "Terminating" state snapshots (for example when a
199-
// process ended as zombie and never transitioned to Completed). Reusing
200-
// that snapshot would cause Process.run() to short-circuit immediately.
201-
if procState != nil &&
202-
procState.Status == types.ProcessStateTerminating &&
203-
!procState.IsRunning {
204-
log.Warn().
205-
Str("process", config.ReplicaName).
206-
Msg("Resetting stale terminating state before start")
207-
procState = types.NewProcessState(config)
208-
p.statesMutex.Lock()
209-
p.processStates[config.ReplicaName] = procState
210-
p.statesMutex.Unlock()
211-
}
212198
isMain := config.Name == p.mainProcess
213199
hasMain := p.mainProcess != ""
214200
printLogs := !hasMain && !p.isTuiOn && !p.project.MCPServer.IsStdio()

src/app/system_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1508,6 +1508,80 @@ func TestSystem_StartProcessResetsStaleTerminatingState(t *testing.T) {
15081508
}
15091509
}
15101510

1511+
func TestSystem_TestTerminatingWithOrphanedChildHoldingPipe(t *testing.T) {
1512+
if runtime.GOOS == "windows" {
1513+
t.Skip("test requires Unix process semantics")
1514+
}
1515+
// Reproduce the zombie/stale-Terminating scenario:
1516+
// A parent process spawns a child that inherits stdout and keeps it open.
1517+
// When we stop the parent, the stdout pipe never gets an EOF because the
1518+
// child still holds it. Without a timeout or cancellation path in waitForStdOutErr, the
1519+
// process would hang in Terminating forever and never reach Completed.
1520+
1521+
testProcess := "orphan_pipe"
1522+
shell := command.DefaultShellConfig()
1523+
shutdownTimeout := 3
1524+
1525+
project := &types.Project{
1526+
Processes: map[string]types.ProcessConfig{
1527+
testProcess: {
1528+
Name: testProcess,
1529+
ReplicaName: testProcess,
1530+
Executable: shell.ShellCommand,
1531+
// Parent spawns a child in a new session (setsid) so it won't
1532+
// receive the SIGTERM sent to the parent's process group.
1533+
// The child writes to stdout (holding the pipe open) and sleeps.
1534+
// When the parent is killed, the child survives and keeps the
1535+
// pipe open, blocking waitForStdOutErr until its timeout fires or the process run context (procRunCtx) is cancelled.
1536+
Args: []string{shell.ShellArgument, "setsid bash -c 'while true; do echo holding_pipe; sleep 1; done' & sleep 60"},
1537+
RestartPolicy: types.RestartPolicyConfig{
1538+
Restart: types.RestartPolicyNo,
1539+
},
1540+
ShutDownParams: types.ShutDownParams{
1541+
ShutDownTimeout: shutdownTimeout,
1542+
Signal: int(syscall.SIGTERM),
1543+
},
1544+
},
1545+
},
1546+
ShellConfig: shell,
1547+
}
1548+
1549+
runner, err := NewProjectRunner(&ProjectOpts{project: project})
1550+
if err != nil {
1551+
t.Fatal(err)
1552+
}
1553+
go func() {
1554+
_ = runner.Run()
1555+
}()
1556+
1557+
// Wait for the process to be running (poll via mutex-protected method to avoid race).
1558+
var proc *Process
1559+
for i := 0; i < 50; i++ {
1560+
time.Sleep(100 * time.Millisecond)
1561+
proc = runner.getRunningProcess(testProcess)
1562+
if proc != nil {
1563+
break
1564+
}
1565+
}
1566+
if proc == nil {
1567+
t.Fatal("process never started")
1568+
}
1569+
1570+
// Stop the parent — it will enter Terminating, but the child keeps stdout open.
1571+
stopDone := make(chan struct{})
1572+
go func() {
1573+
defer close(stopDone)
1574+
_ = runner.StopProcess(testProcess)
1575+
}()
1576+
1577+
// The process must reach Completed within the shutdown timeout + margin,
1578+
// NOT stay stuck in Terminating.
1579+
waitForProcessState(t, runner, testProcess, types.ProcessStateCompleted,
1580+
time.Duration(shutdownTimeout+5)*time.Second)
1581+
1582+
<-stopDone
1583+
}
1584+
15111585
func TestReadinessProbeRestart(t *testing.T) {
15121586
proc := &types.ProcessConfig{
15131587
Name: "test",

0 commit comments

Comments
 (0)