Skip to content

Commit a7be2c7

Browse files
feat: cross-platform force-kill primitive for stuck PHP threads
Introduces a small, self-contained primitive that unblocks a PHP thread stuck in a blocking call (sleep, synchronous I/O, etc.) so the graceful drain used by RestartWorkers and DrainWorkers can make progress instead of waiting for the block to return on its own. The primitive is useful on its own and gives follow-up graceful-shutdown work a reviewed foundation to build on.

- frankenphp.c: add frankenphp_init_force_kill / frankenphp_save_php_timer / frankenphp_force_kill_thread / frankenphp_destroy_force_kill. The per-thread PHP timer handle (Linux/FreeBSD ZTS) or OS thread handle (Windows) is captured at thread boot and stored in a pre-sized array so the kill path can fire from any goroutine without touching per-thread PHP state. Linux/FreeBSD arm PHP's max_execution_time timer (delivers SIGALRM -> "Maximum execution time exceeded"); Windows uses CancelSynchronousIo + QueueUserAPC to interrupt I/O and alertable waits; macOS and other platforms are a safe no-op (the thread is abandoned and exits when the blocking call returns naturally).
- phpmainthread.go: wire frankenphp_init_force_kill into initPHPThreads (sized to maxThreads, matching the thread_metrics allocation) and frankenphp_destroy_force_kill into drainPHPThreads.
- worker.go: add a 5-second graceful-drain grace period to drainWorkerThreads. Once elapsed, arm the force-kill primitive on any thread still outside Yielding and keep waiting on ready.Wait(); the kill lets the thread return from its blocking call so the drain completes in bounded time instead of hanging.
- worker_test.go + testdata/worker-sleep.php: TestRestartWorkersForceKillsStuckThread drives the path end-to-end. A worker blocks inside sleep(60) below frankenphp_handle_request (so drainChan close can't reach it); the test asserts RestartWorkers returns within 8s (grace + slack). The test skips on platforms without the underlying primitive.
1 parent a05e6dd commit a7be2c7

6 files changed

Lines changed: 216 additions & 1 deletion

File tree

frankenphp.c

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,108 @@ static bool is_forked_child = false;
9292
/* Flags the current process as a forked child by setting is_forked_child.
 * NOTE(review): presumably installed as a fork "child" handler; the
 * registration is outside this hunk, confirm against the caller. */
static void frankenphp_fork_child(void) { is_forked_child = true; }
9393
#endif
9494

95+
/* Best-effort force-kill for PHP threads that are still stuck in a blocking
 * call once the graceful-drain grace period has elapsed. The primitives have
 * no dependency on any specific worker type; the Go side decides when to arm
 * them.
 *   - Linux/FreeBSD ZTS (ZEND_MAX_EXECUTION_TIMERS): arm PHP's per-thread
 *     timer -> "max execution time" fatal
 *   - Windows: CancelSynchronousIo + QueueUserAPC -> interrupts I/O and sleeps
 *   - macOS/other: no-op (the thread exits when the blocking call returns)
 *
 * Each thread captures its own timer/handle at boot (see
 * frankenphp_save_php_timer at the top of the PHP thread loop) so
 * frankenphp_force_kill_thread can fire from any goroutine without touching
 * per-thread PHP state directly.
 *
 * Invariant: force_kill_num_threads is non-zero only while every slot array
 * of the active platform is allocated, so the bounds check at the top of the
 * per-thread functions also rejects calls made before init, after destroy, or
 * after a failed allocation.
 *
 * NOTE(review): slots are written by the owning PHP thread and read by the
 * thread that arms the kill without locks or atomics. This relies on the kill
 * being armed long after thread boot and on destroy running only once all
 * threads are done; confirm against the callers. */
static int force_kill_num_threads = 0;
#ifdef ZEND_MAX_EXECUTION_TIMERS
static timer_t *thread_php_timers = NULL;
static bool *thread_php_timer_saved = NULL;
#elif defined(PHP_WIN32)
static HANDLE *thread_handles = NULL;
static bool *thread_handle_saved = NULL;
/* APC body is irrelevant: queuing it is what wakes an alertable wait. */
static void CALLBACK frankenphp_noop_apc(ULONG_PTR param) { (void)param; }
#endif

/* Releases every slot array (closing saved Windows handles first) and resets
 * the slot count. Safe to call when nothing is allocated. */
static void force_kill_release_slots(void) {
#ifdef ZEND_MAX_EXECUTION_TIMERS
  /* The timers themselves are not deleted here; only the copies of their ids
   * are dropped. */
  free(thread_php_timers);
  thread_php_timers = NULL;
  free(thread_php_timer_saved);
  thread_php_timer_saved = NULL;
#elif defined(PHP_WIN32)
  if (thread_handles && thread_handle_saved) {
    for (int i = 0; i < force_kill_num_threads; i++) {
      if (thread_handle_saved[i]) {
        CloseHandle(thread_handles[i]);
      }
    }
  }
  free(thread_handles);
  thread_handles = NULL;
  free(thread_handle_saved);
  thread_handle_saved = NULL;
#endif
  force_kill_num_threads = 0;
}

/* Allocates one force-kill slot per PHP thread.
 *
 * num_threads: number of slots; values <= 0 leave the subsystem disabled.
 *
 * Any state left over from a previous init is released first. If an
 * allocation fails the subsystem stays disabled (slot count 0), which turns
 * save/force-kill into no-ops instead of dereferencing a NULL array. */
void frankenphp_init_force_kill(int num_threads) {
  force_kill_release_slots();
  if (num_threads <= 0) {
    return;
  }

#ifdef ZEND_MAX_EXECUTION_TIMERS
  thread_php_timers = calloc((size_t)num_threads, sizeof(*thread_php_timers));
  thread_php_timer_saved =
      calloc((size_t)num_threads, sizeof(*thread_php_timer_saved));
  if (!thread_php_timers || !thread_php_timer_saved) {
    force_kill_release_slots();
    return;
  }
#elif defined(PHP_WIN32)
  thread_handles = calloc((size_t)num_threads, sizeof(*thread_handles));
  thread_handle_saved =
      calloc((size_t)num_threads, sizeof(*thread_handle_saved));
  if (!thread_handles || !thread_handle_saved) {
    force_kill_release_slots();
    return;
  }
#endif

  /* Publish the count only once the arrays exist (see invariant above). */
  force_kill_num_threads = num_threads;
}

/* Records the calling thread's kill handle in slot idx. Must be called from
 * the PHP thread itself: it reads that thread's executor globals
 * (ZEND_MAX_EXECUTION_TIMERS builds) or duplicates the current-thread
 * pseudo-handle (Windows). Out-of-range indexes are ignored.
 *
 * The slot is invalidated before being refilled so that a failed capture
 * never leaves a stale handle from an earlier save marked as valid. */
void frankenphp_save_php_timer(uintptr_t idx) {
  if (idx >= (uintptr_t)force_kill_num_threads) {
    return;
  }
#ifdef ZEND_MAX_EXECUTION_TIMERS
  if (thread_php_timers && thread_php_timer_saved) {
    thread_php_timer_saved[idx] = false;
    /* NOTE(review): EG(pid) presumably is non-zero only once PHP has created
     * the max-execution timer for this thread; confirm in php-src. */
    if (EG(pid)) {
      thread_php_timers[idx] = EG(max_execution_timer_timer);
      thread_php_timer_saved[idx] = true;
    }
  }
#elif defined(PHP_WIN32)
  if (thread_handles && thread_handle_saved) {
    if (thread_handle_saved[idx]) {
      /* Slot reused: close the previous duplicate instead of leaking it. */
      CloseHandle(thread_handles[idx]);
      thread_handles[idx] = NULL;
      thread_handle_saved[idx] = false;
    }
    /* GetCurrentThread() is a pseudo-handle; a real handle is needed so other
     * threads can target this one. */
    HANDLE duplicate = NULL;
    if (DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
                        GetCurrentProcess(), &duplicate, 0, FALSE,
                        DUPLICATE_SAME_ACCESS)) {
      thread_handles[idx] = duplicate;
      thread_handle_saved[idx] = true;
    }
  }
#endif
  (void)idx;
}

/* Tries to unblock the thread whose handle was saved in slot idx. May be
 * called from any thread. Does nothing when idx is out of range, when no
 * handle was saved for the slot, or on platforms without a primitive.
 * Failures of the underlying OS calls are deliberately ignored: this is a
 * best-effort nudge and there is no fallback. */
void frankenphp_force_kill_thread(uintptr_t idx) {
  if (idx >= (uintptr_t)force_kill_num_threads) {
    return;
  }
#ifdef ZEND_MAX_EXECUTION_TIMERS
  if (thread_php_timers && thread_php_timer_saved &&
      thread_php_timer_saved[idx]) {
    /* One-shot expiry after 1ns, i.e. as soon as possible; a zero it_value
     * would disarm the timer instead of firing it. */
    struct itimerspec its;
    its.it_value.tv_sec = 0;
    its.it_value.tv_nsec = 1;
    its.it_interval.tv_sec = 0;
    its.it_interval.tv_nsec = 0;
    (void)timer_settime(thread_php_timers[idx], 0, &its, NULL);
  }
#elif defined(PHP_WIN32)
  if (thread_handles && thread_handle_saved && thread_handle_saved[idx]) {
    /* Abort a pending synchronous I/O call, then queue an APC so an
     * alertable wait returns as well. */
    CancelSynchronousIo(thread_handles[idx]);
    QueueUserAPC((PAPCFUNC)frankenphp_noop_apc, thread_handles[idx], 0);
  }
#endif
  (void)idx;
}

/* Frees all force-kill state (closing saved Windows handles) and resets the
 * slot count to 0. Idempotent. */
void frankenphp_destroy_force_kill(void) { force_kill_release_slots(); }
196+
95197
void frankenphp_update_local_thread_context(bool is_worker) {
96198
is_worker_thread = is_worker;
97199

@@ -1073,6 +1175,10 @@ static void *php_thread(void *arg) {
10731175
#endif
10741176
#endif
10751177

1178+
/* Capture this thread's PHP timer / OS handle so the Go side can force-kill
1179+
* the thread after a grace period if it gets stuck in a blocking call. */
1180+
frankenphp_save_php_timer(thread_index);
1181+
10761182
bool thread_is_healthy = true;
10771183
bool has_attempted_shutdown = false;
10781184

frankenphp.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,12 @@ void frankenphp_init_thread_metrics(int max_threads);
193193
void frankenphp_destroy_thread_metrics(void);
194194
size_t frankenphp_get_thread_memory_usage(uintptr_t thread_index);
195195

196+
/* Best-effort force-kill primitives for threads stuck in blocking calls. */
197+
/* Allocates num_threads per-thread slots that PHP threads fill in at boot
 * via frankenphp_save_php_timer. */
void frankenphp_init_force_kill(int num_threads);
198+
/* Called by a PHP thread on itself: stores that thread's PHP timer
 * (ZEND_MAX_EXECUTION_TIMERS builds) or a duplicated OS thread handle
 * (Windows) in slot thread_index. Out-of-range indexes are ignored. */
void frankenphp_save_php_timer(uintptr_t thread_index);
199+
/* Arms the saved timer with a 1ns one-shot expiry, or on Windows cancels
 * synchronous I/O and queues a no-op APC, for the thread in slot
 * thread_index. Does nothing if the index is out of range, nothing was saved
 * for the slot, or the platform has no primitive. */
void frankenphp_force_kill_thread(uintptr_t thread_index);
200+
/* Frees the slots (closing saved handles on Windows) and resets the slot
 * count to zero. */
void frankenphp_destroy_force_kill(void);
201+
196202
void register_extensions(zend_module_entry **m, int len);
197203

198204
#endif

phpmainthread.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ func initPHPThreads(numThreads int, numMaxThreads int, phpIni map[string]string)
5656

5757
C.frankenphp_init_thread_metrics(C.int(mainThread.maxThreads))
5858

59+
// initialize force-kill support: allocates per-thread slots that the PHP
60+
// threads fill in at boot (frankenphp_save_php_timer) so a stuck thread
61+
// can be unblocked after the graceful-drain grace period.
62+
C.frankenphp_init_force_kill(C.int(mainThread.maxThreads))
63+
5964
// initialize all other threads
6065
phpThreads = make([]*phpThread, mainThread.maxThreads)
6166
phpThreads[0] = initialThread
@@ -97,6 +102,7 @@ func drainPHPThreads() {
97102
}
98103

99104
doneWG.Wait()
105+
C.frankenphp_destroy_force_kill()
100106
mainThread.state.Set(state.Done)
101107
mainThread.state.WaitFor(state.Reserved)
102108
C.frankenphp_destroy_thread_metrics()

testdata/worker-sleep.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?php
2+
3+
// Test fixture for the force-kill grace period: every request is served by a
// callback that blocks in sleep(), simulating a stuck request that stalls the
// worker drain.
$stuckHandler = static function () {
    sleep(60);
    echo 'should not reach';
};

// Keep serving requests until frankenphp_handle_request() returns a falsy
// value.
while (\frankenphp_handle_request($stuckHandler)) {
    // nothing to do between requests
}

worker.go

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package frankenphp
44
import "C"
55
import (
66
"fmt"
7+
"log/slog"
78
"os"
89
"path/filepath"
910
"runtime"
@@ -165,6 +166,13 @@ func newWorker(o workerOpt) (*worker, error) {
165166
return w, nil
166167
}
167168

169+
// drainGracePeriod is the time a worker thread has to stop gracefully after
170+
// receiving the drain signal before the force-kill primitive is armed on it.
171+
// Well-behaved scripts return promptly on drainChan close; stuck ones (e.g.
172+
// blocking C calls inside the VM) would otherwise hang drainWorkerThreads
173+
// forever.
//
// The period is a single timeout for the whole drain, not one per thread.
// After it elapses drainWorkerThreads still waits for every thread to yield,
// so where the force-kill primitive is a no-op the wait lasts until the
// blocking call returns on its own.
174+
const drainGracePeriod = 5 * time.Second
175+
168176
// EXPERIMENTAL: DrainWorkers finishes all worker scripts before a graceful shutdown
169177
func DrainWorkers() {
170178
_ = drainWorkerThreads()
@@ -201,7 +209,31 @@ func drainWorkerThreads() []*phpThread {
201209
worker.threadMutex.RUnlock()
202210
}
203211

204-
ready.Wait()
212+
// Wait for graceful drain, then arm the force-kill primitive on any
213+
// thread still stuck. Linux/FreeBSD ZTS arms PHP's max_execution_time
214+
// timer; Windows interrupts blocking I/O and alertable waits; other
215+
// platforms leave the thread abandoned (it will exit when the blocking
216+
// call returns).
217+
done := make(chan struct{})
218+
go func() {
219+
ready.Wait()
220+
close(done)
221+
}()
222+
223+
select {
224+
case <-done:
225+
// everyone yielded in time
226+
case <-time.After(drainGracePeriod):
227+
for _, thread := range drainedThreads {
228+
if !thread.state.Is(state.Yielding) {
229+
C.frankenphp_force_kill_thread(C.uintptr_t(thread.threadIndex))
230+
}
231+
}
232+
if globalLogger.Enabled(globalCtx, slog.LevelWarn) {
233+
globalLogger.LogAttrs(globalCtx, slog.LevelWarn, "worker threads did not yield within grace period, force-killing stuck threads")
234+
}
235+
<-done
236+
}
205237

206238
return drainedThreads
207239
}

worker_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,17 @@ import (
99
"net/http"
1010
"net/http/httptest"
1111
"net/url"
12+
"os"
13+
"runtime"
1214
"strconv"
1315
"strings"
1416
"sync"
1517
"testing"
18+
"time"
1619

1720
"github.com/dunglas/frankenphp"
1821
"github.com/stretchr/testify/assert"
22+
"github.com/stretchr/testify/require"
1923
)
2024

2125
func TestWorker(t *testing.T) {
@@ -45,6 +49,55 @@ func TestWorker(t *testing.T) {
4549
}, &testOptions{workerScript: "worker.php", nbWorkers: 1, nbParallelRequests: 1})
4650
}
4751

52+
// TestRestartWorkersForceKillsStuckThread verifies that the drain path used
53+
// by RestartWorkers and DrainWorkers does not hang indefinitely when a
54+
// worker thread is stuck inside a blocking PHP call (sleep, synchronous I/O,
55+
// etc.). The force-kill primitive must arm PHP's max_execution_time timer
56+
// (Linux/FreeBSD ZTS) or the equivalent Windows primitive after the grace
57+
// period so the thread unblocks. macOS and other platforms without these
58+
// primitives would hang; skip there.
59+
func TestRestartWorkersForceKillsStuckThread(t *testing.T) {
60+
if runtime.GOOS != "linux" && runtime.GOOS != "freebsd" && runtime.GOOS != "windows" {
61+
t.Skipf("force-kill primitive not available on %s", runtime.GOOS)
62+
}
63+
64+
cwd, _ := os.Getwd()
65+
testDataDir := cwd + "/testdata/"
66+
67+
require.NoError(t, frankenphp.Init(
68+
frankenphp.WithWorkers("sleep-worker", testDataDir+"worker-sleep.php", 1),
69+
frankenphp.WithNumThreads(2),
70+
))
71+
t.Cleanup(frankenphp.Shutdown)
72+
73+
// Fire a request the worker will handle and then block on (sleep 60s).
74+
// When the drain runs, the worker script is inside the handler callback,
75+
// below frankenphp_handle_request, so the drain signal on drainChan
76+
// can't be observed until the blocking sleep returns.
77+
go func() {
78+
req := httptest.NewRequest("GET", "http://example.com/worker-sleep.php", nil)
79+
fr, err := frankenphp.NewRequestWithContext(req, frankenphp.WithRequestDocumentRoot(testDataDir, false))
80+
if err != nil {
81+
return
82+
}
83+
_ = frankenphp.ServeHTTP(httptest.NewRecorder(), fr)
84+
}()
85+
86+
// Give the request time to reach the handler and enter sleep().
87+
time.Sleep(500 * time.Millisecond)
88+
89+
// RestartWorkers must complete within the grace period + a bit of slack.
90+
// Without force-kill, it would wait for the 60s sleep to return.
91+
start := time.Now()
92+
frankenphp.RestartWorkers()
93+
elapsed := time.Since(start)
94+
95+
// Grace period is 5s; allow margin for SIGALRM dispatch, PHP VM tick,
96+
// and the drain's final ready.Wait() plus the restart loop.
97+
const budget = 8 * time.Second
98+
assert.Less(t, elapsed, budget, "drain must force-kill the stuck thread within the grace period")
99+
}
100+
48101
func TestWorkerDie(t *testing.T) {
49102
runTest(t, func(handler func(http.ResponseWriter, *http.Request), _ *httptest.Server, i int) {
50103
req := httptest.NewRequest("GET", "http://example.com/die.php", nil)

0 commit comments

Comments
 (0)