Skip to content

Commit f6a1222

Browse files
heartwilltellclaude
authored and committed
Add growable runtime stacks
1 parent 98cc3fa commit f6a1222

4 files changed

Lines changed: 209 additions & 26 deletions

File tree

src/codegen_c.zig

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@ pub const CCodegen = struct {
133133

134134
// Declare all temporaries used in the function
135135
try self.emitTempDeclarations(func);
136+
try self.emitStackCheck();
136137

137138
// Emit basic blocks
138139
for (func.blocks.items, 0..) |*block, i| {
@@ -196,6 +197,11 @@ pub const CCodegen = struct {
196197
}
197198
}
198199

200+
/// Write the stack probe statement into the generated C function body:
/// a braced block that declares a one-byte local and hands its address to
/// `run_stack_check`.
fn emitStackCheck(self: *CCodegen) !void {
    // Braces are doubled because `print` treats single braces as placeholders.
    const probe_block = "{{ char _run_stack_probe; run_stack_check(&_run_stack_probe); }}\n";
    try self.emitIndent();
    const out = self.writer();
    try out.print(probe_block, .{});
}
204+
199205
fn inferCType(self: *const CCodegen, inst: ir.Inst) []const u8 {
200206
if (inst.op == .local_get) {
201207
if (inst.arg1 < self.module.local_infos.items.len) {
@@ -820,6 +826,8 @@ test "CCodegen: hello world" {
820826
try std.testing.expect(std.mem.indexOf(u8, result, "void run_main__main(void)") != null);
821827
// Should contain the string constant creation
822828
try std.testing.expect(std.mem.indexOf(u8, result, "run_string_from_cstr(\"Hello, World!\")") != null);
829+
// Should contain the stack growth probe
830+
try std.testing.expect(std.mem.indexOf(u8, result, "run_stack_check(&_run_stack_probe)") != null);
823831
// Should contain return
824832
try std.testing.expect(std.mem.indexOf(u8, result, "return;") != null);
825833
}

src/runtime/run_scheduler.c

Lines changed: 146 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
/* Default max for growable stacks */
3535
#define RUN_DEFAULT_STACK_MAX ((size_t)1024 * 1024) /* 1 MB */
3636
#define RUN_GROWABLE_INITIAL ((size_t)8 * 1024) /* 8 KB initial commit */
37+
#define RUN_STACK_SHRINK_THRESHOLD 4 /* shrink below 25% usage */
38+
#define RUN_STACK_SHRINK_HYSTERESIS 2 /* keep 2x live usage */
3739

3840
/* ========================================================================
3941
* Thread-Local Storage
@@ -81,7 +83,11 @@ static _Atomic uint64_t next_g_id = 1;
8183
static _Atomic int64_t live_g_count = 0;
8284

8385
/* Growable stacks enabled flag */
86+
#if defined(_WIN32)
8487
static bool growable_stacks_enabled = false;
88+
#else
89+
static bool growable_stacks_enabled = true;
90+
#endif
8591
static size_t stack_max_size_cached = 0;
8692

8793
/* Multi-P scheduling enabled — lock-free local queues and atomic state. */
@@ -315,6 +321,119 @@ static void run_stack_free(void *base, size_t size) {
315321
run_vmem_free(base, size);
316322
}
317323

324+
/* Address one past the highest byte of g's stack region, i.e.
 * stack_base + stack_size. The caller must pass a non-NULL g. */
static char *run_stack_top(run_g_t *g) {
    char *region_start = (char *)g->stack_base;
    return region_start + g->stack_size;
}
327+
328+
/*
 * Round `value` up to the next multiple of `alignment`.
 *
 * `alignment` is expected to be a power of two; the mask arithmetic below
 * relies on it. An alignment of 0 or 1 imposes no constraint, so `value` is
 * returned unchanged. If rounding up would wrap past SIZE_MAX, the result
 * saturates to the largest representable multiple of `alignment` rather than
 * wrapping around to a small value.
 */
static size_t run_align_up_size(size_t value, size_t alignment) {
    if (alignment <= 1) {
        return value;
    }

    size_t mask = alignment - 1;
    if (value > SIZE_MAX - mask) {
        /* value + mask would overflow: clamp instead of wrapping. */
        return SIZE_MAX & ~mask;
    }
    return (value + mask) & ~mask;
}
331+
332+
/* Fetch the stack pointer stored in a saved context: the `sp` field on
 * AArch64 builds, the `rsp` field on every other target. */
static void *run_context_sp(run_context_t *ctx) {
    void *saved;
#if defined(__aarch64__) || defined(__arm64__)
    saved = ctx->sp;
#else
    saved = ctx->rsp;
#endif
    return saved;
}
339+
340+
/*
 * Update g's stack high-water mark from an observed stack pointer.
 *
 * Usage is measured downward from the top of the stack region. The call is a
 * no-op when g, its stack, or sp is NULL, or when sp lies outside
 * [stack_base, top]. The watermark only ever increases here.
 */
static void run_stack_record_sp(run_g_t *g, void *sp) {
    if (g == NULL || g->stack_base == NULL || sp == NULL) {
        return;
    }

    char *lowest = (char *)g->stack_base;
    char *highest = run_stack_top(g);
    char *at = (char *)sp;

    bool inside = (at >= lowest) && (at <= highest);
    if (!inside) {
        return;
    }

    size_t depth = (size_t)(highest - at);
    if (g->stack_watermark < depth) {
        g->stack_watermark = depth;
    }
}
352+
353+
/*
 * Make sure the committed (read/write) part of g's stack covers `sp` plus one
 * page of headroom, committing more of the reservation if necessary.
 *
 * Reached both from run_stack_check() and from the stack-growth fault
 * handler, which passes the faulting address as `sp`.
 *
 * - Does nothing when growable stacks are disabled, or g / its stack / sp is
 *   NULL, or the current commit already covers the requirement.
 * - Aborts with a diagnostic when sp is outside the stack region or when the
 *   requirement exceeds the reservation (g->stack_size).
 *
 * The commit size grows by doubling, but is clamped to the reservation: a
 * reservation that is not a power-of-two multiple of the current commit must
 * still be fully usable instead of being reported as an overflow once the
 * doubling overshoots it.
 *
 * NOTE(review): the result of run_vmem_protect() is not inspected, matching
 * the previous behavior; confirm whether it can report failure.
 * NOTE(review): fprintf() is not async-signal-safe, yet this can run inside
 * the fault handler; consider a write(2)-based message on that path.
 * NOTE(review): committing up to the full g->stack_size assumes any guard
 * page lives outside that range — confirm against the stack allocator.
 */
static void run_stack_grow_to_sp(run_g_t *g, void *sp) {
    if (!growable_stacks_enabled || !g || !g->stack_base || !sp) {
        return;
    }

    char *top = run_stack_top(g);
    char *cur = (char *)sp;
    if (cur < (char *)g->stack_base || cur > top) {
        fprintf(stderr, "run: stack overflow in green thread %llu (sp outside stack)\n",
                (unsigned long long)g->id);
        abort();
    }

    size_t page_size = run_vmem_page_size();
    size_t limit = g->stack_size;
    size_t used = (size_t)(top - cur);
    size_t needed = run_align_up_size(used + page_size, page_size);
    if (needed <= g->stack_committed) {
        return;
    }

    size_t new_committed = g->stack_committed;
    if (new_committed == 0) {
        /* Doubling zero never progresses; start from the requirement. */
        new_committed = needed;
    }
    while (new_committed < needed && new_committed < limit) {
        if (new_committed > limit / 2) {
            /* Doubling would overshoot the reservation: take all of it. */
            new_committed = limit;
        } else {
            new_committed *= 2;
        }
    }
    if (new_committed < needed || new_committed > limit) {
        fprintf(stderr, "run: stack overflow in green thread %llu (max %zu bytes)\n",
                (unsigned long long)g->id, g->stack_size);
        abort();
    }

    /* The stack grows downward, so the newly committed range sits directly
     * below the previously committed one. */
    char *old_lo = top - g->stack_committed;
    char *new_lo = top - new_committed;
    run_vmem_protect(new_lo, (size_t)(old_lo - new_lo), RUN_VMEM_READWRITE);
    g->stack_committed = new_committed;
    g->stack_lo = new_lo;
}
387+
388+
/*
 * Number of stack bytes in use for a given stack pointer, measured from the
 * top of g's stack region down to sp. Returns 0 when g, its stack, or sp is
 * NULL, or when sp falls outside [stack_base, top].
 */
static size_t run_stack_used_by_sp(run_g_t *g, void *sp) {
    size_t used = 0;

    if (g != NULL && g->stack_base != NULL && sp != NULL) {
        char *highest = run_stack_top(g);
        char *at = (char *)sp;
        if (at >= (char *)g->stack_base && at <= highest) {
            used = (size_t)(highest - at);
        }
    }
    return used;
}
397+
398+
/*
 * Give back part of g's committed stack when recent usage has been low.
 *
 * The decision uses the page-rounded high-water mark (never less than
 * RUN_GROWABLE_INITIAL):
 *   - if watermark * RUN_STACK_SHRINK_THRESHOLD exceeds the committed size,
 *     usage is too high to shrink; the watermark is reset to the current
 *     usage at `sp` and nothing else changes;
 *   - otherwise the commit is halved, but never below
 *     watermark * RUN_STACK_SHRINK_HYSTERESIS nor below RUN_GROWABLE_INITIAL.
 *
 * When a shrink happens, the bottom of the committed range is passed to
 * run_vmem_release() and then protected with RUN_VMEM_NONE, and
 * stack_committed, stack_lo and stack_watermark are updated.
 *
 * No-op when growable stacks are disabled, g or its stack is NULL, or the
 * commit is already at or below RUN_GROWABLE_INITIAL.
 */
static void run_stack_maybe_shrink(run_g_t *g, void *sp) {
    if (!growable_stacks_enabled || g == NULL || g->stack_base == NULL) {
        return;
    }
    if (g->stack_committed <= RUN_GROWABLE_INITIAL) {
        return;
    }

    const size_t live_bytes = run_stack_used_by_sp(g, sp);
    const size_t page = run_vmem_page_size();

    size_t peak = run_align_up_size(g->stack_watermark, page);
    if (peak < RUN_GROWABLE_INITIAL) {
        peak = RUN_GROWABLE_INITIAL;
    }

    /* Too much of the commit was used recently: start a fresh measurement
     * window and leave the commit alone. */
    if (peak * RUN_STACK_SHRINK_THRESHOLD > g->stack_committed) {
        g->stack_watermark = live_bytes;
        return;
    }

    const size_t floor_bytes = peak * RUN_STACK_SHRINK_HYSTERESIS;
    size_t keep = g->stack_committed / 2;
    if (keep < floor_bytes) {
        keep = run_align_up_size(floor_bytes, page);
    }
    if (keep < RUN_GROWABLE_INITIAL) {
        keep = RUN_GROWABLE_INITIAL;
    }
    if (keep >= g->stack_committed) {
        return;
    }

    /* The stack grows downward: the range being dropped is the lowest part
     * of the currently committed area. */
    char *const highest = run_stack_top(g);
    char *const drop_from = highest - g->stack_committed;
    char *const keep_from = highest - keep;
    const size_t drop_len = (size_t)(keep_from - drop_from);

    run_vmem_release(drop_from, drop_len);
    run_vmem_protect(drop_from, drop_len, RUN_VMEM_NONE);

    g->stack_committed = keep;
    g->stack_lo = keep_from;
    g->stack_watermark = live_bytes;
}
436+
318437
/* Push to a P's local queue with overflow to the global queue. */
319438
static void run_local_push_or_global(run_local_queue_t *lq, run_g_t *g) {
320439
if (!run_local_queue_push(lq, g)) {
@@ -348,6 +467,12 @@ static run_g_t *run_g_alloc(void (*fn)(void *), void *arg) {
348467

349468
/* Stack top = base + size (stack grows downward) */
350469
void *stack_top = (char *)g->stack_base + g->stack_size;
470+
if (growable_stacks_enabled) {
471+
g->stack_lo = (char *)stack_top - g->stack_committed;
472+
} else {
473+
g->stack_lo = (char *)g->stack_base + RUN_GUARD_PAGE_SIZE;
474+
}
475+
g->stack_watermark = 0;
351476

352477
/* Initialize context to start at entry function */
353478
run_context_init(&g->context, stack_top, fn, arg);
@@ -735,6 +860,8 @@ static void run_schedule_loop(run_m_t *m) {
735860

736861
/* Returned here: g yielded or completed */
737862
m->current_g = NULL;
863+
void *saved_sp = run_context_sp(&g->context);
864+
run_stack_record_sp(g, saved_sp);
738865

739866
if (g->status == G_DEAD) {
740867
atomic_fetch_add_explicit(&scheduler_metrics.complete_count, 1, memory_order_relaxed);
@@ -749,6 +876,8 @@ static void run_schedule_loop(run_m_t *m) {
749876
run_unpark_all_idle_ms();
750877
}
751878
run_g_free(g);
879+
} else {
880+
run_stack_maybe_shrink(g, saved_sp);
752881
}
753882
/* If g->status is G_RUNNABLE (yield), it's already re-enqueued.
754883
* If g->status is G_WAITING (channel), the channel code handles it. */
@@ -827,12 +956,9 @@ void run_scheduler_init(void) {
827956
/* Initialize the network poller (io_uring on Linux, kqueue on macOS) */
828957
run_poller_init();
829958

830-
/* Check for growable stacks */
831-
const char *stack_env = getenv("RUN_STACK_MAX");
832-
if (stack_env) {
833-
growable_stacks_enabled = true;
834-
run_stack_growth_init();
835-
}
959+
/* Install growable stack fault handling. RUN_STACK_MAX can override the
960+
* default reservation size, but stacks are growable by default. */
961+
run_stack_growth_init();
836962

837963
/* Check for trace output (#410) */
838964
const char *trace_env = getenv("RUN_TRACE");
@@ -1362,27 +1488,13 @@ static run_g_t *run_find_g_by_fault_addr(void *addr) {
13621488
}
13631489

13641490
static void run_stack_growth_handler(int sig, siginfo_t *info, void *uctx) {
1365-
(void)sig;
13661491
(void)uctx;
13671492

13681493
void *fault_addr = info->si_addr;
13691494
run_g_t *g = run_find_g_by_fault_addr(fault_addr);
13701495

13711496
if (g && growable_stacks_enabled) {
1372-
size_t page_size = run_vmem_page_size();
1373-
/* Check if we can grow */
1374-
size_t max_size = run_stack_max_size();
1375-
if (g->stack_committed >= max_size) {
1376-
fprintf(stderr, "run: stack overflow in green thread %llu (max %zu bytes)\n",
1377-
(unsigned long long)g->id, max_size);
1378-
abort();
1379-
}
1380-
1381-
/* Commit the page containing the fault address */
1382-
// NOLINTNEXTLINE(performance-no-int-to-ptr): page alignment requires uintptr_t masking
1383-
void *page = (void *)((uintptr_t)fault_addr & ~(page_size - 1));
1384-
run_vmem_protect(page, page_size, RUN_VMEM_READWRITE);
1385-
g->stack_committed += page_size;
1497+
run_stack_grow_to_sp(g, fault_addr);
13861498
return; /* Resume execution */
13871499
}
13881500

@@ -1391,8 +1503,8 @@ static void run_stack_growth_handler(int sig, siginfo_t *info, void *uctx) {
13911503
sa.sa_handler = SIG_DFL;
13921504
sigemptyset(&sa.sa_mask);
13931505
sa.sa_flags = 0;
1394-
sigaction(SIGSEGV, &sa, NULL);
1395-
raise(SIGSEGV);
1506+
sigaction(sig, &sa, NULL);
1507+
raise(sig);
13961508
}
13971509

13981510
void run_stack_growth_init(void) {
@@ -1413,6 +1525,7 @@ void run_stack_growth_init(void) {
14131525
sigemptyset(&sa.sa_mask);
14141526
sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
14151527
sigaction(SIGSEGV, &sa, NULL);
1528+
sigaction(SIGBUS, &sa, NULL);
14161529
}
14171530

14181531
#else /* Windows stub */
@@ -1502,9 +1615,16 @@ void run_debug_run_dump_goroutines(char *buf, size_t buf_size) {
15021615
* ======================================================================== */
15031616

15041617
void run_morestack(void) {
1505-
/* Stub: real implementation requires stack copying */
1506-
fputs("run: stack overflow (morestack not yet implemented)\n", stderr);
1507-
abort();
1618+
char marker;
1619+
run_stack_check(&marker);
1620+
}
1621+
1622+
void run_stack_check(void *sp) {
1623+
run_g_t *g = run_current_g();
1624+
if (g == NULL || g->id == 0)
1625+
return;
1626+
run_stack_record_sp(g, sp);
1627+
run_stack_grow_to_sp(g, sp);
15081628
}
15091629

15101630
/* ========================================================================

src/runtime/run_scheduler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ void run_signal_preemption_stop(void);
207207
/* ---------- Growable stacks ---------- */
208208
size_t run_stack_max_size(void);
209209
void run_stack_growth_init(void);
210+
void run_stack_check(void *sp);
210211
void run_morestack(void);
211212

212213
/* ---------- Debug helpers ---------- */

src/runtime/tests/test_scheduler.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ static _Atomic int g_counter = 0;
1010
static _Atomic int multi_p_counter = 0;
1111
static _Atomic int tight_loop_entered = 0;
1212
static _Atomic int tight_loop_stop = 0;
13+
/* Results published by the stack growth/shrink green threads and read back by
 * the test bodies. They are _Atomic, like the other shared counters in this
 * file, because `volatile` does not provide cross-thread synchronization. */
static _Atomic size_t stack_growth_committed = 0;
static _Atomic size_t stack_growth_watermark = 0;
static _Atomic size_t stack_shrink_before = 0;
static _Atomic size_t stack_shrink_after = 0;
1317

1418
static void increment_fn(void *arg) {
1519
(void)arg;
@@ -51,6 +55,36 @@ static void stop_tight_loop_fn(void *arg) {
5155
atomic_store_explicit(&tight_loop_stop, 1, memory_order_release);
5256
}
5357

58+
/* Recursively burn stack: every level fills a 2 KB volatile buffer, reports
 * the address of a local to run_stack_check(), and recurses until depth
 * reaches 0 (depth + 1 frames in total). */
static void consume_stack(int depth) {
    char probe;
    volatile char buf[2048];

    size_t i = 0;
    while (i < sizeof(buf)) {
        buf[i] = (char)(depth + (int)i);
        i++;
    }

    run_stack_check(&probe);
    if (depth <= 0) {
        return;
    }
    consume_stack(depth - 1);
}
69+
70+
static void stack_growth_fn(void *arg) {
71+
(void)arg;
72+
consume_stack(32);
73+
run_g_t *g = run_current_g();
74+
stack_growth_committed = g->stack_committed;
75+
stack_growth_watermark = g->stack_watermark;
76+
}
77+
78+
static void stack_shrink_fn(void *arg) {
79+
(void)arg;
80+
consume_stack(32);
81+
run_yield();
82+
run_g_t *g = run_current_g();
83+
stack_shrink_before = g->stack_committed;
84+
run_yield();
85+
stack_shrink_after = g->stack_committed;
86+
}
87+
5488
/* --- Tests --- */
5589

5690
static void test_scheduler_init(void) {
@@ -122,6 +156,24 @@ static void test_multi_p_progress(void) {
122156
RUN_ASSERT_EQ(atomic_load_explicit(&multi_p_counter, memory_order_relaxed), 64 * 4);
123157
}
124158

159+
/* Spawn one green thread that recurses deeply and check that both its
 * committed stack size and its watermark ended up above 8 KB. */
static void test_stack_growth(void) {
    /* Clear results left over from any earlier run. */
    stack_growth_watermark = 0;
    stack_growth_committed = 0;

    run_spawn(stack_growth_fn, NULL);
    run_scheduler_run();

    RUN_ASSERT(stack_growth_committed > 8 * 1024);
    RUN_ASSERT(stack_growth_watermark > 8 * 1024);
}
167+
168+
/* Spawn one green thread that grows its stack and then yields twice; check
 * that the commit was above 8 KB after the first yield and smaller after the
 * second. */
static void test_stack_shrink(void) {
    /* Clear results left over from any earlier run. */
    stack_shrink_after = 0;
    stack_shrink_before = 0;

    run_spawn(stack_shrink_fn, NULL);
    run_scheduler_run();

    RUN_ASSERT(stack_shrink_before > 8 * 1024);
    RUN_ASSERT(stack_shrink_after < stack_shrink_before);
}
176+
125177
/* --- G Queue Tests --- */
126178

127179
static void test_g_queue_basic(void) {
@@ -244,6 +296,8 @@ void run_test_scheduler(void) {
244296
RUN_TEST(test_yield);
245297
RUN_TEST(test_spawn_many);
246298
RUN_TEST(test_multi_p_progress);
299+
RUN_TEST(test_stack_growth);
300+
RUN_TEST(test_stack_shrink);
247301
RUN_TEST(test_runtime_metrics);
248302
RUN_TEST(test_signal_preemption_tight_loop);
249303
}

0 commit comments

Comments
 (0)