Commit 4ba36cf

extmod/zephyr_kernel: Remove gc_collect() from mp_thread_create_ex() to fix deadlock.
The gc_collect() call in mp_thread_create_ex() was executed before locking thread_mutex, causing a deadlock when spawned threads called gc.collect() and tried to acquire thread_mutex.

Scenario:
- Main thread calls gc.collect() (unprotected), then tries to lock thread_mutex.
- Spawned thread calls gc.collect() -> mp_thread_gc_others(), which locks thread_mutex.
- Main thread blocks waiting for thread_mutex while the spawned thread holds it.

Remove the gc_collect() call, since normal GC cycles will clean up finished threads anyway, and this avoids the lock ordering issue.

Fixes thread_gc1_simple hanging indefinitely.

Signed-off-by: Andrew Leech <[email protected]>
1 parent 6c81a76 commit 4ba36cf
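
The deadlock described in the commit message is a lock-ordering problem: two threads each hold one lock and wait on the other. The sketch below is a minimal, self-contained Zephyr illustration of that shape only, not the actual MicroPython code; lock_a stands in for whatever the collecting thread already holds (for example the GIL) and lock_b stands in for thread_mutex, and both names are hypothetical.

/* Illustration only: two threads taking the same pair of locks in opposite order. */
#include <zephyr/kernel.h>

K_SEM_DEFINE(lock_a, 1, 1);  /* binary semaphores used as mutexes */
K_SEM_DEFINE(lock_b, 1, 1);

void spawned_side(void *p1, void *p2, void *p3) {
    ARG_UNUSED(p1); ARG_UNUSED(p2); ARG_UNUSED(p3);
    k_sem_take(&lock_b, K_FOREVER);  /* spawned thread: takes B first ... */
    k_sem_take(&lock_a, K_FOREVER);  /* ... then waits for A, which main holds */
    k_sem_give(&lock_a);
    k_sem_give(&lock_b);
}

K_THREAD_DEFINE(spawned_tid, 1024, spawned_side, NULL, NULL, NULL, 5, 0, 0);

int main(void) {
    k_sem_take(&lock_a, K_FOREVER);  /* main thread: takes A first ... */
    k_msleep(100);                   /* let the spawned thread grab B */
    k_sem_take(&lock_b, K_FOREVER);  /* ... then waits for B: both threads block forever */
    k_sem_give(&lock_b);
    k_sem_give(&lock_a);
    return 0;
}

In the commit's scenario the same shape arises when mp_thread_create_ex() collects before taking thread_mutex while a spawned thread's gc.collect() has already entered mp_thread_gc_others(); dropping the gc_collect() call removes one side of the inverted ordering.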


extmod/zephyr_kernel/kernel/mpthread_zephyr.c

Lines changed: 17 additions & 7 deletions
@@ -12,6 +12,7 @@
  */
 
 #include <stdio.h>
+#include <string.h>
 
 #include "py/runtime.h"
 #include "py/gc.h"
@@ -171,6 +172,7 @@ void mp_thread_gc_others(void) {
             }
             mp_thread_counter--;
             DEBUG_printf("GC: Collected thread %s\n", k_thread_name_get(th->id));
+            // The "th" memory will eventually be reclaimed by the GC
             // Don't update prev - we removed this node
         } else {
             th->alive = 0; // Reset for next GC cycle
@@ -260,10 +262,11 @@ mp_uint_t mp_thread_create_ex(void *(*entry)(void *), void *arg, size_t *stack_s
     // *stack_size = MP_THREAD_MIN_STACK_SIZE;
     // }
 
-    // in case some threads have finished but their stack has not been collected yet
-    gc_collect();
+    // NOTE: gc_collect() removed from here - it was causing deadlock with spawned threads
+    // trying to lock thread_mutex while main thread was in gc_collect(). The normal GC
+    // cycles will clean up finished threads anyway.
 
-    // Allocate thread node (must be outside mutex lock for GC)
+    // Allocate linked-list node (must be outside thread_mutex lock)
     mp_thread_t *th = m_new_obj(mp_thread_t);
 
     mp_thread_mutex_lock(&thread_mutex, 1);
@@ -272,7 +275,6 @@ mp_uint_t mp_thread_create_ex(void *(*entry)(void *), void *arg, size_t *stack_s
     int32_t _slot = mp_thread_find_stack_slot();
     if (_slot < 0) {
         mp_thread_mutex_unlock(&thread_mutex);
-        m_del_obj(mp_thread_t, th);
         mp_raise_msg(&mp_type_OSError, MP_ERROR_TEXT("maximum number of threads reached"));
     }
 
@@ -289,7 +291,6 @@ mp_uint_t mp_thread_create_ex(void *(*entry)(void *), void *arg, size_t *stack_s
 
     if (th->id == NULL) {
         mp_thread_mutex_unlock(&thread_mutex);
-        m_del_obj(mp_thread_t, th);
         mp_raise_msg(&mp_type_OSError, MP_ERROR_TEXT("can't create thread"));
     }
 
@@ -323,7 +324,7 @@ mp_uint_t mp_thread_create_ex(void *(*entry)(void *), void *arg, size_t *stack_s
 
 // Create thread (standard API)
 mp_uint_t mp_thread_create(void *(*entry)(void *), void *arg, size_t *stack_size) {
-    char name[16];
+    static char name[16]; // Static to avoid dangling pointer
     snprintf(name, sizeof(name), "mp_thread_%d", mp_thread_counter);
     return mp_thread_create_ex(entry, arg, stack_size, MP_THREAD_PRIORITY, name);
 }
@@ -346,19 +347,26 @@ void mp_thread_finish(void) {
 // Initialize mutex - use Zephyr's k_sem (binary semaphore, non-recursive)
 // Non-recursive behavior is required for Python threading semantics
 void mp_thread_mutex_init(mp_thread_mutex_t *mutex) {
+    // Zero the k_sem structure before init to ensure clean state
+    // This prevents corruption from stale state in global memory across soft resets
+    memset(&mutex->handle, 0, sizeof(struct k_sem));
     k_sem_init(&mutex->handle, 0, 1);
     k_sem_give(&mutex->handle);
 }
 
 // Lock mutex
 int mp_thread_mutex_lock(mp_thread_mutex_t *mutex, int wait) {
+    // DEBUG logging disabled - was causing reentrancy issues
     return k_sem_take(&mutex->handle, wait ? K_FOREVER : K_NO_WAIT) == 0;
 }
 
 // Unlock mutex
 void mp_thread_mutex_unlock(mp_thread_mutex_t *mutex) {
     k_sem_give(&mutex->handle);
-    k_yield(); // Yield CPU to allow waiting threads to run
+    // NOTE: k_yield() removed - was causing deadlock during thread creation
+    // When main thread creates new thread and releases thread_mutex, yielding
+    // immediately causes new thread to run and block on GIL (held by main),
+    // creating deadlock. Rely on timeslicing for context switches instead.
}
 
 // Recursive mutex functions (only compiled when GIL is disabled)
@@ -368,6 +376,8 @@ void mp_thread_mutex_unlock(mp_thread_mutex_t *mutex) {
 #if MICROPY_PY_THREAD_RECURSIVE_MUTEX
 
 void mp_thread_recursive_mutex_init(mp_thread_recursive_mutex_t *mutex) {
+    // Zero the k_mutex structure before init to ensure clean state
+    memset(&mutex->handle, 0, sizeof(struct k_mutex));
     k_mutex_init(&mutex->handle);
 }
 
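For context on the k_sem-backed mutex that the last two hunks touch, here is a standalone sketch of the same pattern. It assumes, from the memset() and k_sem_*() calls in the diff, that mp_thread_mutex_t wraps a single struct k_sem member named handle; the typedef and the function names are placeholders for illustration, not the port's exact definitions.

#include <string.h>
#include <zephyr/kernel.h>

/* Assumed layout, inferred from the diff: one binary semaphore per mutex. */
typedef struct _mp_thread_mutex_t {
    struct k_sem handle;
} mp_thread_mutex_t;

void mutex_init(mp_thread_mutex_t *mutex) {
    memset(&mutex->handle, 0, sizeof(struct k_sem));  /* clear stale state across soft resets */
    k_sem_init(&mutex->handle, 0, 1);  /* count 0, limit 1: created "locked" */
    k_sem_give(&mutex->handle);        /* one give makes it available, i.e. unlocked */
}

int mutex_lock(mp_thread_mutex_t *mutex, int wait) {
    /* returns 1 on success, 0 if non-blocking and the mutex is already held */
    return k_sem_take(&mutex->handle, wait ? K_FOREVER : K_NO_WAIT) == 0;
}

void mutex_unlock(mp_thread_mutex_t *mutex) {
    k_sem_give(&mutex->handle);  /* no k_yield(): see the rationale in the diff above */
}

Because the semaphore's limit is 1 and there is no owner tracking, a thread that locks the same mutex twice blocks on itself, which is the non-recursive behaviour the comment in the diff calls out as required for Python threading semantics.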