From 5e5251d0905bcf7a0d6ea6eb644a486055ac42f4 Mon Sep 17 00:00:00 2001 From: Wenhan Ji Date: Wed, 30 Jul 2025 13:32:53 -0400 Subject: [PATCH 1/2] =?UTF-8?q?[LibOS]=20Fix=20the=20race=20condition=20in?= =?UTF-8?q?=20syscall=20execve=20by=20waiting=20for=20other=20threads?= =?UTF-8?q?=E2=80=99=20clear=5Fchild=5Ftid=20before=20unmapping=20VMAs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Wenhan Ji --- libos/include/libos_thread.h | 5 ++++ libos/src/bookkeep/libos_thread.c | 49 +++++++++++++++++++++++++++++++ libos/src/sys/libos_exec.c | 24 +++++++++++++++ libos/src/sys/libos_futex.c | 8 +++++ 4 files changed, 86 insertions(+) diff --git a/libos/include/libos_thread.h b/libos/include/libos_thread.h index a3e35b0259..990ca406eb 100644 --- a/libos/include/libos_thread.h +++ b/libos/include/libos_thread.h @@ -315,6 +315,10 @@ bool check_last_thread(bool mark_self_dead); int walk_thread_list(int (*callback)(struct libos_thread*, void*), void* arg, bool one_shot); +/* Dump all current libos_thread entries from g_thread_list, getting one ref on each. */ +int dump_all_threads(struct libos_thread*** out_threads, size_t* out_cnt); +void free_threads_array(struct libos_thread** threads, size_t count); + void get_handle_map(struct libos_handle_map* map); void put_handle_map(struct libos_handle_map* map); @@ -339,3 +343,4 @@ noreturn void process_exit(int error_code, int term_signal); void release_robust_list(struct robust_list_head* head); void release_clear_child_tid(int* clear_child_tid); +void wait_clear_child_tid(int* clear_child_tid, int child_tid); diff --git a/libos/src/bookkeep/libos_thread.c b/libos/src/bookkeep/libos_thread.c index f2db3831a9..78f67e44ba 100644 --- a/libos/src/bookkeep/libos_thread.c +++ b/libos/src/bookkeep/libos_thread.c @@ -519,6 +519,55 @@ int walk_thread_list(int (*callback)(struct libos_thread*, void*), void* arg, bo return ret; } +void free_threads_array(struct libos_thread** threads, size_t count) { + for (size_t i = 0; i < count; i++) { + if (threads[i]) { + put_thread(threads[i]); + } + } + + free(threads); +} + +static size_t dump_all_threads_with_buf(struct libos_thread** threads, size_t max_count) { + size_t size = 0; + + lock(&g_thread_list_lock); + + struct libos_thread* thread; + LISTP_FOR_EACH_ENTRY(thread, &g_thread_list, list) { + if (size < max_count) { + threads[size] = thread; + get_thread(thread); + } + size++; + } + + unlock(&g_thread_list_lock); + return size; +} + +int dump_all_threads(struct libos_thread*** out_threads, size_t* out_cnt) { + size_t count = 1; + + while (true) { + struct libos_thread** threads = calloc(count, sizeof(*threads)); + if (!threads) { + return -ENOMEM; + } + + size_t needed_count = dump_all_threads_with_buf(threads, count); + if (needed_count <= count) { + *out_threads = threads; + *out_cnt = needed_count; + return 0; + } + + free_threads_array(threads, count); + count = needed_count; + } +} + BEGIN_CP_FUNC(signal_dispositions) { __UNUSED(size); assert(size == sizeof(struct libos_signal_dispositions)); diff --git a/libos/src/sys/libos_exec.c b/libos/src/sys/libos_exec.c index 72f414096d..dc54c02f53 100644 --- a/libos/src/sys/libos_exec.c +++ b/libos/src/sys/libos_exec.c @@ -180,8 +180,32 @@ long libos_syscall_execve(const char* file, const char* const* argv, const char* /* Just exit current thread. */ thread_exit(/*error_code=*/0, /*term_signal=*/0); } + + size_t count; + struct libos_thread** threads; + ret = dump_all_threads(&threads, &count); + if (ret < 0) { + return ret; + } + (void)kill_other_threads(); + /* Wait until the async‑worker has cleared each remaining thread’s + * clear_child_tid before we unmap their VMAs. */ + struct libos_thread* cur_thread = get_cur_thread(); + for (size_t i = 0; i < count; i++) { + struct libos_thread* thread = threads[i]; + + /* Skip the main thread and current thread. */ + if (thread->pal_handle == g_pal_public_state->first_thread || thread == cur_thread) { + continue; + } + + wait_clear_child_tid(thread->clear_child_tid, thread->tid); + } + + /* Put down the references (possibly deallocate them) and free the array */ + free_threads_array(threads, count); /* All other threads are dead. Restoring initial value in case we stay inside same process * instance and call execve again. */ __atomic_store_n(&first, 0, __ATOMIC_RELAXED); diff --git a/libos/src/sys/libos_futex.c b/libos/src/sys/libos_futex.c index 8017924bd5..050c700d96 100644 --- a/libos/src/sys/libos_futex.c +++ b/libos/src/sys/libos_futex.c @@ -985,3 +985,11 @@ void release_clear_child_tid(int* clear_child_tid) { __atomic_store_n(clear_child_tid, 0, __ATOMIC_RELEASE); futex_wake((uint32_t*)clear_child_tid, 1, FUTEX_BITSET_MATCH_ANY); } + +void wait_clear_child_tid(int* clear_child_tid, int child_tid) { + while (__atomic_load_n(clear_child_tid, __ATOMIC_ACQUIRE) != 0) { + /* Sleep until the async‑worker (release_clear_child_tid) + * writes 0 and issues FUTEX_WAKE on the same word. */ + futex_wait((uint32_t*)clear_child_tid, child_tid, NULL, FUTEX_BITSET_MATCH_ANY); + } +} From 7be342fb4e12254967da05b81d651189811ab52b Mon Sep 17 00:00:00 2001 From: Wenhan Ji Date: Mon, 11 Aug 2025 05:53:01 -0400 Subject: [PATCH 2/2] =?UTF-8?q?fixup!=20[LibOS]=20Fix=20the=20race=20condi?= =?UTF-8?q?tion=20in=20syscall=20execve=20by=20waiting=20for=20other=20thr?= =?UTF-8?q?eads=E2=80=99=20clear=5Fchild=5Ftid=20before=20unmapping=20VMAs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Wenhan Ji --- libos/src/bookkeep/libos_thread.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libos/src/bookkeep/libos_thread.c b/libos/src/bookkeep/libos_thread.c index 78f67e44ba..f575569ee9 100644 --- a/libos/src/bookkeep/libos_thread.c +++ b/libos/src/bookkeep/libos_thread.c @@ -550,6 +550,12 @@ static size_t dump_all_threads_with_buf(struct libos_thread** threads, size_t ma int dump_all_threads(struct libos_thread*** out_threads, size_t* out_cnt) { size_t count = 1; + /* We take a snapshot of g_thread_list while holding g_thread_list_lock. + * If our buffer is too small, dump_all_threads_with_buf() returns the + * total number of threads ("needed_count"). We then retry with that exact + * size. Because "count" monotonically increases to "needed_count" for a + * given snapshot, the loop terminates as long as new threads aren't being + * created at this point. */ while (true) { struct libos_thread** threads = calloc(count, sizeof(*threads)); if (!threads) { @@ -563,6 +569,9 @@ int dump_all_threads(struct libos_thread*** out_threads, size_t* out_cnt) { return 0; } + /* Need more space - retry with the actual count needed */ + assert(needed_count > count); + free_threads_array(threads, count); count = needed_count; }