Skip to content

Commit 5af0842

Browse files
committed
smp: prefaulter: don't leave zombie worker threads
As explained in detail in scylladb#2623, prefaulter worker threads that have completed but have not been joined are left in a zombie state, which confuses gdb's thread_local processing. Because seastar relies heavily on thread locals, core dumps become impossible to debug. Fix this by joining the threads after they complete. Use seastar::alien to ask the reactor thread on shard 0 to join the worker threads once the last of them has finished, so the join does not stall the reactor. Fixes scylladb#2623.
1 parent 9cb6a25 commit 5af0842

File tree

2 files changed

+24
-5
lines changed

2 files changed

+24
-5
lines changed

src/core/prefault.hh

+9-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@
3030
#include <seastar/core/task.hh>
3131
#include <seastar/core/memory.hh>
3232

33+
namespace seastar::alien {
34+
35+
class instance;
36+
37+
};
38+
3339
namespace seastar::internal {
3440

3541
// Responsible for pre-faulting in memory so soft page fault latency doesn't impact applications
@@ -38,11 +44,13 @@ class memory_prefaulter {
3844
std::vector<posix_thread> _worker_threads;
3945
// Keep this in object scope to avoid allocating in worker thread
4046
std::unordered_map<unsigned, std::vector<memory::internal::memory_range>> _layout_by_node_id;
47+
std::atomic<unsigned> _active_threads = 0;
4148
public:
42-
explicit memory_prefaulter(const resource::resources& res, memory::internal::numa_layout layout);
49+
explicit memory_prefaulter(alien::instance& alien, const resource::resources& res, memory::internal::numa_layout layout);
4350
~memory_prefaulter();
4451
private:
4552
void work(std::vector<memory::internal::memory_range>& ranges, size_t page_size, std::optional<size_t> huge_page_size_opt);
53+
void join_threads() noexcept;
4654
};
4755

4856

src/core/smp.cc

+15-4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ module;
3434
module seastar;
3535
#else
3636
#include <seastar/core/smp.hh>
37+
#include <seastar/core/alien.hh>
3738
#include <seastar/core/resource.hh>
3839
#include <seastar/core/loop.hh>
3940
#include <seastar/core/semaphore.hh>
@@ -175,7 +176,7 @@ void
175176
smp::setup_prefaulter(const seastar::resource::resources& res, seastar::memory::internal::numa_layout layout) {
176177
// Stack guards mprotect() random pages, so the prefaulter will hard-fault.
177178
#ifndef SEASTAR_THREAD_STACK_GUARDS
178-
_prefaulter = std::make_unique<internal::memory_prefaulter>(res, std::move(layout));
179+
_prefaulter = std::make_unique<internal::memory_prefaulter>(_alien, res, std::move(layout));
179180
#endif
180181
}
181182

@@ -199,7 +200,7 @@ get_huge_page_size() {
199200
return std::nullopt;
200201
}
201202

202-
internal::memory_prefaulter::memory_prefaulter(const resource::resources& res, memory::internal::numa_layout layout) {
203+
internal::memory_prefaulter::memory_prefaulter(alien::instance& alien, const resource::resources& res, memory::internal::numa_layout layout) {
203204
for (auto& range : layout.ranges) {
204205
_layout_by_node_id[range.numa_node_id].push_back(std::move(range));
205206
}
@@ -218,17 +219,27 @@ internal::memory_prefaulter::memory_prefaulter(const resource::resources& res, m
218219
}
219220
a.set(cpuset);
220221
}
221-
_worker_threads.emplace_back(a, [this, &ranges, page_size, huge_page_size_opt] {
222+
_worker_threads.emplace_back(a, [this, &alien, &ranges, page_size, huge_page_size_opt] {
223+
++_active_threads;
222224
work(ranges, page_size, huge_page_size_opt);
225+
if (!--_active_threads) {
226+
run_on(alien, 0, [this] () noexcept { join_threads(); });
227+
}
223228
});
224229
}
225230
}
226231

227-
internal::memory_prefaulter::~memory_prefaulter() {
232+
void
233+
internal::memory_prefaulter::join_threads() noexcept {
228234
_stop_request.store(true, std::memory_order_relaxed);
229235
for (auto& t : _worker_threads) {
230236
t.join();
231237
}
238+
_worker_threads.clear();
239+
}
240+
241+
internal::memory_prefaulter::~memory_prefaulter() {
242+
join_threads();
232243
}
233244

234245
void

0 commit comments

Comments
 (0)