Skip to content

Commit 792df36

Browse files
feat(info): add thread CPU time measurement for worker thread monitoring (#3379)
## Summary This PR introduces worker thread CPU time monitoring to identify bottlenecks and fixes precision issues in process CPU time calculations. ## Problem Statement 1. **Lack of granular thread monitoring**: The current `INFO` command provides only overall process CPU time, which includes background threads. This makes it difficult to identify bottlenecks in worker threads specifically, as background thread activity can mask true worker thread utilization. 2. **Precision loss in CPU time calculation**: The existing CPU time calculation truncates microsecond values due to integer division (`tv_usec / 1000000`), causing millisecond data loss. This results in inaccurate reporting, especially for short operations where sub-second precision matters. ## Solution ### 1. Worker Thread CPU Time Monitoring - Added new `worker_cpu_time` metric to track CPU usage per worker thread - Implemented per-thread CPU time collection using platform-specific APIs - Formatted output with microsecond precision (6 decimal places) to match system precision - Sample output: `worker_cpu_time:[0.123456,0.456789,1.234567]` ### 2. CPU Time Calculation Fix - Fixed precision loss in `used_cpu_user` and `used_cpu_sys` calculations - Replaced integer division with floating point division: **Before**: `tv_usec / 1000000` → truncates to integer **After**: `static_cast<double>(tv_usec) / 1e6` → preserves microseconds ## Benefits - **Pinpoint worker thread bottlenecks**: Isolate worker thread CPU usage from background threads - **Accurate performance analysis**: Microsecond-precision timing enables precise performance profiling - **Better load diagnosis**: Distinguish between actual worker saturation and background activity --------- Co-authored-by: yxj25245 <yxj25245@ly.com>
1 parent cc2bb3c commit 792df36

5 files changed

Lines changed: 67 additions & 5 deletions

File tree

src/common/thread_util.cc

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@
2323
#include <fmt/std.h>
2424
#include <pthread.h>
2525

26+
#ifdef __APPLE__
27+
#include <mach/mach.h>
28+
#include <mach/thread_act.h>
29+
#endif
30+
2631
namespace util {
2732

2833
void ThreadSetName(const char *name) {
@@ -33,6 +38,43 @@ void ThreadSetName(const char *name) {
3338
#endif
3439
}
3540

41+
#ifdef __APPLE__
42+
double ThreadGetCPUTime(std::thread::native_handle_type thread_id) {
43+
if (!thread_id) {
44+
return 0.0;
45+
}
46+
47+
mach_port_t mach_thread = pthread_mach_thread_np(thread_id);
48+
49+
thread_basic_info_data_t info;
50+
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
51+
52+
if (thread_info(mach_thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) != KERN_SUCCESS) {
53+
return 0.0;
54+
}
55+
56+
return (static_cast<double>(info.user_time.seconds) + static_cast<double>(info.user_time.microseconds) / 1e6) +
57+
(static_cast<double>(info.system_time.seconds) + static_cast<double>(info.system_time.microseconds) / 1e6);
58+
}
59+
#else
60+
double ThreadGetCPUTime(std::thread::native_handle_type thread_id) {
61+
if (!thread_id) {
62+
return 0.0;
63+
}
64+
65+
clockid_t clock_id = 0;
66+
if (pthread_getcpuclockid(thread_id, &clock_id) != 0) {
67+
return 0.0;
68+
}
69+
70+
timespec ts;
71+
if (clock_gettime(clock_id, &ts) != 0) {
72+
return 0.0;
73+
}
74+
return static_cast<double>(ts.tv_sec) + static_cast<double>(ts.tv_nsec) / 1e9;
75+
}
76+
#endif
77+
3678
template <void (std::thread::*F)(), typename... Args>
3779
Status ThreadOperationImpl(std::thread &t, const char *op, Args &&...args) {
3880
try {

src/common/thread_util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ namespace util {
3232

3333
void ThreadSetName(const char *name);
3434

35+
double ThreadGetCPUTime(std::thread::native_handle_type thread_id);
36+
3537
template <typename F>
3638
StatusOr<std::thread> CreateThread(const char *name, F f) {
3739
try {

src/server/server.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,10 +1402,18 @@ Server::InfoEntries Server::GetCpuInfo() { // NOLINT(readability-convert-member
14021402

14031403
rusage self_ru;
14041404
getrusage(RUSAGE_SELF, &self_ru);
1405-
entries.emplace_back("used_cpu_sys", static_cast<float>(self_ru.ru_stime.tv_sec) +
1406-
static_cast<float>(self_ru.ru_stime.tv_usec / 1000000));
1405+
entries.emplace_back(
1406+
"used_cpu_sys", static_cast<float>(self_ru.ru_stime.tv_sec) + static_cast<float>(self_ru.ru_stime.tv_usec) / 1e6);
14071407
entries.emplace_back("used_cpu_user", static_cast<float>(self_ru.ru_utime.tv_sec) +
1408-
static_cast<float>(self_ru.ru_utime.tv_usec / 1000000));
1408+
static_cast<float>(self_ru.ru_utime.tv_usec) / 1e6);
1409+
1410+
std::vector<double> thread_cpu_times(worker_threads_.size());
1411+
for (std::size_t i{0}; i < worker_threads_.size(); ++i) {
1412+
thread_cpu_times[i] = util::ThreadGetCPUTime(worker_threads_[i]->GetNativeHandle());
1413+
}
1414+
entries.emplace_back(
1415+
"worker_cpu_time",
1416+
fmt::format("[{}]", util::StringJoin(thread_cpu_times, [](auto v) { return std::to_string(v); }, ",")));
14091417

14101418
return entries;
14111419
}

src/server/worker.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <event2/util.h>
2424
#include <unistd.h>
2525

26+
#include <atomic>
2627
#include <cstdint>
2728
#include <stdexcept>
2829
#include <string>
@@ -589,6 +590,7 @@ void WorkerThread::Start() {
589590

590591
if (s) {
591592
t_ = std::move(*s);
593+
native_thread_handle_.store(t_.native_handle(), std::memory_order_relaxed);
592594
} else {
593595
ERROR("[worker] Failed to start worker thread, err: {}", s.Msg());
594596
return;
@@ -597,7 +599,10 @@ void WorkerThread::Start() {
597599
INFO("[worker] Thread #{} started", fmt::streamed(t_.get_id()));
598600
}
599601

600-
void WorkerThread::Stop(uint32_t wait_seconds) { worker_->Stop(wait_seconds); }
602+
void WorkerThread::Stop(uint32_t wait_seconds) {
603+
native_thread_handle_.store(std::thread::native_handle_type{}, std::memory_order_relaxed);
604+
worker_->Stop(wait_seconds);
605+
}
601606

602607
void WorkerThread::Join() {
603608
if (auto s = util::ThreadJoin(t_); !s) {

src/server/worker.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <event2/listener.h>
2626
#include <event2/util.h>
2727

28+
#include <atomic>
2829
#include <cstdint>
2930
#include <cstring>
3031
#include <lua.hpp>
@@ -104,7 +105,8 @@ class Worker : EventCallbackBase<Worker>, EvconnlistenerBase<Worker> {
104105

105106
class WorkerThread {
106107
public:
107-
explicit WorkerThread(std::unique_ptr<Worker> worker) : worker_(std::move(worker)) {}
108+
explicit WorkerThread(std::unique_ptr<Worker> worker)
109+
: native_thread_handle_{std::thread::native_handle_type{}}, worker_(std::move(worker)) {}
108110
~WorkerThread() = default;
109111
WorkerThread(const WorkerThread &) = delete;
110112
WorkerThread(WorkerThread &&) = delete;
@@ -116,7 +118,10 @@ class WorkerThread {
116118
void Join();
117119
bool IsTerminated() const { return worker_->IsTerminated(); }
118120

121+
std::thread::native_handle_type GetNativeHandle() { return native_thread_handle_.load(std::memory_order_relaxed); }
122+
119123
private:
120124
std::thread t_;
125+
std::atomic<std::thread::native_handle_type> native_thread_handle_;
121126
std::unique_ptr<Worker> worker_;
122127
};

0 commit comments

Comments
 (0)