cpu_profiler: add scheduler group

travisdowns · travisdowns · commit d28b526c1072 · 2025-03-15T01:00:38.000-03:00
Add scheduler group to each sample. This is recorded on the seastar side
at the moment of the sample and we now include it in the output.

This is useful for understanding what is running in what scheduler
group.

Add a new unit test case for this functionality.
diff --git a/src/v/redpanda/admin/api-doc/debug.json b/src/v/redpanda/admin/api-doc/debug.json
@@ -782,6 +782,10 @@
                     "type": "string",
                     "description": "user backtrace"
                 },
+                "scheduling_group": {
+                    "type": "string",
+                    "description": "The scheduling group that was active when the sample was taken."
+                },
                 "occurrences": {
                     "type": "long",
                     "description": "number of times this backtrace has occurred"
@@ -1510,4 +1514,4 @@
             }
         }
     }
-}
+}
diff --git a/src/v/redpanda/admin/debug.cc b/src/v/redpanda/admin/debug.cc
@@ -604,8 +604,9 @@ admin_server::cpu_profile_handler(std::unique_ptr<ss::http::request> req) {
         samples.reserve(shard_profile.samples.size());
         for (auto& sample : shard_profile.samples) {
             ss::httpd::debug_json::cpu_profile_sample json_sample;
-            json_sample.occurrences = sample.occurrences;
             json_sample.user_backtrace = sample.user_backtrace;
+            json_sample.scheduling_group = sample.sg;
+            json_sample.occurrences = sample.occurrences;
             samples.emplace_back(std::move(json_sample));
         }
 
diff --git a/src/v/resource_mgmt/cpu_profiler.cc b/src/v/resource_mgmt/cpu_profiler.cc
@@ -95,19 +95,33 @@ ss::future<std::vector<cpu_profiler::shard_samples>> cpu_profiler::results(
     co_return results;
 }
 
+// hashable struct holding a single sample
+struct single_sample {
+    ss::simple_backtrace backtrace;
+    ss::sstring sg;
+
+    bool operator==(const single_sample&) const = default;
+
+    template<typename H>
+    friend H AbslHashValue(H h, const single_sample& s) {
+        return H::combine(std::move(h), s.backtrace, s.sg);
+    }
+};
+
 cpu_profiler::shard_samples cpu_profiler::shard_results(
   std::optional<ss::lowres_clock::time_point> filter_before) const {
     size_t dropped_samples = 0, total_samples = 0;
-    chunked_hash_map<ss::simple_backtrace, size_t> backtraces;
+    chunked_hash_map<single_sample, size_t> backtraces;
     for (auto& results_buffer : _results_buffers) {
         if (filter_before && results_buffer.polled_time < *filter_before) {
             continue;
         }
 
         dropped_samples += results_buffer.dropped_samples;
         total_samples += results_buffer.samples.size();
+
         for (auto& result : results_buffer.samples) {
-            backtraces[result.user_backtrace]++;
+            ++backtraces[{result.user_backtrace, result.sg.name()}];
         }
     }
 
@@ -116,15 +130,17 @@ cpu_profiler::shard_samples cpu_profiler::shard_results(
       total_samples,
       backtraces.size());
 
-    std::vector<sample> results{};
+    std::vector<sample> results;
     results.reserve(backtraces.size());
 
     for (auto& backtrace : backtraces) {
         results.emplace_back(
-          ssx::sformat("{}", backtrace.first), backtrace.second);
+          ssx::sformat("{}", backtrace.first.backtrace),
+          backtrace.first.sg,
+          backtrace.second);
     }
 
-    return {ss::this_shard_id(), dropped_samples, results};
+    return {ss::this_shard_id(), dropped_samples, std::move(results)};
 }
 
 void cpu_profiler::poll_samples() {
diff --git a/src/v/resource_mgmt/cpu_profiler.h b/src/v/resource_mgmt/cpu_profiler.h
@@ -50,11 +50,8 @@ class cpu_profiler : public ss::peering_sharded_service<cpu_profiler> {
 public:
     struct sample {
         ss::sstring user_backtrace;
-        size_t occurrences;
-
-        sample(ss::sstring ub, size_t o)
-          : user_backtrace(std::move(ub))
-          , occurrences(o) {}
+        ss::sstring sg;
+        size_t occurrences = 0;
     };
 
     struct shard_samples {
diff --git a/src/v/resource_mgmt/tests/BUILD b/src/v/resource_mgmt/tests/BUILD
@@ -9,6 +9,7 @@ redpanda_cc_btest(
     deps = [
         "//src/v/config",
         "//src/v/resource_mgmt:cpu_profiler",
+        "//src/v/resource_mgmt:cpu_scheduling",
         "//src/v/test_utils:seastar_boost",
         "@boost//:test",
         "@seastar",
diff --git a/src/v/resource_mgmt/tests/cpu_profiler_test.cc b/src/v/resource_mgmt/tests/cpu_profiler_test.cc
@@ -11,6 +11,7 @@
 
 #include "config/property.h"
 #include "resource_mgmt/cpu_profiler.h"
+#include "resource_mgmt/cpu_scheduling.h"
 
 #include <seastar/core/future.hh>
 #include <seastar/core/internal/cpu_profiler.hh>
@@ -19,7 +20,9 @@
 #include <seastar/core/smp.hh>
 #include <seastar/core/timer.hh>
 #include <seastar/coroutine/maybe_yield.hh>
+#include <seastar/coroutine/switch_to.hh>
 #include <seastar/testing/thread_test_case.hh>
+#include <seastar/util/defer.hh>
 
 #include <boost/test/unit_test.hpp>
 
@@ -32,7 +35,13 @@ using shard_samples = resources::cpu_profiler::shard_samples;
 using sharded_profiler = ss::sharded<resources::cpu_profiler>;
 
 namespace {
-ss::future<> busy_loop(std::chrono::milliseconds duration) {
+ss::future<> busy_loop(
+  std::chrono::milliseconds duration,
+  std::optional<ss::scheduling_group> sg = {}) {
+    if (sg) {
+        co_await ss::coroutine::switch_to(*sg);
+    }
+
     auto end_time = ss::lowres_clock::now() + duration;
     while (ss::lowres_clock::now() < end_time) {
         // yield to allow timer to trigger and lowres_clock to update
@@ -69,6 +78,7 @@ SEASTAR_THREAD_TEST_CASE(test_cpu_profiler) {
     resources::cpu_profiler cp(
       config::mock_binding(true), config::mock_binding(2ms));
     cp.start().get();
+    auto stop = ss::defer([&] { cp.stop().get(); });
 
     // The profiler service will request samples from seastar every
     // 256ms since the sample rate is 2ms. So we need to be running
@@ -79,6 +89,51 @@ SEASTAR_THREAD_TEST_CASE(test_cpu_profiler) {
     BOOST_TEST(results.samples.size() >= 1);
 }
 
+// We should create the sgs only once and not destroy them because sgs may be
+// captured by the profiler in one test and leak into the next (e.g., because
+// they are still in the seastar-side sample buffer), where accessing them
+// would be UB as they are destroyed.
+static scheduling_groups get_sgs() {
+    static scheduling_groups sg = [] {
+        scheduling_groups sg;
+        sg.create_groups().get();
+        return sg;
+    }();
+    return sg;
+}
+
+SEASTAR_THREAD_TEST_CASE(test_cpu_scheduler_groups) {
+    scheduling_groups sg = get_sgs();
+
+    resources::cpu_profiler cp(
+      config::mock_binding(true), config::mock_binding(2ms));
+    cp.start().get();
+    auto stop = ss::defer([&] { cp.stop().get(); });
+
+    busy_loop(256ms + 10ms).get();
+
+    auto results = cp.shard_results();
+    BOOST_TEST(results.samples.size() >= 1);
+    for (auto& r : results.samples) {
+        BOOST_REQUIRE_EQUAL(r.sg, "main");
+    }
+
+    busy_loop(256ms + 10ms, sg.kafka_sg()).get();
+
+    results = cp.shard_results();
+    BOOST_TEST(results.samples.size() >= 1);
+    int found_kafka = 0;
+    for (auto& r : results.samples) {
+        // we accept both main and kafka as some internal reactor work
+        // will be recorded as main group
+        BOOST_REQUIRE_MESSAGE(
+          r.sg == "main" || r.sg == "kafka", "unexpected group: " << r.sg);
+        found_kafka += r.sg == "kafka";
+    }
+    // should get at least some kafka!
+    BOOST_REQUIRE(found_kafka > 0);
+}
+
 SEASTAR_THREAD_TEST_CASE(test_cpu_profiler_enable_override) {
     // Ensure that overrides to the profiler will enable it and collect samples
     // for the specified period of time.

Original file line number	Diff line number	Diff line change
`@@ -782,6 +782,10 @@`
`782`	`782`	`"type": "string",`
`783`	`783`	`"description": "user backtrace"`
`784`	`784`	`},`
	`785`	`+ "scheduling_group": {`
	`786`	`+ "type": "string",`
	`787`	`+ "description": "The scheduling group that was active when the sample was taken."`
	`788`	`+ },`
`785`	`789`	`"occurrences": {`
`786`	`790`	`"type": "long",`
`787`	`791`	`"description": "number of times this backtrace has occurred"`
`@@ -1510,4 +1514,4 @@`
`1510`	`1514`	`}`
`1511`	`1515`	`}`
`1512`	`1516`	`}`
`1513`		`-}`
	`1517`	`+}`