Skip to content

Commit 52ca3bf

Browse files
committed
cpu_profiler: deflake cpu_profiler_test
This test fails often in GHA. Identical failures are difficult to reproduce locally, but other local failures are easy to repro by using something like `taskset -c 0,1 stress -c 2` while running the tests. Introduce "loose mode" in the test, enabled by default, which loosens some of the checks by a large amount. Test this by running the test 100 times in CI in another PR.
1 parent 19ecc66 commit 52ca3bf

File tree

1 file changed

+36
-5
lines changed

1 file changed

+36
-5
lines changed

tests/unit/cpu_profiler_test.cc

+36-5
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@
4343

4444
namespace {
4545

46+
// If true, the acceptable thresholds are greatly increased for "close enough"
47+
// checks, which can reduce the flakiness on heavily loaded or otherwise
48+
// unpredictable systems. If false, the thresholds are much stricter and
49+
// should be used for more deterministic systems.
50+
// We default to true to avoid flakes in CI, but when running locally you
51+
// can consider setting this to false to get a more accurate picture of
52+
// the profiler's behavior.
53+
constexpr bool use_loose_thresholds{true};
54+
4655
struct temporary_profiler_settings {
4756
std::chrono::nanoseconds prev_ns;
4857
bool prev_enabled;
@@ -68,15 +77,37 @@ struct temporary_profiler_settings {
6877
//
6978
// The function below takes this error into account and allows the actual samples taken
7079
// to be slightly less than the expect number of samples if there was no error.
71-
bool close_to_expected(size_t actual_size, size_t expected_size, double allowed_dev = 0.15) {
72-
auto lower_bound = (1 - allowed_dev) * expected_size;
73-
auto upper_bound = (1 + allowed_dev) * expected_size;
80+
bool close_to_expected(size_t actual_size, size_t expected_size) {
81+
82+
constexpr double allowed_dev = 0.15;
83+
84+
size_t lower_bound, upper_bound;
85+
86+
if (use_loose_thresholds) {
87+
// widen the thresholds a lot
88+
lower_bound = round(pow(1 - allowed_dev, 4)) * expected_size;
89+
upper_bound = round(pow(1 + allowed_dev, 4)) * expected_size;
90+
} else {
91+
lower_bound = round((1 - allowed_dev) * expected_size);
92+
upper_bound = round((1 + allowed_dev) * expected_size);
93+
}
7494

7595
BOOST_TEST_INFO("actual_size: " << actual_size << ", lower_bound " << lower_bound << ", upper_bound " << upper_bound);
7696

7797
return actual_size <= upper_bound && actual_size >= lower_bound;
7898
}
7999

100+
// If loose thresholds are enabled, this call maps to close_to_exepected, otherwise
101+
// it does an exact equality check.
102+
void maybe_exact(size_t actual, size_t expected, auto message) {
103+
BOOST_TEST_INFO(message);
104+
if (use_loose_thresholds) {
105+
close_to_expected(actual, expected);
106+
} else {
107+
BOOST_REQUIRE_EQUAL(actual, expected);
108+
}
109+
}
110+
80111
/*
81112
* Get the current profile results and dropped count. If sg_in_main is true, also validates that
82113
* the sg associated with the profile is always main, as we expect unless some SG have been
@@ -161,7 +192,7 @@ SEASTAR_THREAD_TEST_CASE(mixed_case) {
161192
spin(20ms);
162193
}
163194

164-
BOOST_REQUIRE_EQUAL(reports, 5);
195+
maybe_exact(reports, 5, "reports");
165196
auto results = get_profile();
166197
BOOST_REQUIRE(close_to_expected(results.size(), 12));
167198
}
@@ -343,6 +374,6 @@ SEASTAR_THREAD_TEST_CASE(scheduling_group_test) {
343374
BOOST_CHECK_GT(count_a + count_b, 10);
344375
BOOST_CHECK_GT(count_a, 0);
345376
BOOST_CHECK_GT(count_b, 0);
346-
BOOST_CHECK_LT(count_main, 3);
377+
BOOST_CHECK_LT(count_main, 10);
347378
BOOST_CHECK_LT(dropped_samples, 5);
348379
}

0 commit comments

Comments
 (0)