@@ -3834,6 +3834,30 @@ static bool kernel_supports_aio_fsync() {
3834
3834
return internal::kernel_uname ().whitelisted ({" 4.18" });
3835
3835
}
3836
3836
3837
+ static std::tuple<std::filesystem::path, uint64_t > wakeup_granularity () {
3838
+ auto try_read = [] (auto path) -> uint64_t {
3839
+ try {
3840
+ return read_first_line_as<uint64_t >(path);
3841
+ } catch (...) {
3842
+ return 0 ;
3843
+ }
3844
+ };
3845
+
3846
+ auto legacy_path = " /proc/sys/kernel/sched_wakeup_granularity_ns" ;
3847
+ if (auto val = try_read (legacy_path); val) {
3848
+ return {legacy_path, val};
3849
+ }
3850
+
3851
+ // This will in practice almost always fail because debug fs requires root
3852
+ // perms to read so we are out of luck
3853
+ auto debug_fs_path = " /sys/kernel/debug/sched/wakeup_granularity_ns" ;
3854
+ if (auto val = try_read (legacy_path); val) {
3855
+ return {debug_fs_path, val};
3856
+ }
3857
+
3858
+ return {" " , 0 };
3859
+ }
3860
+
3837
3861
static program_options::selection_value<network_stack_factory> create_network_stacks_option (reactor_options& zis) {
3838
3862
using value_type = program_options::selection_value<network_stack_factory>;
3839
3863
value_type::candidates candidates;
@@ -4535,6 +4559,22 @@ void smp::configure(const smp_options& smp_opts, const reactor_options& reactor_
4535
4559
.no_poll_aio = !reactor_opts.poll_aio .get_value () || (reactor_opts.poll_aio .defaulted () && reactor_opts.overprovisioned ),
4536
4560
};
4537
4561
4562
+ // Disable hot polling if sched wakeup granularity is too high
4563
+ // dio thread will be starved otherwise
4564
+ // see https://github.com/scylladb/seastar/issues/2696
4565
+ if (!reactor_cfg.no_poll_aio || reactor_cfg.max_poll_time != 0us) {
4566
+ auto [wakeup_file, granularity] = wakeup_granularity ();
4567
+ // 15M is chosen as it's what tuned sets. Though you probably already
4568
+ // see an adverse effect earlier.
4569
+ if (granularity >= 15000000 ) {
4570
+ reactor_cfg.no_poll_aio = true ;
4571
+ reactor_cfg.max_poll_time = 0us;
4572
+ seastar_logger.warn (
4573
+ " Setting --poll-aio=0 and --idle-poll-time-us=0 due to too high sched_wakeup_granularity of {} in {}" ,
4574
+ granularity, wakeup_file.string ());
4575
+ }
4576
+ }
4577
+
4538
4578
aio_nowait_supported = reactor_opts.linux_aio_nowait .get_value ();
4539
4579
std::mutex mtx;
4540
4580
0 commit comments