Skip to content

Commit 08068a7

Browse files
authored
Merge pull request #3123 from hodgesds/p2dq-lat-prio
scx_p2dq: Add conservative wakeup preemption for latency-critical tasks
2 parents ee09f0e + 7d43887 commit 08068a7

File tree

3 files changed

+58
-2
lines changed

3 files changed

+58
-2
lines changed

scheds/rust/scx_p2dq/src/bpf/main.bpf.c

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,15 @@ const volatile struct {
151151
.pelt_enabled = true,
152152
};
153153

154+
/* Latency priority and preemption configuration */
155+
const volatile struct {
156+
bool latency_priority_enabled;
157+
bool wakeup_preemption_enabled;
158+
} latency_config = {
159+
.latency_priority_enabled = false,
160+
.wakeup_preemption_enabled = false,
161+
};
162+
154163
const volatile u32 debug = 2;
155164
const u32 zero_u32 = 0;
156165
extern const volatile u32 nr_cpu_ids;
@@ -1177,6 +1186,39 @@ static s32 p2dq_select_cpu_impl(struct task_struct *p, s32 prev_cpu, u64 wake_fl
11771186
else
11781187
cpu = pick_idle_cpu(p, taskc, prev_cpu, wake_flags, &is_idle);
11791188

1189+
// Wakeup preemption for extremely latency-critical tasks
1190+
// Only attempt if: no idle CPU found AND task has very high priority
1191+
if (!is_idle && latency_config.wakeup_preemption_enabled) {
1192+
struct cpu_ctx *prev_cpuc;
1193+
1194+
// Only preempt for truly latency-critical tasks (scx.weight >= 2847, equivalent to nice <= -15)
1195+
// and only if we can check the prev_cpu state
1196+
if (p->scx.weight >= 2847 &&
1197+
bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
1198+
(prev_cpuc = lookup_cpu_ctx(prev_cpu))) {
1199+
1200+
// Don't preempt a CPU whose currently running task is interactive - that task needs low latency too
1201+
if (cpu_ctx_test_flag(prev_cpuc, CPU_CTX_F_INTERACTIVE)) {
1202+
goto skip_preempt;
1203+
}
1204+
1205+
// Only preempt if incoming task has higher priority than running task
1206+
// This ensures we only preempt lower priority work
1207+
if (p->scx.weight <= prev_cpuc->running_weight) {
1208+
goto skip_preempt;
1209+
}
1210+
1211+
// Target prev_cpu (and thereby prev_cpu's LLC DSQ); no priority boost is applied here
1212+
// Don't bypass normal queueing - let vtime ordering work
1213+
// Just ensure we target prev_cpu for better cache affinity
1214+
cpu = prev_cpu;
1215+
trace("PREEMPT_TARGET [%d][%s] weight=%u > running_weight=%u on cpu=%d",
1216+
p->pid, p->comm, p->scx.weight, prev_cpuc->running_weight, prev_cpu);
1217+
}
1218+
}
1219+
1220+
skip_preempt:
1221+
11801222
if (likely(is_idle)) {
11811223
stat_inc(P2DQ_STAT_IDLE);
11821224
// Only direct dispatch non-affinitized tasks
@@ -1665,6 +1707,7 @@ static int p2dq_running_impl(struct task_struct *p)
16651707
cpu_ctx_clear_flag(cpuc, CPU_CTX_F_INTERACTIVE);
16661708

16671709
cpuc->dsq_index = taskc->dsq_index;
1710+
cpuc->running_weight = p->scx.weight;
16681711

16691712
if (p->scx.weight < 100)
16701713
cpu_ctx_set_flag(cpuc, CPU_CTX_F_NICE_TASK);
@@ -2466,6 +2509,7 @@ static s32 p2dq_init_task_impl(struct task_struct *p, struct scx_init_task_args
24662509

24672510
taskc->llc_id = cpuc->llc_id;
24682511
taskc->node_id = cpuc->node_id;
2512+
taskc->pid = p->pid;
24692513

24702514
// Adjust starting index based on niceness
24712515
if (p->scx.weight == 100) {
@@ -2500,8 +2544,6 @@ static s32 p2dq_init_task_impl(struct task_struct *p, struct scx_init_task_args
25002544
else
25012545
taskc->dsq_id = cpuc->llc_dsq;
25022546

2503-
taskc->pid = p->pid;
2504-
25052547
return 0;
25062548
}
25072549

@@ -3025,6 +3067,7 @@ void BPF_STRUCT_OPS(p2dq_running, struct task_struct *p)
30253067
void BPF_STRUCT_OPS(p2dq_enqueue, struct task_struct *p __arg_trusted, u64 enq_flags)
30263068
{
30273069
struct enqueue_promise pro;
3070+
30283071
async_p2dq_enqueue(&pro, p, enq_flags);
30293072
complete_p2dq_enqueue(&pro, p);
30303073
}

scheds/rust/scx_p2dq/src/bpf/types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ struct cpu_ctx {
5353
u64 mig_dsq;
5454
u64 llc_dsq;
5555
u64 max_load_dsq;
56+
u32 running_weight; /* Weight of currently running task */
5657

5758
scx_atq_t *mig_atq;
5859
scx_dhq_t *mig_dhq;

scheds/rust/scx_p2dq/src/lib.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,14 @@ pub struct SchedulerOpts {
321321
#[clap(long, default_value_t = true, action = clap::ArgAction::Set)]
322322
pub enable_pelt: bool,
323323

324+
/// Enable latency priority system (uses task nice value)
325+
#[clap(long, action = clap::ArgAction::SetTrue)]
326+
pub latency_priority: bool,
327+
328+
/// Enable wakeup preemption for latency-critical tasks
329+
#[clap(long, action = clap::ArgAction::SetTrue)]
330+
pub wakeup_preemption: bool,
331+
324332
#[clap(flatten, next_help_heading = "Topology Options")]
325333
pub topo: TopologyArgs,
326334
}
@@ -449,6 +457,10 @@ macro_rules! init_open_skel {
449457
rodata.p2dq_config.keep_running_enabled = MaybeUninit::new(opts.keep_running);
450458
rodata.p2dq_config.pelt_enabled = MaybeUninit::new(opts.enable_pelt);
451459

460+
// Latency priority config
461+
rodata.latency_config.latency_priority_enabled = MaybeUninit::new(opts.latency_priority);
462+
rodata.latency_config.wakeup_preemption_enabled = MaybeUninit::new(opts.wakeup_preemption);
463+
452464
rodata.debug = verbose as u32;
453465
rodata.nr_cpu_ids = *NR_CPU_IDS as u32;
454466

0 commit comments

Comments
 (0)