63 changes: 58 additions & 5 deletions scheds/include/bpf_experimental.h
@@ -580,11 +580,6 @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;

extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
int (callback_fn)(void *map, int *key, void *value),
unsigned int flags__k, void *aux__ign) __ksym;
#define bpf_wq_set_callback(timer, cb, flags) \
bpf_wq_set_callback_impl(timer, cb, flags, NULL)

struct bpf_iter_kmem_cache;
extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym;
@@ -615,6 +610,8 @@ extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)

#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
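/*
 * For reference: with the kernel's usual preempt_count layout
 * (SOFTIRQ_SHIFT == 8), SOFTIRQ_OFFSET is 0x100, the amount the kernel
 * adds to the preempt count while actually serving a softirq. This is
 * what in_serving_softirq() tests, as opposed to SOFTIRQ_MASK, which
 * also covers softirq-disabled (BH-disabled) sections.
 */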

extern bool CONFIG_PREEMPT_RT __kconfig __weak;
#ifdef bpf_target_x86
extern const int __preempt_count __ksym;
@@ -653,4 +650,60 @@ static inline int bpf_in_interrupt(void)
(tsk->softirq_disable_cnt & SOFTIRQ_MASK);
}

/* Description
 * Report whether the current context is NMI context. Only works on the
 * following archs:
 * * x86
 * * arm64
 */
static inline int bpf_in_nmi(void)
{
return get_preempt_count() & NMI_MASK;
}

/* Description
 * Report whether the current context is hard IRQ context. Only works on
 * the following archs:
 * * x86
 * * arm64
 */
static inline int bpf_in_hardirq(void)
{
return get_preempt_count() & HARDIRQ_MASK;
}

/* Description
 * Report whether a softirq is currently being served. On PREEMPT_RT,
 * softirqs run in task context, so the count is read from
 * task_struct::softirq_disable_cnt instead of the preempt count. Only
 * works on the following archs:
 * * x86
 * * arm64
 */
static inline int bpf_in_serving_softirq(void)
{
struct task_struct___preempt_rt *tsk;
int pcnt;

pcnt = get_preempt_count();
if (!CONFIG_PREEMPT_RT)
return (pcnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;

tsk = (void *) bpf_get_current_task_btf();
return (tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
}

/* Description
 * Report whether the current context is task (process) context, i.e.,
 * neither NMI, hardirq, nor serving-softirq context. Only works on the
 * following archs:
 * * x86
 * * arm64
 */
static inline int bpf_in_task(void)
{
struct task_struct___preempt_rt *tsk;
int pcnt;

pcnt = get_preempt_count();
if (!CONFIG_PREEMPT_RT)
return !(pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));

tsk = (void *) bpf_get_current_task_btf();
return !((pcnt & (NMI_MASK | HARDIRQ_MASK)) |
((tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET));
}
#endif
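These helpers are best-effort hints rather than guarantees: on unsupported architectures they compile but always return 0. A minimal usage sketch (hypothetical caller, assuming this header is included):

static inline int wake_context_boost(void)
{
	/* Positive checks only: on an unsupported arch every helper
	 * returns 0 and we simply fall through with no boost. */
	if (bpf_in_nmi() || bpf_in_hardirq())
		return 2;	/* hypothetical strong boost */
	if (bpf_in_serving_softirq())
		return 1;	/* hypothetical mild boost */
	return 0;		/* task context (or unsupported arch) */
}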
41 changes: 33 additions & 8 deletions scheds/rust/scx_lavd/src/bpf/lat_cri.bpf.c
@@ -32,6 +32,39 @@ static u64 calc_weight_factor(struct task_struct *p, task_ctx *taskc)
if (test_task_flag(taskc, LAVD_FLAG_IS_SYNC_WAKEUP))
weight_boost += LAVD_LC_WEIGHT_BOOST_REGULAR;

/*
 * Prioritize a task woken by a hardirq or softirq.
 * - hardirq: The top half of interrupt processing (e.g., a mouse
 * move, keypress, disk I/O completion, or GPU V-Sync) has just
 * completed and hands off further processing to a fair task. The
 * task that was waiting for this specific hardware signal gets the
 * "express lane," i.e., the highest boost.
 *
 * - softirq: The kernel just finished the bottom half of interrupt
 * processing, such as network packets or timers. If a packet
 * arrives for your browser, or a timer expires for a frame refresh,
 * the task gets a "high" boost. This keeps the data pipeline
 * flowing smoothly.
 *
 * Note that the irq-boosted criticality flows through the forward
 * and backward propagation mechanism described below.
 */
if (test_task_flag(taskc, LAVD_FLAG_WOKEN_BY_HARDIRQ)) {
reset_task_flag(taskc, LAVD_FLAG_WOKEN_BY_HARDIRQ);
weight_boost += LAVD_LC_WEIGHT_BOOST_HIGHEST;
} else if (test_task_flag(taskc, LAVD_FLAG_WOKEN_BY_SOFTIRQ)) {
reset_task_flag(taskc, LAVD_FLAG_WOKEN_BY_SOFTIRQ);
weight_boost += LAVD_LC_WEIGHT_BOOST_HIGH;
}

/*
* Prioritize a task woken by an RT/DL task.
*/
if (test_task_flag(taskc, LAVD_FLAG_WOKEN_BY_RT_DL)) {
reset_task_flag(taskc, LAVD_FLAG_WOKEN_BY_RT_DL);
weight_boost += LAVD_LC_WEIGHT_BOOST_HIGH;
}

/*
* Prioritize a kernel task since many kernel tasks serve
* latency-critical jobs.
@@ -51,14 +84,6 @@ static u64 calc_weight_factor(struct task_struct *p, task_ctx *taskc)
if (is_kernel_worker(p))
weight_boost += LAVD_LC_WEIGHT_BOOST_REGULAR;

/*
* Prioritize a task woken by an RT/DL task.
*/
if (test_task_flag(taskc, LAVD_FLAG_WOKEN_BY_RT_DL)) {
reset_task_flag(taskc, LAVD_FLAG_WOKEN_BY_RT_DL);
weight_boost += LAVD_LC_WEIGHT_BOOST_HIGH;
}

/*
* Prioritize an affinitized task since it has restrictions
* in placement so it tends to be delayed.
3 changes: 3 additions & 0 deletions scheds/rust/scx_lavd/src/bpf/lavd.bpf.h
@@ -81,6 +81,7 @@ enum consts_internal {
LAVD_LC_WEIGHT_BOOST_REGULAR = 128, /* 2^7 */
LAVD_LC_WEIGHT_BOOST_MEDIUM = (2 * LAVD_LC_WEIGHT_BOOST_REGULAR),
LAVD_LC_WEIGHT_BOOST_HIGH = (2 * LAVD_LC_WEIGHT_BOOST_MEDIUM),
LAVD_LC_WEIGHT_BOOST_HIGHEST = (2 * LAVD_LC_WEIGHT_BOOST_HIGH),
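/*
 * For reference, the ladder above works out to: REGULAR = 128 (2^7),
 * MEDIUM = 256 (2^8), HIGH = 512 (2^9), and HIGHEST = 1024 (2^10),
 * so each tier doubles the previous one.
 */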
LAVD_LC_GREEDY_SHIFT = 3, /* 12.5% */
LAVD_LC_WAKE_INTERVAL_MIN = LAVD_SLICE_MIN_NS_DFL,
LAVD_LC_INH_RECEIVER_SHIFT = 2, /* 25.0% of receiver's latency criticality */
@@ -125,6 +126,8 @@ enum consts_flags {
LAVD_FLAG_IDLE_CPU_PICKED = (0x1 << 9), /* an idle CPU is picked at ops.select_cpu() */
LAVD_FLAG_KSOFTIRQD = (0x1 << 10), /* ksoftirqd/%u thread */
LAVD_FLAG_WOKEN_BY_RT_DL = (0x1 << 11), /* woken by a RT/DL task */
LAVD_FLAG_WOKEN_BY_HARDIRQ = (0x1 << 12), /* woken by a hardware interrupt */
LAVD_FLAG_WOKEN_BY_SOFTIRQ = (0x1 << 13), /* woken by a softirq */
};

/*
39 changes: 39 additions & 0 deletions scheds/rust/scx_lavd/src/bpf/main.bpf.c
@@ -182,6 +182,7 @@
*/
#include <scx/common.bpf.h>
#include <bpf_arena_common.bpf.h>
#include <bpf_experimental.h>
#include "intf.h"
#include "lavd.bpf.h"
#include "util.bpf.h"
@@ -560,17 +561,43 @@ s32 BPF_STRUCT_OPS(lavd_select_cpu, struct task_struct *p, s32 prev_cpu,
.cpuc_cur = get_cpu_ctx(),
.wake_flags = wake_flags,
};
struct task_struct *waker;
bool found_idle = false;
s32 cpu_id;

if (!ictx.taskc || !ictx.cpuc_cur)
return prev_cpu;

/*
* Check whether this is a synchronous wake-up so the task's
* latency-criticality can be boosted later.
*/
if (wake_flags & SCX_WAKE_SYNC)
set_task_flag(ictx.taskc, LAVD_FLAG_IS_SYNC_WAKEUP);
else
reset_task_flag(ictx.taskc, LAVD_FLAG_IS_SYNC_WAKEUP);

/*
* Check whether the task was woken by an interrupt handler (either the
* top or bottom half) to boost its latency-criticality later.
*
* WARNING: bpf_in_nmi/task/hardirq/serving_softirq() are supported only
* on x86 and arm64. On unsupported architectures (e.g., s390x), they
* always return 0. So never rely on !bpf_in_xxx() and keep the logic
* below optional. See the link below for more details:
* - https://lore.kernel.org/bpf/20260124132706.183681-2-changwoo@igalia.com/T/#u
*/
if (unlikely((bpf_in_hardirq() || bpf_in_nmi()) &&
!bpf_in_serving_softirq())) {
set_task_flag(ictx.taskc, LAVD_FLAG_WOKEN_BY_HARDIRQ);
reset_task_flag(ictx.taskc, LAVD_FLAG_WOKEN_BY_SOFTIRQ);
} else if (unlikely(bpf_in_serving_softirq() ||
((waker = bpf_get_current_task_btf()) &&
is_ksoftirqd(waker)))) {
set_task_flag(ictx.taskc, LAVD_FLAG_WOKEN_BY_SOFTIRQ);
reset_task_flag(ictx.taskc, LAVD_FLAG_WOKEN_BY_HARDIRQ);
}
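/*
 * Hypothetical illustration of the rule above (not part of this
 * change): on an unsupported arch such as s390x,
 *
 *	if (!bpf_in_hardirq())
 *		do_task_only_work();	// hypothetical helper
 *
 * would run even from hardirq context, because the helper always
 * returns 0 there. Hence only positive, best-effort checks like the
 * ones above are safe.
 */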

/*
* Find an idle cpu and reserve it since the task @p will run
* on the idle cpu. Even if there is no idle cpu, still respect
@@ -1145,11 +1172,23 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
* Filter out unrelated tasks. We keep track of tasks under the same
* parent process to confine the waker-wakee relationship within
* closely related tasks.
*
* WARNING: bpf_in_nmi/task/hardirq/serving_softirq() are supported only
* on x86 and arm64. On unsupported architectures (e.g., s390x), they
* always return 0. So never rely on !bpf_in_xxx() and keep the logic
* below optional. See the link below for more details:
* - https://lore.kernel.org/bpf/20260124132706.183681-2-changwoo@igalia.com/T/#u
*/
if (enq_flags & (SCX_ENQ_PREEMPT | SCX_ENQ_REENQ | SCX_ENQ_LAST))
return;

if (bpf_in_hardirq() || bpf_in_serving_softirq() || bpf_in_nmi())
return;

waker = bpf_get_current_task_btf();
if (is_ksoftirqd(waker))
return;

if ((p->real_parent != waker->real_parent))
return;
