Skip to content

Commit 13ac4a1

Browse files
committed
x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem
commit 594902c986e269660302f09df9ec4bf1cf017b77 upstream. In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain structure representing a NUMA node relies on the cacheinfo interface (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map) for monitoring. The L3 cache information of an SNC NUMA node determines which domains are summed for the "top-level" L3-scoped events. rdt_mon_domain::ci is initialized using the first online CPU of a NUMA node. When this CPU goes offline, its shared_cpu_map is cleared to contain only the offline CPU itself. Subsequently, attempting to read counters via smp_call_on_cpu(offline_cpu) fails (and the error is ignored), returning zero values for "top-level events" without any error indication. Replace the cacheinfo references in struct rdt_mon_domain and struct rmid_read with the cacheinfo ID (a unique identifier for the L3 cache). rdt_domain_hdr::cpu_mask contains the online CPUs associated with that domain. When reading "top-level events", select a CPU from rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine valid CPUs for reading the RMID counter via the MSR interface. Considering all CPUs associated with the L3 cache improves the chances of picking a housekeeping CPU on which the counter reading work can be queued, avoiding an unnecessary IPI. Intel-SIG: commit 594902c986e x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem backport to RDT driver for CWF Fixes: 328ea68 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files") Signed-off-by: Qinyun Tan <qinyuntan@linux.alibaba.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Tested-by: Tony Luck <tony.luck@intel.com> Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com Signed-off-by: Kui Wen <kui.wen@intel.com>
1 parent c73db7e commit 13ac4a1

File tree

6 files changed

+25
-16
lines changed

6 files changed

+25
-16
lines changed

arch/x86/kernel/cpu/resctrl/core.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
588588
struct rdt_hw_mon_domain *hw_dom;
589589
struct rdt_domain_hdr *hdr;
590590
struct rdt_mon_domain *d;
591+
struct cacheinfo *ci;
591592
int err;
592593

593594
lockdep_assert_held(&domain_list_lock);
@@ -615,12 +616,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
615616
d = &hw_dom->d_resctrl;
616617
d->hdr.id = id;
617618
d->hdr.type = RESCTRL_MON_DOMAIN;
618-
d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
619-
if (!d->ci) {
619+
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
620+
if (!ci) {
620621
pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
621622
mon_domain_free(hw_dom);
622623
return;
623624
}
625+
d->ci_id = ci->id;
624626
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
625627

626628
arch_mon_domain_online(r, d);

arch/x86/kernel/cpu/resctrl/ctrlmondata.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -558,11 +558,12 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
558558
struct rdt_domain_hdr *hdr;
559559
struct rmid_read rr = {0};
560560
struct rdt_mon_domain *d;
561-
u32 resid, evtid, domid;
561+
u32 resid, evtid;
562562
struct rdtgroup *rdtgrp;
563+
int domid, cpu, ret = 0;
563564
struct rdt_resource *r;
565+
struct cacheinfo *ci;
564566
union mon_data_bits md;
565-
int ret = 0;
566567

567568
rdtgrp = rdtgroup_kn_lock_live(of->kn);
568569
if (!rdtgrp) {
@@ -584,10 +585,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
584585
* one that matches this cache id.
585586
*/
586587
list_for_each_entry(d, &r->mon_domains, hdr.list) {
587-
if (d->ci->id == domid) {
588-
rr.ci = d->ci;
588+
if (d->ci_id == domid) {
589+
rr.ci_id = d->ci_id;
590+
cpu = cpumask_any(&d->hdr.cpu_mask);
591+
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
592+
if (!ci)
593+
continue;
589594
mon_event_read(&rr, r, NULL, rdtgrp,
590-
&d->ci->shared_cpu_map, evtid, false);
595+
&ci->shared_cpu_map, evtid, false);
591596
goto checkresult;
592597
}
593598
}

arch/x86/kernel/cpu/resctrl/internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ union mon_data_bits {
160160
* domains in @r sharing L3 @ci_id
161161
* @evtid: Which monitor event to read.
162162
* @first: Initialize MBM counter when true.
163-
* @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
163+
* @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
164164
* @err: Error encountered when reading counter.
165165
* @val: Returned value of event counter. If @rgrp is a parent resource group,
166166
* @val includes the sum of event counts from its child resource groups.
@@ -174,7 +174,7 @@ struct rmid_read {
174174
struct rdt_mon_domain *d;
175175
enum resctrl_event_id evtid;
176176
bool first;
177-
struct cacheinfo *ci;
177+
unsigned int ci_id;
178178
int err;
179179
u64 val;
180180
void *arch_mon_ctx;

arch/x86/kernel/cpu/resctrl/monitor.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
594594
{
595595
int cpu = smp_processor_id();
596596
struct rdt_mon_domain *d;
597+
struct cacheinfo *ci;
597598
struct mbm_state *m;
598599
int err, ret;
599600
u64 tval = 0;
@@ -621,7 +622,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
621622
}
622623

623624
/* Summing domains that share a cache, must be on a CPU for that cache. */
624-
if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
625+
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
626+
if (!ci || ci->id != rr->ci_id)
625627
return -EINVAL;
626628

627629
/*
@@ -633,7 +635,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
633635
*/
634636
ret = -EINVAL;
635637
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
636-
if (d->ci->id != rr->ci->id)
638+
if (d->ci_id != rr->ci_id)
637639
continue;
638640
err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
639641
rr->evtid, &tval, rr->arch_mon_ctx);

arch/x86/kernel/cpu/resctrl/rdtgroup.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
30363036
char name[32];
30373037

30383038
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3039-
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3039+
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
30403040
if (snc_mode)
30413041
sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
30423042

@@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
30613061
return -EPERM;
30623062

30633063
priv.u.rid = r->rid;
3064-
priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
3064+
priv.u.domid = do_sum ? d->ci_id : d->hdr.id;
30653065
priv.u.sum = do_sum;
30663066
list_for_each_entry(mevt, &r->evt_list, list) {
30673067
priv.u.evtid = mevt->evtid;
@@ -3088,7 +3088,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
30883088
lockdep_assert_held(&rdtgroup_mutex);
30893089

30903090
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3091-
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3091+
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
30923092
kn = kernfs_find_and_get(parent_kn, name);
30933093
if (kn) {
30943094
/*

include/linux/resctrl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ struct rdt_ctrl_domain {
9797
/**
9898
* struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
9999
* @hdr: common header for different domain types
100-
* @ci: cache info for this domain
100+
* @ci_id: cache info id for this domain
101101
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
102102
* @mbm_total: saved state for MBM total bandwidth
103103
* @mbm_local: saved state for MBM local bandwidth
@@ -108,7 +108,7 @@ struct rdt_ctrl_domain {
108108
*/
109109
struct rdt_mon_domain {
110110
struct rdt_domain_hdr hdr;
111-
struct cacheinfo *ci;
111+
unsigned long ci_id;
112112
unsigned long *rmid_busy_llc;
113113
struct mbm_state *mbm_total;
114114
struct mbm_state *mbm_local;

0 commit comments

Comments
 (0)