Skip to content

Commit 3f0f87a

Browse files
authored
Fix incorrect number of CPUs being detected on Linux with offline CPUs (#2555)
* Only consider online CPUs when building the CPU topology on Linux. * Correctly set thread affinity when there are offline CPUs. The previous code used CPU_COUNT and assumed that the set bits in a cpu_set_t are always contiguous; this is not the case when CPUs are offlined. Now we always check just the first 32 CPUs (the engine's current MAX_CPUS limit).
1 parent a2de4de commit 3f0f87a

File tree

2 files changed

+64
-31
lines changed

2 files changed

+64
-31
lines changed

rts/System/Platform/Linux/CpuTopology.cpp

Lines changed: 61 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,61 @@ Vendor detect_cpu_vendor() {
3131
return VENDOR_UNKNOWN;
3232
}
3333

34-
// Get number of logical CPUs
35-
int get_cpu_count() {
36-
return sysconf(_SC_NPROCESSORS_CONF);
34+
std::vector<int> get_online_cpus() {
35+
std::vector<int> cpus;
36+
std::ifstream file("/sys/devices/system/cpu/online");
37+
if (file) {
38+
// This is a comma-seperated list of ranges
39+
// or single values.
40+
// Ex: 0,2,4,6 or 0-7 or 0-3,8-15
41+
std::string line;
42+
std::getline(file, line);
43+
std::istringstream ss(line);
44+
int min_cpu;
45+
int max_cpu;
46+
char sep;
47+
while (ss >> min_cpu) {
48+
if ((ss >> sep) && sep == '-') {
49+
// Range of CPUs separted by '-'
50+
if (!(ss >> max_cpu)) {
51+
// Should not ever happen (would need to be a malformed online file)
52+
if (min_cpu >= MAX_CPUS) {
53+
LOG_L(L_WARNING, "CPU index %d exceeds bitset limit.", min_cpu);
54+
} else {
55+
cpus.push_back(min_cpu);
56+
}
57+
break;
58+
}
59+
for (int cpu = min_cpu; cpu <= max_cpu; ++cpu) {
60+
if (cpu >= MAX_CPUS) {
61+
LOG_L(L_WARNING, "CPU index %d exceeds bitset limit.", cpu);
62+
continue;
63+
}
64+
cpus.push_back(cpu);
65+
}
66+
// Consume the trailing comma
67+
ss >> sep;
68+
} else {
69+
// Single CPU
70+
if (min_cpu >= MAX_CPUS) {
71+
LOG_L(L_WARNING, "CPU index %d exceeds bitset limit.", min_cpu);
72+
continue;
73+
}
74+
cpus.push_back(min_cpu);
75+
}
76+
}
77+
} else {
78+
// Fallback in case of permission issues reading from sysfs
79+
int num_cpus = sysconf(_SC_NPROCESSORS_CONF);
80+
for (int cpu = 0; cpu < num_cpus; ++cpu) {
81+
if (cpu >= MAX_CPUS) {
82+
LOG_L(L_WARNING, "CPU index %d exceeds bitset limit.", cpu);
83+
continue;
84+
}
85+
cpus.push_back(cpu);
86+
}
87+
}
88+
return cpus;
3789
}
3890

3991
// Set CPU affinity to a specific core
@@ -98,14 +150,9 @@ void collect_intel_affinity_masks(std::bitset<MAX_CPUS> &eff_mask,
98150
std::bitset<MAX_CPUS> &perf_mask,
99151
std::bitset<MAX_CPUS> &low_ht_mask,
100152
std::bitset<MAX_CPUS> &high_ht_mask) {
101-
int num_cpus = get_cpu_count();
102-
103-
for (int cpu = 0; cpu < num_cpus; ++cpu) {
104-
if (cpu >= MAX_CPUS) {
105-
LOG_L(L_WARNING, "CPU index %d exceeds bitset limit.", cpu);
106-
continue;
107-
}
153+
const auto cpus = get_online_cpus();
108154

155+
for (const auto cpu : cpus) {
109156
CoreType core_type = get_intel_core_type(cpu);
110157
// default to performance core.
111158
if (core_type == CORE_UNKNOWN) core_type = CORE_PERFORMANCE;
@@ -122,14 +169,9 @@ void collect_amd_affinity_masks(std::bitset<MAX_CPUS> &eff_mask,
122169
std::bitset<MAX_CPUS> &perf_mask,
123170
std::bitset<MAX_CPUS> &low_smt_mask,
124171
std::bitset<MAX_CPUS> &high_smt_mask) {
125-
int num_cpus = get_cpu_count();
126-
127-
for (int cpu = 0; cpu < num_cpus; ++cpu) {
128-
if (cpu >= MAX_CPUS) {
129-
LOG_L(L_WARNING, "CPU index %d exceeds bitset limit.", cpu);
130-
continue;
131-
}
172+
const auto cpus = get_online_cpus();
132173

174+
for (const auto cpu : cpus) {
133175
perf_mask.set(cpu);
134176

135177
collect_smt_affinity_masks(cpu, low_smt_mask, high_smt_mask);
@@ -195,13 +237,9 @@ ProcessorGroupCaches& get_group_cache(ProcessorCaches& processorCaches, uint32_t
195237
// We are also only looking at L3 caches at the moment.
196238
ProcessorCaches GetProcessorCache() {
197239
ProcessorCaches processorCaches;
198-
int num_cpus = get_cpu_count();
240+
const auto cpus = get_online_cpus();
199241

200-
for (int cpu = 0; cpu < num_cpus; ++cpu) {
201-
if (cpu >= MAX_CPUS) {
202-
LOG_L(L_WARNING, "CPU index %d exceeds bitset limit.", cpu);
203-
continue;
204-
}
242+
for (const auto cpu : cpus) {
205243
uint32_t cacheSize = get_thread_cache(cpu);
206244
ProcessorGroupCaches& groupCache = get_group_cache(processorCaches, cacheSize);
207245

rts/System/Platform/Threading.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,7 @@ namespace Threading {
118118
static std::uint32_t CalcCoreAffinityMask(const cpu_set_t* cpuSet) {
119119
std::uint32_t coreMask = 0;
120120

121-
// without the min(..., 32), `(1 << n)` could overflow
122-
const int numCPUs = std::min(CPU_COUNT(&cpusSystem), 32);
123-
124-
for (int n = numCPUs - 1; n >= 0; --n) {
121+
for (int n = 31; n >= 0; --n) {
125122
if (CPU_ISSET(n, cpuSet))
126123
coreMask |= (1 << n);
127124
}
@@ -132,10 +129,8 @@ namespace Threading {
132129
static void SetWantedCoreAffinityMask(cpu_set_t* cpuDstSet, std::uint32_t coreMask) {
133130
CPU_ZERO(cpuDstSet);
134131

135-
const int numCPUs = std::min(CPU_COUNT(&cpusSystem), 32);
136-
137-
for (int n = numCPUs - 1; n >= 0; --n) {
138-
if ((coreMask & (1 << n)) != 0)
132+
for (int n = 31; n >= 0; --n) {
133+
if (((coreMask & (1 << n)) != 0) && CPU_ISSET(n, &cpusSystem))
139134
CPU_SET(n, cpuDstSet);
140135
}
141136

0 commit comments

Comments
 (0)