Skip to content

Commit cc2af96

Browse files
committed
feat(xe): Use fdinfo for accurate GPU utilization
Implement fdinfo-based GPU utilization measurement for Intel Xe GPUs:
- Add FdinfoCycles struct and collect_fdinfo_cycles function
- Parse /proc/*/fdinfo/* for drm-cycles-rcs/vcs data
- Use client-id deduplication to prevent double-counting
- Apply EMA smoothing for stable readings
- Fall back to gtidle when fdinfo is unavailable

This provides more accurate utilization data than the residency-based gtidle measurements.
1 parent c4341c0 commit cc2af96

1 file changed

Lines changed: 185 additions & 9 deletions

File tree

src/linux/btop_collect.cpp

Lines changed: 185 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,23 @@ tab-size = 4
5757
#include "../btop_tools.hpp"
5858

5959
#if defined(GPU_SUPPORT)
60+
// Redefining C++ keywords fortunately has a warning in clang, however it's unavoidable here
61+
// since the C library uses "class" as a struct member and keywords are not allowed to be used
62+
// as identifiers in C++.
63+
#if defined(__clang__)
64+
#pragma clang diagnostic push
65+
#pragma clang diagnostic ignored "-Wkeyword-macro"
66+
#endif // __clang__
67+
6068
#define class class_
6169
extern "C" {
62-
#include "./intel_gpu_top/intel_gpu_top.h"
70+
#include "intel_gpu_top/intel_gpu_top.h"
6371
}
6472
#undef class
73+
74+
#if defined(__clang__)
75+
#pragma clang diagnostic pop
76+
#endif // __clang__
6577
#include <linux/perf_event.h>
6678
#include <sys/syscall.h>
6779
#include <sys/ioctl.h>
@@ -290,6 +302,7 @@ namespace Gpu {
290302

291303
struct XeState {
292304
string pmu_device;
305+
string pci_slot; // e.g., "0000:00:02.0"
293306
string freq_sysfs_path;
294307
int pmu_type = -1;
295308
int drm_fd = -1;
@@ -300,6 +313,13 @@ namespace Gpu {
300313
uint64_t mem_total = 0;
301314
uint64_t prev_time_ns = 0;
302315
bool first_sample = true;
316+
// fdinfo cycle tracking
317+
uint64_t prev_rcs_cycles = 0;
318+
uint64_t prev_vcs_cycles = 0;
319+
uint64_t prev_total_cycles = 0;
320+
double smoothed_rcs_util = 0.0;
321+
double smoothed_vcs_util = 0.0;
322+
bool fdinfo_available = false;
303323
};
304324

305325
XeState state;
@@ -572,11 +592,11 @@ namespace Cpu {
572592

573593
int64_t high = 0;
574594
int64_t crit = 0;
575-
for (int ii = 0; fs::exists(basepath / string("trip_point_" + to_string(ii) + "_temp")); ii++) {
576-
const string trip_type = readfile(basepath / string("trip_point_" + to_string(ii) + "_type"));
595+
for (int ii = 0; fs::exists(basepath / fmt::format("trip_point_{}_temp", ii)); ii++) {
596+
const string trip_type = readfile(basepath / fmt::format("trip_point_{}_type", ii));
577597
if (not is_in(trip_type, "high", "critical")) continue;
578598
auto& val = (trip_type == "high" ? high : crit);
579-
val = stol(readfile(basepath / string("trip_point_" + to_string(ii) + "_temp"), "0")) / 1000;
599+
val = stol(readfile(basepath / fmt::format("trip_point_{}_temp", ii), "0")) / 1000;
580600
}
581601
if (high < 1) high = 80;
582602
if (crit < 1) crit = 95;
@@ -1940,6 +1960,98 @@ namespace Gpu {
19401960
return not engines.empty();
19411961
}
19421962

1963+
// Struct to hold fdinfo cycle counts
struct FdinfoCycles {
	uint64_t rcs_cycles = 0;    // Render/Compute busy cycles (drm-cycles-rcs)
	uint64_t vcs_cycles = 0;    // Video decode busy cycles (drm-cycles-vcs)
	uint64_t total_cycles = 0;  // Reference counter (drm-total-cycles-rcs)
	bool found = false;         // Set once a total-cycles value has been parsed
};

// If `line` starts with `key`, store the remainder of the line in `value` with
// surrounding blanks (and a trailing '\r') stripped and return true.
static bool fdinfo_field(const std::string& line, std::string_view key, std::string& value) {
	if (std::string_view(line).substr(0, key.size()) != key) return false;

	const auto first = line.find_first_not_of(" \t", key.size());
	const auto last = line.find_last_not_of(" \t\r");
	if (first == std::string::npos or last < first)
		value.clear();
	else
		value.assign(line, first, last - first + 1);
	return true;
}

// Parse an unsigned 64-bit number; `out` is left untouched when parsing fails.
static bool fdinfo_parse_u64(const std::string& text, uint64_t& out) {
	try {
		out = std::stoull(text);
		return true;
	} catch (const std::exception&) {
		return false;
	}
}

// Parse the contents of one fdinfo file.
// Returns true only when the stream describes an "xe" DRM client on `pci_slot` that
// reports both a client id and a total cycle count; `client_id` and `cycles` receive
// the parsed values. Keys are accepted in any order.
static bool parse_xe_fdinfo(std::istream& in, std::string_view pci_slot, uint64_t& client_id, FdinfoCycles& cycles) {
	bool is_xe = false;
	bool matches_slot = false;
	bool has_client_id = false;
	std::string line;
	std::string value;

	while (std::getline(in, line)) {
		if (fdinfo_field(line, "drm-driver:", value)) {
			is_xe = (value == "xe");
		}
		else if (fdinfo_field(line, "drm-pdev:", value)) {
			matches_slot = (value == pci_slot);
		}
		else if (fdinfo_field(line, "drm-client-id:", value)) {
			if (fdinfo_parse_u64(value, client_id)) has_client_id = true;
		}
		else if (fdinfo_field(line, "drm-cycles-rcs:", value)) {
			fdinfo_parse_u64(value, cycles.rcs_cycles);
		}
		else if (fdinfo_field(line, "drm-cycles-vcs:", value)) {
			fdinfo_parse_u64(value, cycles.vcs_cycles);
		}
		else if (fdinfo_field(line, "drm-total-cycles-rcs:", value)) {
			if (fdinfo_parse_u64(value, cycles.total_cycles)) cycles.found = true;
		}
	}

	return is_xe and matches_slot and has_client_id and cycles.found;
}

// Collect GPU cycles from all processes' fdinfo with client-id deduplication.
// Walks /proc/<pid>/fdinfo/* and sums the rcs/vcs busy cycles of every distinct xe
// client on `pci_slot`; total_cycles is the largest reference counter seen.
// Best effort: processes or files that cannot be read are skipped, and
// result.found stays false when no matching client was readable.
static FdinfoCycles collect_fdinfo_cycles(const std::string &pci_slot) {
	namespace fs = std::filesystem;

	FdinfoCycles result;
	// The same client id can show up under several fds/processes; count it once.
	// Kept at 64 bits so that large ids are not truncated into false duplicates.
	std::unordered_set<uint64_t> seen_clients;

	try {
		const fs::directory_iterator dir_end;

		// error_code overloads are used throughout so that a single vanished or
		// inaccessible entry only skips that entry instead of aborting the whole scan.
		std::error_code proc_ec;
		fs::directory_iterator proc_it("/proc", proc_ec);
		for (; not proc_ec and proc_it != dir_end; proc_it.increment(proc_ec)) {
			const std::string pid_str = proc_it->path().filename().string();
			if (pid_str.empty() or not std::isdigit(static_cast<unsigned char>(pid_str[0])))
				continue;

			// Fails (and is skipped) when fdinfo is missing, unreadable or not a directory
			std::error_code fd_ec;
			fs::directory_iterator fd_it(proc_it->path() / "fdinfo", fd_ec);
			for (; not fd_ec and fd_it != dir_end; fd_it.increment(fd_ec)) {
				std::ifstream file(fd_it->path());
				if (not file) continue;

				uint64_t client_id = 0;
				FdinfoCycles fd_cycles;
				if (not parse_xe_fdinfo(file, pci_slot, client_id, fd_cycles)) continue;

				// Only count each client once
				if (not seen_clients.insert(client_id).second) continue;

				result.rcs_cycles += fd_cycles.rcs_cycles;
				result.vcs_cycles += fd_cycles.vcs_cycles;
				if (fd_cycles.total_cycles > result.total_cycles) {
					result.total_cycles = fd_cycles.total_cycles;
				}
				result.found = true;
			}
		}
	} catch (...) {
		// Last resort (e.g. allocation failure): keep whatever was collected so far
	}

	return result;
}
2054+
19432055
// Add a GT idle entry from sysfs gtidle directory
19442056
static bool add_gt_idle_entry(const fs::path& gtidle_dir, vector<XeGtIdle>& gt_idle) {
19452057
fs::path idle_path = gtidle_dir / "idle_residency_ms";
@@ -2044,6 +2156,15 @@ namespace Gpu {
20442156
st.pmu_device = pmu_dev;
20452157
st.pmu_type = get_pmu_type(pmu_dev);
20462158

2159+
// Extract PCI slot from PMU device name (e.g., "xe_0000_00_02.0" -> "0000:00:02.0")
2160+
if (pmu_dev.rfind("xe_", 0) == 0 and pmu_dev.size() > 3) {
2161+
string slot = pmu_dev.substr(3); // Remove "xe_" prefix
2162+
for (size_t i = 0; i < slot.size(); ++i) {
2163+
if (slot[i] == '_') slot[i] = ':';
2164+
}
2165+
st.pci_slot = slot;
2166+
}
2167+
20472168
string drm_path = "/dev/dri/" + fs::path(card_path).filename().string();
20482169
st.drm_fd = open(drm_path.c_str(), O_RDONLY);
20492170
if (st.drm_fd < 0) {
@@ -2063,6 +2184,20 @@ namespace Gpu {
20632184
st.mem_total = mem_total;
20642185
}
20652186

2187+
// Test fdinfo availability
2188+
if (not st.pci_slot.empty()) {
2189+
FdinfoCycles test_cycles = collect_fdinfo_cycles(st.pci_slot);
2190+
st.fdinfo_available = test_cycles.found and test_cycles.total_cycles > 0;
2191+
if (st.fdinfo_available) {
2192+
st.prev_total_cycles = test_cycles.total_cycles;
2193+
st.prev_rcs_cycles = test_cycles.rcs_cycles;
2194+
st.prev_vcs_cycles = test_cycles.vcs_cycles;
2195+
Logger::debug("Xe: Using fdinfo for GPU utilization (PCI: {})", st.pci_slot);
2196+
} else {
2197+
Logger::debug("Xe: fdinfo not available, falling back to gtidle");
2198+
}
2199+
}
2200+
20662201
// PMU fallback when gtidle sysfs is unavailable
20672202
if (st.gt_idle.empty()) {
20682203
if (st.pmu_type < 0) {
@@ -2238,7 +2373,46 @@ namespace Gpu {
22382373
double media_util = 0;
22392374
bool has_main = false;
22402375
bool has_media = false;
2241-
if (not st.gt_idle.empty()) {
2376+
2377+
// Use fdinfo for accurate utilization if available
2378+
if (st.fdinfo_available) {
2379+
constexpr double EMA_ALPHA = 0.3;
2380+
FdinfoCycles current = collect_fdinfo_cycles(st.pci_slot);
2381+
2382+
// Refresh the smoothed utilization only when the reference counter advanced;
// otherwise the previously smoothed values are reported again.
if (current.found and current.total_cycles > st.prev_total_cycles) {
	const uint64_t delta_total = current.total_cycles - st.prev_total_cycles;

	// The busy counters are sums over the clients currently visible in fdinfo
	// (see collect_fdinfo_cycles), so they shrink when a client goes away.
	// Saturate at 0 instead of letting the unsigned subtraction wrap around,
	// which would otherwise be reported as a bogus 100% sample.
	const uint64_t delta_rcs = current.rcs_cycles > st.prev_rcs_cycles
		? current.rcs_cycles - st.prev_rcs_cycles : 0;
	const uint64_t delta_vcs = current.vcs_cycles > st.prev_vcs_cycles
		? current.vcs_cycles - st.prev_vcs_cycles : 0;

	double raw_rcs_util = 100.0 * static_cast<double>(delta_rcs) / static_cast<double>(delta_total);
	double raw_vcs_util = 100.0 * static_cast<double>(delta_vcs) / static_cast<double>(delta_total);

	// Summed busy cycles of several clients may exceed the reference delta
	if (raw_rcs_util > 100.0) raw_rcs_util = 100.0;
	if (raw_vcs_util > 100.0) raw_vcs_util = 100.0;

	// Exponential moving average: EMA_ALPHA weights the new sample
	st.smoothed_rcs_util = EMA_ALPHA * raw_rcs_util + (1.0 - EMA_ALPHA) * st.smoothed_rcs_util;
	st.smoothed_vcs_util = EMA_ALPHA * raw_vcs_util + (1.0 - EMA_ALPHA) * st.smoothed_vcs_util;

	// Remember this sample as the baseline for the next delta
	st.prev_total_cycles = current.total_cycles;
	st.prev_rcs_cycles = current.rcs_cycles;
	st.prev_vcs_cycles = current.vcs_cycles;
}
2400+
2401+
main_util = st.smoothed_rcs_util;
2402+
media_util = st.smoothed_vcs_util;
2403+
max_util = std::max(main_util, media_util);
2404+
has_main = true;
2405+
has_media = true;
2406+
2407+
if (gpus_slice->supported_functions.gt_utilization) {
2408+
long long rc_util = clamp((long long)round(main_util), 0ll, 100ll);
2409+
long long mc_util = clamp((long long)round(media_util), 0ll, 100ll);
2410+
gpus_slice->encoder_utilization = rc_util;
2411+
gpus_slice->decoder_utilization = mc_util;
2412+
gpus_slice->gpu_percent.at("gpu-rc-totals").push_back(rc_util);
2413+
gpus_slice->gpu_percent.at("gpu-mc-totals").push_back(mc_util);
2414+
}
2415+
} else if (not st.gt_idle.empty()) {
22422416
double dt_ms = dt * 1000.0;
22432417
if (dt_ms <= 0.0) dt_ms = 1.0;
22442418
// EMA smoothing factor: higher = more responsive, lower = smoother
@@ -2972,11 +3146,13 @@ namespace Mem {
29723146
string devname = disks.at(mountpoint).dev.filename();
29733147
int c = 0;
29743148
while (devname.size() >= 2) {
2975-
if (fs::exists("/sys/block/" + devname + "/stat", ec) and access(string("/sys/block/" + devname + "/stat").c_str(), R_OK) == 0) {
2976-
if (c > 0 and fs::exists("/sys/block/" + devname + '/' + disks.at(mountpoint).dev.filename().string() + "/stat", ec))
2977-
disks.at(mountpoint).stat = "/sys/block/" + devname + '/' + disks.at(mountpoint).dev.filename().string() + "/stat";
3149+
const auto stat = fmt::format("/sys/block/{}/stat", devname);
3150+
if (fs::exists(stat, ec) and access(stat.c_str(), R_OK) == 0) {
3151+
const auto mount_stat = fmt::format("/sys/block/{}/{}/stat", devname, disks.at(mountpoint).dev.filename());
3152+
if (c > 0 and fs::exists(mount_stat, ec))
3153+
disks.at(mountpoint).stat = std::move(mount_stat);
29783154
else
2979-
disks.at(mountpoint).stat = "/sys/block/" + devname + "/stat";
3155+
disks.at(mountpoint).stat = std::move(stat);
29803156
break;
29813157
//? Set ZFS stat filepath
29823158
} else if (fstype == "zfs") {

0 commit comments

Comments
 (0)