@@ -202,6 +202,16 @@ namespace Gpu {
202202 unsigned int computeInstanceId;
203203 unsigned long long usedGpuCcProtectedMemory;
204204 };
// Per-process record carrying pid, used GPU memory and the GPU/compute instance ids.
// It is the output element type of the *_v2 running-process function pointers.
// Compared with the struct declared directly above, it has no usedGpuCcProtectedMemory field.
// NOTE(review): presumably mirrors NVML's v2 process-info layout; field order and
// types must match the library ABI exactly — confirm against nvml.h.
205+ struct nvmlProcessInfo_v2_t {
206+ unsigned int pid;
207+ unsigned long long usedGpuMemory;
208+ unsigned int gpuInstanceId;
209+ unsigned int computeInstanceId;
210+ };
// Smallest per-process record: only the pid and the used GPU memory.
// It is the output element type of the *_v1 running-process function pointers.
// NOTE(review): presumably mirrors NVML's original (unversioned) process-info
// layout — confirm against nvml.h before relying on it.
211+ struct nvmlProcessInfo_v1_t {
212+ unsigned int pid;
213+ unsigned long long usedGpuMemory;
214+ };
205215 struct proc_stat {
206216 double gpu{};
207217 uint64_t mem{};
@@ -226,7 +236,11 @@ namespace Gpu {
226236 nvmlReturn_t (*nvmlDeviceGetEncoderUtilization)(nvmlDevice_t, unsigned int *, unsigned int *);
227237 nvmlReturn_t (*nvmlDeviceGetDecoderUtilization)(nvmlDevice_t, unsigned int *, unsigned int *);
228238 nvmlReturn_t (*nvmlDeviceGetProcessUtilization)(nvmlDevice_t, nvmlProcessUtilizationSample_t*, unsigned int *, unsigned long long );
// Function pointers for the graphics and compute running-process queries, one per
// record version (v1, v2, v3). Each takes the device, a pointer to an entry count
// and an output array whose element type matches the version suffix.
// Callers compare these pointers against nullptr before calling them.
239+ nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v1)(nvmlDevice_t, unsigned int *, nvmlProcessInfo_v1_t*);
240+ nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v2)(nvmlDevice_t, unsigned int *, nvmlProcessInfo_v2_t*);
229241 nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v3)(nvmlDevice_t, unsigned int *, nvmlProcessInfo_t*);
242+ nvmlReturn_t (*nvmlDeviceGetComputeRunningProcesses_v1)(nvmlDevice_t, unsigned int *, nvmlProcessInfo_v1_t*);
243+ nvmlReturn_t (*nvmlDeviceGetComputeRunningProcesses_v2)(nvmlDevice_t, unsigned int *, nvmlProcessInfo_v2_t*);
230244 nvmlReturn_t (*nvmlDeviceGetComputeRunningProcesses_v3)(nvmlDevice_t, unsigned int *, nvmlProcessInfo_t*);
231245
232246 // ? Data
@@ -1300,15 +1314,20 @@ namespace Gpu {
13001314 LOAD_SYM (nvmlDeviceGetEncoderUtilization);
13011315 LOAD_SYM (nvmlDeviceGetDecoderUtilization);
13021316 nvmlDeviceGetProcessUtilization = (decltype (nvmlDeviceGetProcessUtilization))try_load_nvml_sym (" nvmlDeviceGetProcessUtilization" );
// Resolve each running-process entry point independently, so every pointer is
// typed with the record layout of the symbol it actually holds. The unversioned
// symbol name is stored in the *_v1 pointer.
// NOTE(review): try_load_nvml_sym presumably yields nullptr for a missing symbol;
// the availability flag computed below compares these pointers against nullptr.
1317+ nvmlDeviceGetGraphicsRunningProcesses_v1 = (decltype (nvmlDeviceGetGraphicsRunningProcesses_v1))try_load_nvml_sym (" nvmlDeviceGetGraphicsRunningProcesses" );
1318+ nvmlDeviceGetGraphicsRunningProcesses_v2 = (decltype (nvmlDeviceGetGraphicsRunningProcesses_v2))try_load_nvml_sym (" nvmlDeviceGetGraphicsRunningProcesses_v2" );
13031319 nvmlDeviceGetGraphicsRunningProcesses_v3 = (decltype (nvmlDeviceGetGraphicsRunningProcesses_v3))try_load_nvml_sym (" nvmlDeviceGetGraphicsRunningProcesses_v3" );
1320+ nvmlDeviceGetComputeRunningProcesses_v1 = (decltype (nvmlDeviceGetComputeRunningProcesses_v1))try_load_nvml_sym (" nvmlDeviceGetComputeRunningProcesses" );
1321+ nvmlDeviceGetComputeRunningProcesses_v2 = (decltype (nvmlDeviceGetComputeRunningProcesses_v2))try_load_nvml_sym (" nvmlDeviceGetComputeRunningProcesses_v2" );
13041322 nvmlDeviceGetComputeRunningProcesses_v3 = (decltype (nvmlDeviceGetComputeRunningProcesses_v3))try_load_nvml_sym (" nvmlDeviceGetComputeRunningProcesses_v3" );
1305- if (nvmlDeviceGetGraphicsRunningProcesses_v3 == nullptr )
1306- nvmlDeviceGetGraphicsRunningProcesses_v3 = (decltype (nvmlDeviceGetGraphicsRunningProcesses_v3))try_load_nvml_sym (" nvmlDeviceGetGraphicsRunningProcesses_v2" );
1307- if (nvmlDeviceGetComputeRunningProcesses_v3 == nullptr )
1308- nvmlDeviceGetComputeRunningProcesses_v3 = (decltype (nvmlDeviceGetComputeRunningProcesses_v3))try_load_nvml_sym (" nvmlDeviceGetComputeRunningProcesses_v2" );
13091323 process_utilization_function_available = nvmlDeviceGetProcessUtilization != nullptr ;
1310- process_memory_functions_available = nvmlDeviceGetGraphicsRunningProcesses_v3 != nullptr
1311- and nvmlDeviceGetComputeRunningProcesses_v3 != nullptr ;
// The flag is true as soon as ANY of the six running-process pointers resolved.
// The replaced expression required both v3 pointers (graphics AND compute) to be
// present; with `or`, a single available entry point is enough, so every call site
// has to check the individual pointer for nullptr before using it.
1324+ process_memory_functions_available =
1325+ nvmlDeviceGetGraphicsRunningProcesses_v3 != nullptr or
1326+ nvmlDeviceGetGraphicsRunningProcesses_v2 != nullptr or
1327+ nvmlDeviceGetGraphicsRunningProcesses_v1 != nullptr or
1328+ nvmlDeviceGetComputeRunningProcesses_v3 != nullptr or
1329+ nvmlDeviceGetComputeRunningProcesses_v2 != nullptr or
1330+ nvmlDeviceGetComputeRunningProcesses_v1 != nullptr ;
13121331
13131332 #undef LOAD_SYM
13141333
@@ -1562,30 +1581,72 @@ namespace Gpu {
15621581
15631582 process_stats.clear ();
15641583
1565- auto append_process_memory = [&](nvmlDevice_t device, auto get_processes, std::unordered_map<size_t , uint64_t >& mem_by_pid) {
1566- unsigned int proc_count = 64 ;
// Records `used_mem` for `pid` in `mem_by_pid`, keeping the larger of the value
// already stored and the new one. Because the maximum is kept rather than a sum,
// reporting the same pid more than once does not add its memory up.
// Values equal to NVML_VALUE_NOT_AVAILABLE_ULL are ignored and insert nothing.
1584+ auto merge_process_memory = [&](unsigned int pid, unsigned long long used_mem, std::unordered_map<size_t , uint64_t >& mem_by_pid) {
1585+ if (used_mem == NVML_VALUE_NOT_AVAILABLE_ULL) return ;
// operator[] default-inserts the pid with 0 the first time it is seen.
1586+ auto & mem = mem_by_pid[pid];
1587+ mem = max (mem, (uint64_t )used_mem);
1588+ };
1589+
// Queries the running processes of `device` through `fn` (a v3 entry point filling
// nvmlProcessInfo_t records) and merges each entry's pid/usedGpuMemory into
// `mem_by_pid` via merge_process_memory. Does nothing when `fn` is nullptr.
1590+ auto append_process_memory_v3 = [&](nvmlDevice_t device, auto fn, std::unordered_map<size_t , uint64_t >& mem_by_pid) {
1591+ if (fn == nullptr ) return ;
1592+ unsigned int proc_count = 0 ;
// First call passes no buffer; proc_count is read back afterwards and used as the
// number of records to allocate.
1593+ auto result = fn (device, &proc_count, nullptr );
// NOTE(review): this check is redundant — the next line already returns for every
// status other than NVML_SUCCESS and NVML_ERROR_INSUFFICIENT_SIZE.
1594+ if (result == NVML_ERROR_NOT_FOUND) return ;
1595+ if (result != NVML_SUCCESS and result != NVML_ERROR_INSUFFICIENT_SIZE) return ;
1596+ if (proc_count == 0 ) return ;
1597+
15671598 std::vector<nvmlProcessInfo_t> processes (proc_count);
1568- auto result = get_processes (device, &proc_count, processes.data ());
// Second call fills the buffer. Any status other than NVML_SUCCESS discards the
// data for this call.
// NOTE(review): the replaced code resized and retried once on
// NVML_ERROR_INSUFFICIENT_SIZE; here that status on the second call just returns.
// Presumably the process count can grow between the two calls — consider keeping a retry.
1599+ result = fn (device, &proc_count, processes.data ());
1600+ if (result != NVML_SUCCESS) return ;
15691601
1570- if (result == NVML_ERROR_INSUFFICIENT_SIZE and proc_count > 0 ) {
1571- processes.resize (proc_count);
1572- result = get_processes (device, &proc_count, processes.data ());
// Only the first proc_count records (as updated by the second call) are read.
1602+ for (unsigned int n = 0 ; n < proc_count; ++n) {
1603+ merge_process_memory (processes[n].pid , processes[n].usedGpuMemory , mem_by_pid);
15731604 }
1605+ };
1606+
// Queries the running processes of `device` through `fn` (a v2 entry point filling
// nvmlProcessInfo_v2_t records) and merges each entry's pid/usedGpuMemory into
// `mem_by_pid` via merge_process_memory. Does nothing when `fn` is nullptr.
// NOTE(review): apart from the record type, the body is identical to the v3 and v1
// variants; a single helper parameterised on the record type would remove the duplication.
1607+ auto append_process_memory_v2 = [&](nvmlDevice_t device, auto fn, std::unordered_map<size_t , uint64_t >& mem_by_pid) {
1608+ if (fn == nullptr ) return ;
1609+ unsigned int proc_count = 0 ;
// First call passes no buffer; proc_count is read back afterwards and used as the
// number of records to allocate.
1610+ auto result = fn (device, &proc_count, nullptr );
// NOTE(review): this check is redundant — the next line already returns for every
// status other than NVML_SUCCESS and NVML_ERROR_INSUFFICIENT_SIZE.
1611+ if (result == NVML_ERROR_NOT_FOUND) return ;
1612+ if (result != NVML_SUCCESS and result != NVML_ERROR_INSUFFICIENT_SIZE) return ;
1613+ if (proc_count == 0 ) return ;
1614+
1615+ std::vector<nvmlProcessInfo_v2_t> processes (proc_count);
// Second call fills the buffer; any status other than NVML_SUCCESS discards the data.
// NOTE(review): there is no retry if the buffer turns out too small on this call —
// presumably possible when processes start between the two calls; confirm and consider retrying.
1616+ result = fn (device, &proc_count, processes.data ());
1617+ if (result != NVML_SUCCESS) return ;
1618+
// Only the first proc_count records (as updated by the second call) are read.
1619+ for (unsigned int n = 0 ; n < proc_count; ++n) {
1620+ merge_process_memory (processes[n].pid , processes[n].usedGpuMemory , mem_by_pid);
1621+ }
1622+ };
1623+
// Queries the running processes of `device` through `fn` (a v1 entry point filling
// nvmlProcessInfo_v1_t records) and merges each entry's pid/usedGpuMemory into
// `mem_by_pid` via merge_process_memory. Does nothing when `fn` is nullptr.
// NOTE(review): apart from the record type, the body is identical to the v3 and v2
// variants; a single helper parameterised on the record type would remove the duplication.
1624+ auto append_process_memory_v1 = [&](nvmlDevice_t device, auto fn, std::unordered_map<size_t , uint64_t >& mem_by_pid) {
1625+ if (fn == nullptr ) return ;
1626+ unsigned int proc_count = 0 ;
// First call passes no buffer; proc_count is read back afterwards and used as the
// number of records to allocate.
1627+ auto result = fn (device, &proc_count, nullptr );
// NOTE(review): this check is redundant — the next line already returns for every
// status other than NVML_SUCCESS and NVML_ERROR_INSUFFICIENT_SIZE.
1628+ if (result == NVML_ERROR_NOT_FOUND) return ;
1629+ if (result != NVML_SUCCESS and result != NVML_ERROR_INSUFFICIENT_SIZE) return ;
1630+ if (proc_count == 0 ) return ;
1631+
1632+ std::vector<nvmlProcessInfo_v1_t> processes (proc_count);
// Second call fills the buffer; any status other than NVML_SUCCESS discards the data.
// NOTE(review): there is no retry if the buffer turns out too small on this call —
// presumably possible when processes start between the two calls; confirm and consider retrying.
1633+ result = fn (device, &proc_count, processes.data ());
15741634 if (result != NVML_SUCCESS) return ;
15751635
// Only the first proc_count records (as updated by the second call) are read.
15761636 for (unsigned int n = 0 ; n < proc_count; ++n) {
1577- const auto & proc = processes[n];
1578- if (proc.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ULL) continue ;
1579- auto & mem = mem_by_pid[proc.pid ];
1580- mem = max (mem, (uint64_t )proc.usedGpuMemory );
1637+ merge_process_memory (processes[n].pid , processes[n].usedGpuMemory , mem_by_pid);
15811638 }
15821639 };
15831640
15841641 for (unsigned int i = 0 ; i < device_count; ++i) {
15851642 if (process_memory_functions_available) {
15861643 std::unordered_map<size_t , uint64_t > mem_by_pid;
1587- append_process_memory (devices[i], nvmlDeviceGetGraphicsRunningProcesses_v3, mem_by_pid);
1588- append_process_memory (devices[i], nvmlDeviceGetComputeRunningProcesses_v3, mem_by_pid);
1644+ append_process_memory_v3 (devices[i], nvmlDeviceGetGraphicsRunningProcesses_v3, mem_by_pid);
1645+ append_process_memory_v3 (devices[i], nvmlDeviceGetComputeRunningProcesses_v3, mem_by_pid);
1646+ append_process_memory_v2 (devices[i], nvmlDeviceGetGraphicsRunningProcesses_v2, mem_by_pid);
1647+ append_process_memory_v2 (devices[i], nvmlDeviceGetComputeRunningProcesses_v2, mem_by_pid);
1648+ append_process_memory_v1 (devices[i], nvmlDeviceGetGraphicsRunningProcesses_v1, mem_by_pid);
1649+ append_process_memory_v1 (devices[i], nvmlDeviceGetComputeRunningProcesses_v1, mem_by_pid);
15891650
15901651 for (const auto & [pid, mem] : mem_by_pid) {
15911652 process_stats[pid].mem += mem;
@@ -2926,6 +2987,7 @@ namespace Proc {
29262987 string current_sort;
29272988 string current_filter;
29282989 bool current_rev{};
// Value of the "proc_gpu_only" setting as of the last filtering pass; compared with
// the current setting to detect a toggle.
2990+ bool current_gpu_only{};
29292991 bool is_tree_mode;
29302992
29312993 fs::file_time_type passwd_time;
@@ -3158,14 +3220,17 @@ namespace Proc {
31583220 const auto & sorting = Config::getS (" proc_sorting" );
31593221 auto reverse = Config::getB (" proc_reversed" );
31603222 const auto & filter = Config::getS (" proc_filter" );
3223+ const bool gpu_only = Config::getB (" proc_gpu_only" );
31613224 auto per_core = Config::getB (" proc_per_core" );
31623225 auto should_filter_kernel = Config::getB (" proc_filter_kernel" );
31633226 auto tree = Config::getB (" proc_tree" );
31643227 auto show_detailed = Config::getB (" show_detailed" );
31653228 const auto pause_proc_list = Config::getB (" pause_proc_list" );
31663229 const size_t detailed_pid = Config::getI (" detailed_pid" );
// Re-filter when the text filter or the gpu-only toggle differs from the value
// remembered from the previous pass.
3167- bool should_filter = current_filter != filter;
3230+ bool should_filter = current_filter != filter or current_gpu_only != gpu_only;
// While gpu_only is enabled, filtering is forced on every pass, which also makes
// sorted_change below true on every pass.
// NOTE(review): presumably intentional because the set of GPU processes changes
// between updates — confirm the per-pass re-sort cost is acceptable.
3231+ if (gpu_only) should_filter = true ;
31683232 if (should_filter) current_filter = filter;
3233+ if (should_filter) current_gpu_only = gpu_only;
31693234 bool sorted_change = (sorting != current_sort or reverse != current_rev or should_filter);
31703235 bool tree_mode_change = tree != is_tree_mode;
31713236 if (sorted_change) {
@@ -3509,8 +3574,8 @@ namespace Proc {
35093574 if (should_filter) {
35103575 filter_found = 0 ;
35113576 for (auto & p : current_procs) {
3512- if (not tree and not filter.empty ()) {
3513- if (! matches_filter (p, filter)) {
// NOTE(review): the inner `or gpu_only` is redundant, because the outer `or gpu_only`
// already makes the whole condition true whenever gpu_only is set. The expression is
// equivalent to `(not tree and not filter.empty ()) or gpu_only`.
// Unlike the replaced condition, this one is also true in tree mode when gpu_only is set.
// NOTE(review): gpu_only is not passed to matches_filter here; presumably the gpu-only
// test happens inside matches_filter or elsewhere — verify.
3577+ if (( not tree and ( not filter.empty () or gpu_only)) or gpu_only ) {
3578+ if (not matches_filter (p, filter)) {
35143579 p.filtered = true ;
35153580 filter_found++;
35163581 } else {
0 commit comments