Skip to content

Commit 9e4d040

Browse files
committed
Support GPU utilization and memory Criterion
1 parent 24a3b30 commit 9e4d040

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

sumsjob/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
# Empty list if no GPUs are to be excluded
1313
gpus_exclude = []
1414

15+
# Criteria for using a GPU (both must hold):
16+
# (1) its available utilization >= gpu_utilization
17+
# (2) its available memory (GB) >= gpu_memory
18+
# By default, the GPU utilization should be <= 10%, i.e., available utilization >= 90%
19+
gpu_utilization = 0.90
20+
# By default, the GPU should have >= 4 GB memory available
21+
gpu_memory = 4
22+
1523
# Root folder of the files
1624
# A new folder will be created in this root folder to store your code.
1725
# For example, if the jobname is "myjob", then the folder is "~/scratch/myjob".

sumsjob/gpuresource.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,23 @@ def exclude_gpus(lines):
1818

1919

2020
def gpustat(machine, stat):
21-
lines = stat.split("\n")
21+
# Example of stat:
22+
# chitu Fri Dec 31 01:33:24 2021 470.74
23+
# [0] NVIDIA GeForce RTX 3080 | 70'C, 67 % | 3637 / 10018 MB | shuaim:python3/3589(689M)
24+
# [1] NVIDIA GeForce RTX 3080 | 66'C, 37 % | 6412 / 10014 MB | shuaim:python3/3589(361M)
25+
lines = stat.strip().split("\n")
2226
lines = exclude_gpus(lines)
2327
avail = None
24-
for l in lines:
25-
if l != "" and l.split("|")[-1] == "":
28+
for l in lines[1:]:
29+
contents = l.split("|")
30+
utilization = int(contents[1].split(",", 1)[1].split("%")[0]) / 100
31+
memory = contents[2].split("/", 1)
32+
memory_used = int(memory[0])
33+
memory_total = int(memory[1].split("MB")[0])
34+
if (
35+
1 - utilization >= config.gpu_utilization
36+
and memory_total - memory_used >= config.gpu_memory * 1024
37+
):
2638
avail = int(l[1])
2739
return machine, "\n".join(lines), avail
2840

@@ -46,6 +58,7 @@ def gpu_available(first_only=False, verbose=0):
4658
for m, stat, avail in gpuresource():
4759
if verbose == 2:
4860
print(stat)
61+
print("")
4962
if gavail is None and avail is not None:
5063
gm, gavail = m, avail
5164
if first_only:

0 commit comments

Comments
 (0)