|
def get_runner_resources(runner_label):
    """
    Returns the resources required for a runner based on the runner label.
    CPU count and memory in GiB, tmpdisk in MiB.
    Based on https://github.com/WATonomous/infra-config/blob/b604376f4ee9fa3336b11dc084ba90b962ec7ee1/kubernetes/github-arc/get-config.py#L120-L142

    Two kinds of labels are understood:

    * Preset sizes: ``slurm-runner-small``, ``-medium``, ``-large``,
      ``-xlarge`` and ``-medium-long-running``.
    * Experimental custom sizes, formatted as
      ``slurm-runner-<N>cpu-<N>mempercpu[-<T>time][-<N>tmpdisk]``, e.g.
      ``slurm-runner-4cpu-2mempercpu-30:00time-16tmpdisk`` gives 4 CPUs,
      2 GiB RAM per CPU, a ``30:00`` time limit and 16 GiB of tmpdisk.
      In the label, tmpdisk is given in GiB; it is converted to MiB in the
      returned dict. ``time`` defaults to ``"30:00"`` and tmpdisk to 16 GiB
      when omitted. A value of 0 for cpu or mempercpu falls back to 2.

    Args:
        runner_label: The runner label string.

    Returns:
        A new dict with keys ``"cpu"`` (int), ``"mem-per-cpu"`` (str such as
        ``"2G"``), ``"tmpdisk"`` (int, MiB) and ``"time"`` (str).

    Raises:
        ValueError: If the label is not a preset, does not start with
            ``slurm-runner-``, or does not follow the custom-size format.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    MIB_PER_GIB = 1024
    TMPDISK_DEFAULT = 16 * MIB_PER_GIB  # 16 GiB, expressed in MiB

    # Preset label -> (cpu, tmpdisk in MiB, time limit).
    presets = {
        "slurm-runner-small": (1, 4096, "00:30:00"),
        "slurm-runner-medium": (2, TMPDISK_DEFAULT, "00:30:00"),
        "slurm-runner-large": (4, TMPDISK_DEFAULT, "00:30:00"),
        "slurm-runner-xlarge": (16, TMPDISK_DEFAULT, "00:30:00"),
        "slurm-runner-medium-long-running": (4, TMPDISK_DEFAULT, "06:00:00"),
    }
    if runner_label in presets:
        cpu, tmpdisk, time_limit = presets[runner_label]
        return {"cpu": cpu, "mem-per-cpu": "2G", "tmpdisk": tmpdisk, "time": time_limit}

    if not runner_label.startswith("slurm-runner-"):
        raise ValueError(f"Runner label {runner_label} not found.")

    # Experimental custom sized runners.
    # fullmatch() already anchors both ends, so no ^/$ is needed.
    match = re.fullmatch(
        r"slurm-runner-"
        r"(?P<cpu>\d+)cpu-"
        r"(?P<mem_per_cpu>\d+)mempercpu"
        r"(?:-(?P<time>[\d:]+)time)?"
        r"(?:-(?P<tmpdisk>\d+)tmpdisk)?",
        runner_label,
    )
    if match is None:
        raise ValueError(
            f"Failed to parse runner label {runner_label}. "
            "Error: Runner label format did not match expected pattern."
        )

    cpu = int(match.group("cpu")) or 2
    mem_per_cpu = int(match.group("mem_per_cpu")) or 2
    time_limit = match.group("time") or "30:00"

    # The label carries tmpdisk in GiB while the default is already in MiB,
    # so only a value taken from the label is converted.
    tmpdisk_gib = match.group("tmpdisk")
    if tmpdisk_gib is None:
        tmpdisk = TMPDISK_DEFAULT
    else:
        tmpdisk = int(tmpdisk_gib) * MIB_PER_GIB

    return {
        "cpu": cpu,
        "mem-per-cpu": f"{mem_per_cpu}G",
        "tmpdisk": tmpdisk,
        "time": time_limit,
    }
0 commit comments