Skip to content

Commit da069ae

Browse files
lushengt-meta authored and meta-codesync[bot] committed
Collect GPU index assignments from SLURM gres_detail and filter Job Analyzer GPU charts (facebookresearch#129)
Summary:
Pull Request resolved: facebookresearch#129

Adds GPU index collection from the SLURM REST API's gres_detail field to the GCM pipeline, and uses it in the FAIR Job Analyzer to show only the GPUs assigned to a job (instead of all 8 GPUs on the node).

## Background:

When a job uses fewer GPUs than available on a node (e.g., --gpus-per-task=1 on an 8-GPU node), the Job Analyzer previously showed metrics for all 8 GPUs. The existing GPUS_REQUESTED field comes from TRES-PER-NODE (always 8 for the full node), not the per-task allocation. TRES_GPUS_ALLOCATED correctly reports the count (e.g., 1) but not which specific GPU indices are assigned.

The SLURM REST API provides gres_detail — an array of strings with exact GPU index assignments per node (e.g., "gpu:ampere:1(IDX:7)"). Verified on AVA RSC:

    scontrol show job <id> -d | grep GRES → GRES=gpu:ampere:1(IDX:7)

## Pipeline change (Python):

- parsing.py: Added parse_gres_gpu_indices() that parses gres_detail strings into GPU index lists. Returns a comma-separated string of indices for single-node partial-GPU jobs (e.g., "7" or "0,3,5"), None for full-node (8 GPUs) or multi-node jobs. This avoids storing unnecessary data.
- squeue.py: Added GRES_GPU_INDICES field (nullable, defaults to None) and "gres_detail" → "GRES_DETAIL" REST API mapping. Adds one string column to existing fair_job_data rows — no extra entries.
- test_parsers.py: Added 12 test cases covering single GPU, multiple GPUs, range notation, full-node, multi-node, and edge cases (empty, null, N/A).

## Job Analyzer change (Hack):

- FairJob.php: Added $gresGpuIndices property.
- FAIRJobAnalyzerLatestJobInfoModule.php: Queries GRES_GPU_INDICES from the fair_job_data Scuba table.
- FAIRJobAnalyzerPerfAnalyzerModule.php: When gresGpuIndices is available (e.g., "7"), filters all 5 GPU ODS charts (utilization, temperature, SM util, SM occupancy, memory) to gpu=(7) with per-GPU reduceTerm. When null (full-node or multi-node), shows all GPUs with the original averaged reduceTerm.

### Scope:

- Single-node partial-GPU jobs: shows only the assigned GPUs (100% accurate)
- Single-node full-GPU jobs: shows all GPUs (unchanged, no filtering needed)
- Multi-node jobs: shows all GPUs (unchanged — gres_detail has per-node values that can't be stored in fair_job_data's 1-row-per-job format)

Differential Revision: D99787988
1 parent 93a7fde commit da069ae

3 files changed

Lines changed: 107 additions & 1 deletion

File tree

gcm/monitoring/slurm/parsing.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,50 @@ def parse_scontrol_maxnodes(v: str) -> int:
278278
def parse_job_ids(s: str) -> list[str]:
    """Given a comma separated string of job ids, return a list of job ids.

    An empty (falsy) input yields an empty list rather than ``[""]``, which is
    what a bare ``"".split(",")`` would produce. Individual ids are returned
    exactly as they appear between commas; no whitespace stripping is done.
    """
    return s.split(",") if s else []
281+
282+
283+
def parse_gres_gpu_indices(v: str) -> str | None:
284+
"""Parse gres_detail to extract GPU indices for single-node jobs.
285+
286+
The input is a comma-joined string of gres_detail entries from the SLURM REST
287+
API (joined by _map_job_fields). Each entry looks like "gpu:ampere:1(IDX:7)"
288+
or "gpu:ampere:4(IDX:0-3)".
289+
290+
Returns a comma-separated string of GPU indices (e.g., "7" or "0,1,2,3") for
291+
single-node jobs. Returns None for multi-node jobs (multiple IDX entries) or
292+
parse failures.
293+
294+
Examples:
295+
296+
>>> parse_gres_gpu_indices("gpu:ampere:1(IDX:7)")
297+
'7'
298+
>>> parse_gres_gpu_indices("gpu:ampere:3(IDX:0,3,5)")
299+
'0,3,5'
300+
>>> parse_gres_gpu_indices("gpu:ampere:4(IDX:0-3)")
301+
'0,1,2,3'
302+
>>> parse_gres_gpu_indices("gpu:ampere:8(IDX:0-7)")
303+
'0,1,2,3,4,5,6,7'
304+
>>> parse_gres_gpu_indices("gpu:ampere:8(IDX:0-7),gpu:ampere:8(IDX:0-7)")
305+
>>> parse_gres_gpu_indices("")
306+
>>> parse_gres_gpu_indices("(null)")
307+
"""
308+
if not v or v in {"N/A", "(null)", "[]"}:
309+
return None
310+
311+
idx_matches = re.findall(r"IDX:([0-9,\-]+)", v)
312+
if len(idx_matches) != 1:
313+
# Multi-node (multiple IDX entries) or no IDX found
314+
return None
315+
316+
indices: list[int] = []
317+
for part in idx_matches[0].split(","):
318+
if "-" in part:
319+
start_s, end_s = part.split("-", 1)
320+
indices.extend(range(int(start_s), int(end_s) + 1))
321+
else:
322+
indices.append(int(part))
323+
324+
if not indices:
325+
return None
326+
327+
return ",".join(str(i) for i in sorted(indices))

gcm/schemas/slurm/squeue.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# Copyright (c) Meta Platforms, Inc. and affiliates.
22
# All rights reserved.
3-
from dataclasses import dataclass, fields
3+
from dataclasses import dataclass, field, fields
44

55
from gcm.monitoring.clock import time_to_time_aware
66
from gcm.monitoring.coerce import maybe_float, maybe_int
77
from gcm.monitoring.slurm.nodelist_parsers import nodelist
88
from gcm.monitoring.slurm.parsing import (
99
maybe_parse_memory_to_bytes,
10+
parse_gres_gpu_indices,
1011
parse_gres_or_tres,
1112
parse_value_from_tres,
1213
)
@@ -75,6 +76,14 @@ class JobData(DerivedCluster):
7576
FEATURE: str = parsed_field(parser=str)
7677
RESTARTCNT: int = parsed_field(parser=int)
7778
SCHEDNODES: list[str] | None = parsed_field(parser=lambda s: nodelist()(s)[0])
79+
GRES_GPU_INDICES: str | None = field(
80+
default=None,
81+
metadata={
82+
"parser": parse_gres_gpu_indices,
83+
"field_name": "GRES_DETAIL",
84+
"slurm_field": False,
85+
},
86+
)
7887

7988

8089
JOB_DATA_SLURM_FIELDS = list(
@@ -125,4 +134,5 @@ class JobData(DerivedCluster):
125134
"features": "FEATURE",
126135
"restart_cnt": "RESTARTCNT",
127136
"scheduled_nodes": "SCHEDNODES",
137+
"gres_detail": "GRES_DETAIL",
128138
}

gcm/tests/test_parsers.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
maybe_parse_memory_to_bytes,
1818
mb_to_bytes,
1919
parse_gres,
20+
parse_gres_gpu_indices,
2021
parse_memory_to_bytes,
2122
parse_tres,
2223
parse_value_from_tres,
@@ -549,6 +550,54 @@ def test_parse_gpu_from_tres_bad(s: str, exc: Type[Exception]) -> None:
549550
parse_value_from_tres(s, "gres/gpu")
550551

551552

553+
@pytest.mark.parametrize(
    "s, expected",
    [
        # Single GPU (1-GPU job)
        ("gpu:ampere:1(IDX:7)", "7"),
        # Multiple specific GPUs
        ("gpu:ampere:3(IDX:0,3,5)", "0,3,5"),
        # Range notation
        ("gpu:ampere:4(IDX:0-3)", "0,1,2,3"),
        # Mixed range and specific
        ("gpu:ampere:5(IDX:0-2,5,7)", "0,1,2,5,7"),
        # Full node (8 GPUs) — still returns indices (caller decides whether to filter)
        ("gpu:ampere:8(IDX:0-7)", "0,1,2,3,4,5,6,7"),
        # Multi-node (multiple IDX entries) — returns None, unsupported
        (
            "gpu:ampere:8(IDX:0-7),gpu:ampere:8(IDX:0-7)",
            None,
        ),
        # Multi-node partial GPUs — returns None, unsupported.
        # Also covers the comma-join ambiguity: commas inside (IDX:...) are
        # delimited by the closing ")", so the two entries are still seen as
        # two separate IDX groups.
        (
            "gpu:ampere:3(IDX:0,3,5),gpu:ampere:3(IDX:1,4,7)",
            None,
        ),
        # Empty string
        ("", None),
        # SLURM null values
        ("(null)", None),
        ("N/A", None),
        # Empty array representation
        ("[]", None),
        # No IDX in the string
        ("gpu:ampere:8", None),
        # 16-GPU node — partial allocation (works for nodes with >8 GPUs)
        ("gpu:ampere:10(IDX:0-9)", "0,1,2,3,4,5,6,7,8,9"),
        # 16-GPU node — full allocation
        ("gpu:ampere:16(IDX:0-15)", "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15"),
    ],
)
@typechecked
def test_parse_gres_gpu_indices(s: str, expected: str | None) -> None:
    """Check parse_gres_gpu_indices against each (input, expected) pair above."""
    assert parse_gres_gpu_indices(s) == expected
599+
600+
552601
@pytest.mark.parametrize(
553602
"value, expected",
554603
[

0 commit comments

Comments
 (0)