Collect GPU index assignments from SLURM gres_detail and filter Job Analyzer GPU charts (facebookresearch#129)

lushengt-meta · meta-codesync[bot] · commit da069ae0b8f1 · 2026-04-14T21:00:38.000-07:00
Summary: Pull Request resolved: facebookresearch#129 Adds GPU index collection from the SLURM REST API's gres_detail field to the GCM pipeline, and uses it in the FAIR Job Analyzer to show only the GPUs assigned to a job (instead of all 8 GPUs on the node). ## Background: When a job uses fewer GPUs than available on a node (e.g., --gpus-per-task=1 on an 8-GPU node), the Job Analyzer previously showed metrics for all 8 GPUs. The existing GPUS_REQUESTED field comes from TRES-PER-NODE (always 8 for the full node), not the per-task allocation. TRES_GPUS_ALLOCATED correctly reports the count (e.g., 1) but not which specific GPU indices are assigned. The SLURM REST API provides gres_detail — an array of strings with exact GPU index assignments per node (e.g., "gpu:ampere:1(IDX:7)"). Verified on AVA RSC: scontrol show job <id> -d | grep GRES → GRES=gpu:ampere:1(IDX:7) ## Pipeline change (Python): - parsing.py: Added parse_gres_gpu_indices() that parses gres_detail strings into GPU index lists. Returns a comma-separated string of indices for single-node jobs (e.g., "7" or "0,3,5"; a full-node allocation yields "0,1,2,3,4,5,6,7" and the caller decides whether to filter), and None for multi-node jobs or when no IDX entry is found. Returning None for multi-node jobs avoids storing unnecessary data. - squeue.py: Added GRES_GPU_INDICES field (nullable, defaults to None) and "gres_detail" → "GRES_DETAIL" REST API mapping. Adds one string column to existing fair_job_data rows — no extra entries. - test_parsers.py: Added 15 parametrized test cases covering single GPU, multiple GPUs, range notation, full-node, multi-node, 16-GPU nodes, and edge cases (empty, null, N/A, no IDX). ## Job Analyzer change (Hack): - FairJob.php: Added $gresGpuIndices property - FAIRJobAnalyzerLatestJobInfoModule.php: Queries GRES_GPU_INDICES from fair_job_data Scuba table - FAIRJobAnalyzerPerfAnalyzerModule.php: When gresGpuIndices is available (e.g., "7"), filters all 5 GPU ODS charts (utilization, temperature, SM util, SM occupancy, memory) to gpu=(7) with per-GPU reduceTerm.
When null (full-node or multi-node), shows all GPUs with the original averaged reduceTerm. ### Scope: - Single-node partial-GPU jobs: shows only the assigned GPUs (100% accurate) - Single-node full-GPU jobs: shows all GPUs (unchanged, no filtering needed) - Multi-node jobs: shows all GPUs (unchanged — gres_detail has per-node values that can't be stored in fair_job_data's 1-row-per-job format) Differential Revision: D99787988
diff --git a/gcm/monitoring/slurm/parsing.py b/gcm/monitoring/slurm/parsing.py
@@ -278,3 +278,50 @@ def parse_scontrol_maxnodes(v: str) -> int:
 def parse_job_ids(s: str) -> list[str]:
     """Given a comma separated string of job ids, return a list of job ids."""
     return s.split(",") if s else []
+
+
+def parse_gres_gpu_indices(v: str) -> str | None:
+    """Parse gres_detail to extract GPU indices for single-node jobs.
+
+    The input is a comma-joined string of gres_detail entries from the SLURM REST
+    API (joined by _map_job_fields). Each entry looks like "gpu:ampere:1(IDX:7)"
+    or "gpu:ampere:4(IDX:0-3)".
+
+    Returns a comma-separated string of sorted, unique GPU indices (e.g., "7" or
+    "0,1,2,3") when exactly one IDX entry is present. Returns None for null-like
+    input, multiple IDX entries (multi-node), or a malformed index list.
+
+    Examples:
+
+    >>> parse_gres_gpu_indices("gpu:ampere:1(IDX:7)")
+    '7'
+    >>> parse_gres_gpu_indices("gpu:ampere:3(IDX:0,3,5)")
+    '0,3,5'
+    >>> parse_gres_gpu_indices("gpu:ampere:4(IDX:0-3)")
+    '0,1,2,3'
+    >>> parse_gres_gpu_indices("gpu:ampere:8(IDX:0-7)")
+    '0,1,2,3,4,5,6,7'
+    >>> parse_gres_gpu_indices("gpu:ampere:8(IDX:0-7),gpu:ampere:8(IDX:0-7)")
+    >>> parse_gres_gpu_indices("")
+    >>> parse_gres_gpu_indices("(null)")
+    """
+    if not v or v in {"N/A", "(null)", "[]"}:
+        return None
+
+    idx_matches = re.findall(r"IDX:([0-9,\-]+)", v)
+    if len(idx_matches) != 1:
+        # Multi-node (multiple IDX entries) or no IDX found
+        return None
+
+    indices: set[int] = set()
+    try:
+        for part in idx_matches[0].split(","):
+            start_s, sep, end_s = part.partition("-")
+            start = int(start_s)
+            end = int(end_s) if sep else start  # bare "N" is the range N..N
+            indices.update(range(start, end + 1))
+    except ValueError:
+        return None  # malformed list ("3-", "0,,1"): parse failure, not a crash
+
+    # A descending range ("3-1") leaves the set empty; "" then becomes None
+    return ",".join(str(i) for i in sorted(indices)) or None
diff --git a/gcm/schemas/slurm/squeue.py b/gcm/schemas/slurm/squeue.py
@@ -1,12 +1,13 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
-from dataclasses import dataclass, fields
+from dataclasses import dataclass, field, fields
 
 from gcm.monitoring.clock import time_to_time_aware
 from gcm.monitoring.coerce import maybe_float, maybe_int
 from gcm.monitoring.slurm.nodelist_parsers import nodelist
 from gcm.monitoring.slurm.parsing import (
     maybe_parse_memory_to_bytes,
+    parse_gres_gpu_indices,
     parse_gres_or_tres,
     parse_value_from_tres,
 )
@@ -75,6 +76,14 @@ class JobData(DerivedCluster):
     FEATURE: str = parsed_field(parser=str)
     RESTARTCNT: int = parsed_field(parser=int)
     SCHEDNODES: list[str] | None = parsed_field(parser=lambda s: nodelist()(s)[0])
+    GRES_GPU_INDICES: str | None = field(
+        default=None,
+        metadata={
+            "parser": parse_gres_gpu_indices,
+            "field_name": "GRES_DETAIL",
+            "slurm_field": False,
+        },
+    )
 
 
 JOB_DATA_SLURM_FIELDS = list(
@@ -125,4 +134,5 @@ class JobData(DerivedCluster):
     "features": "FEATURE",
     "restart_cnt": "RESTARTCNT",
     "scheduled_nodes": "SCHEDNODES",
+    "gres_detail": "GRES_DETAIL",
 }
diff --git a/gcm/tests/test_parsers.py b/gcm/tests/test_parsers.py
@@ -17,6 +17,7 @@
     maybe_parse_memory_to_bytes,
     mb_to_bytes,
     parse_gres,
+    parse_gres_gpu_indices,
     parse_memory_to_bytes,
     parse_tres,
     parse_value_from_tres,
@@ -549,6 +550,54 @@ def test_parse_gpu_from_tres_bad(s: str, exc: Type[Exception]) -> None:
         parse_value_from_tres(s, "gres/gpu")
 
 
+# Each case pairs a comma-joined gres_detail string with the expected result.
+@pytest.mark.parametrize(
+    "s, expected",
+    [
+        # Single GPU (1-GPU job)
+        ("gpu:ampere:1(IDX:7)", "7"),
+        # Multiple specific GPUs
+        ("gpu:ampere:3(IDX:0,3,5)", "0,3,5"),
+        # Range notation
+        ("gpu:ampere:4(IDX:0-3)", "0,1,2,3"),
+        # Mixed range and specific
+        ("gpu:ampere:5(IDX:0-2,5,7)", "0,1,2,5,7"),
+        # Full node (8 GPUs) — still returns indices (caller decides whether to filter)
+        ("gpu:ampere:8(IDX:0-7)", "0,1,2,3,4,5,6,7"),
+        # Multi-node (multiple IDX entries) — returns None, unsupported
+        (
+            "gpu:ampere:8(IDX:0-7),gpu:ampere:8(IDX:0-7)",
+            None,
+        ),
+        # Multi-node partial GPUs — returns None, unsupported
+        (
+            "gpu:ampere:3(IDX:0,3,5),gpu:ampere:3(IDX:1,4,7)",
+            None,
+        ),
+        # Empty string
+        ("", None),
+        # SLURM null values
+        ("(null)", None),
+        ("N/A", None),
+        # Empty array representation
+        ("[]", None),
+        # No IDX in the string
+        ("gpu:ampere:8", None),
+        # 16-GPU node — partial allocation (works for nodes with >8 GPUs)
+        ("gpu:ampere:10(IDX:0-9)", "0,1,2,3,4,5,6,7,8,9"),
+        # 16-GPU node — full allocation
+        ("gpu:ampere:16(IDX:0-15)", "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15"),
+        # Comma-join ambiguity: entries are themselves comma-joined, so the
+        # commas inside one IDX list must not be mistaken for entry separators
+        ("gpu:ampere:1(IDX:7),gpu:ampere:3(IDX:0,3,5)", None),
+    ],
+)
+@typechecked
+def test_parse_gres_gpu_indices(s: str, expected: str | None) -> None:
+    """Check parse_gres_gpu_indices against each (input, expected) pair."""
+    assert parse_gres_gpu_indices(s) == expected
+
+
 @pytest.mark.parametrize(
     "value, expected",
     [