# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Generate benchmark-rollup.json and benchmark-rollup.csv from sa-bench results."""

from __future__ import annotations

import csv
import json
import math
import re
import sys
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

import yaml


OUTPUT_FIELDS = [
    "Config",
    "Total GPU Count",
    "Decode GPU Count",
    "Concurrency",
    "Total Token Throughput",
    "Output Token Throughput",
    "Median TTFT",
    "Median TPOT",
    "Median ITL",
    "P90 Decode Running Requests",
    "Output Token Throughput per User",
    "Total Token Throughput per GPU",
]

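# Matches "#running-req: <N>" entries emitted in SGLang decode worker logs.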
RUNNING_REQ_PATTERN = re.compile(r"#running-req:\s*(\d+)")


def _get_percentile(percentiles: list, target: float) -> float | None:
    # ... (percentile lookup body elided in this excerpt) ...
    return None


def _extract_gpu_num(path: Path) -> int | None:
    """Extract total GPU count from the result filename."""
    match = re.search(r"_gpus_(\d+)", path.name)
    if not match:
        return None
    return int(match.group(1))


def _read_resources_from_yaml(path: Path) -> dict[str, Any] | None:
    """Read the top-level resources field from a YAML file."""
    try:
        data = yaml.safe_load(path.read_text()) or {}
    except Exception as exc:
        print(f"Failed to parse {path}: {exc}", file=sys.stderr)
        return None

    resources = data.get("resources")
    return resources if isinstance(resources, dict) else None


def _read_runtime_resources(log_dir: Path) -> dict[str, Any] | None:
    """Read resolved runtime resources, preferring override-expanded configs."""
    output_dir = log_dir.parent

    runtime_configs = sorted(output_dir.glob("config_*.yaml"))
    for config_path in runtime_configs:
        resources = _read_resources_from_yaml(config_path)
        if resources:
            return resources

    config_path = log_dir / "config.yaml"
    if config_path.exists():
        return _read_resources_from_yaml(config_path)

    return None


def _compute_prefill_gpus_per_worker(resources: dict[str, Any], gpus_per_node: int | None) -> int | None:
    """Compute prefill GPUs per worker using the same fallback order as ResourceConfig."""
    explicit = resources.get("gpus_per_prefill")
    if explicit not in (None, 0):
        return int(explicit)

    prefill_nodes = resources.get("prefill_nodes")
    prefill_workers = resources.get("prefill_workers")
    if prefill_nodes not in (None, 0) and prefill_workers not in (None, 0) and gpus_per_node not in (None, 0):
        return (int(prefill_nodes) * int(gpus_per_node)) // int(prefill_workers)

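    # Last resort: assume each prefill worker spans one full node's GPUs.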
    return gpus_per_node


def _compute_decode_gpu_count(resources: dict[str, Any], total_gpu_count: int | None) -> int | None:
    """Compute total decode GPUs using ResourceConfig-compatible rules when possible."""
    decode_workers = int(resources.get("decode_workers", 0) or 0)
    decode_nodes_raw = resources.get("decode_nodes")
    decode_nodes = int(decode_nodes_raw) if decode_nodes_raw not in (None, "") else None

    explicit = resources.get("gpus_per_decode")
    if explicit not in (None, 0):
        gpus_per_decode = int(explicit)
        return decode_workers * gpus_per_decode if decode_workers else gpus_per_decode

    gpus_per_node_raw = resources.get("gpus_per_node")
    gpus_per_node = int(gpus_per_node_raw) if gpus_per_node_raw not in (None, 0) else None

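    # If gpus_per_node is absent, infer it by dividing the total GPU count
    # evenly across all prefill + decode nodes (only when it divides cleanly).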
    if gpus_per_node is None and total_gpu_count not in (None, 0):
        prefill_nodes = int(resources.get("prefill_nodes", 0) or 0)
        total_nodes = prefill_nodes + (decode_nodes or 0)
        if total_nodes > 0 and total_gpu_count % total_nodes == 0:
            gpus_per_node = total_gpu_count // total_nodes

    if decode_nodes not in (None, 0) and gpus_per_node not in (None, 0):
        if decode_workers:
            gpus_per_decode = (decode_nodes * gpus_per_node) // decode_workers
            return decode_workers * gpus_per_decode
        return decode_nodes * gpus_per_node

    if decode_nodes == 0 and decode_workers:
        gpus_per_prefill = _compute_prefill_gpus_per_worker(resources, gpus_per_node)
        if gpus_per_prefill not in (None, 0):
            return decode_workers * gpus_per_prefill

    return None


def _extract_decode_gpu_count(log_dir: Path, total_gpu_count: int | None) -> int | None:
    """Extract decode GPU count from metadata or resolved runtime config."""
    metadata = _read_job_metadata(log_dir)
    if metadata:
        resources = metadata.get("resources")
        if isinstance(resources, dict):
            decode_gpu_count = _compute_decode_gpu_count(resources, total_gpu_count)
            if decode_gpu_count is not None:
                return decode_gpu_count

    runtime_resources = _read_runtime_resources(log_dir)
    if runtime_resources:
        return _compute_decode_gpu_count(runtime_resources, total_gpu_count)

    return None


def _read_yaml_name(path: Path) -> str | None:
    """Read the top-level name field from a YAML file."""
    try:
        data = yaml.safe_load(path.read_text()) or {}
    except Exception as exc:
        print(f"Failed to parse {path}: {exc}", file=sys.stderr)
        return None

    name = data.get("name")
    return str(name) if name else None


def _read_job_name(path: Path) -> str | None:
    """Read the job_name field from submit metadata JSON."""
    try:
        data: dict[str, Any] = json.loads(path.read_text())
    except Exception as exc:
        print(f"Failed to parse {path}: {exc}", file=sys.stderr)
        return None

    job_name = data.get("job_name")
    return str(job_name) if job_name else None


def _read_job_metadata(log_dir: Path) -> dict[str, Any] | None:
    """Read submit metadata JSON from the output directory when available."""
    output_dir = log_dir.parent
    for metadata_path in sorted(output_dir.glob("*.json")):
        try:
            data = json.loads(metadata_path.read_text())
        except Exception as exc:
            print(f"Failed to parse {metadata_path}: {exc}", file=sys.stderr)
            continue
        if isinstance(data, dict):
            return data
    return None


def _read_config_name(log_dir: Path) -> str | None:
    """Read the effective config name, preferring resolved override configs."""
    output_dir = log_dir.parent

    # For override jobs, submit.py saves the resolved runtime config as config_<suffix>.yaml.
    runtime_configs = sorted(output_dir.glob("config_*.yaml"))
    for config_path in runtime_configs:
        name = _read_yaml_name(config_path)
        if name:
            return name

    # Job metadata also stores the final job/config name.
    metadata_files = sorted(output_dir.glob("*.json"))
    for metadata_path in metadata_files:
        job_name = _read_job_name(metadata_path)
        if job_name:
            return job_name

    # Fall back to the copied source config in logs/.
    config_path = log_dir / "config.yaml"
    if config_path.exists():
        return _read_yaml_name(config_path)

    return None


def _is_sglang_disagg(log_dir: Path) -> bool:
    """Return whether the current run is an SGLang disaggregated deployment."""
    metadata = _read_job_metadata(log_dir)
    if not metadata:
        return False

    if metadata.get("backend_type") != "sglang":
        return False

    resources = metadata.get("resources")
    if not isinstance(resources, dict):
        return False

    prefill_nodes = int(resources.get("prefill_nodes", 0) or 0)
    decode_nodes = int(resources.get("decode_nodes", 0) or 0)
    agg_workers = int(resources.get("agg_workers", 0) or 0)
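    # Disaggregated serving: dedicated prefill and decode nodes, no aggregated workers.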
    return prefill_nodes > 0 and decode_nodes > 0 and agg_workers == 0


def _extract_p90_decode_running_requests(log_dir: Path) -> int | None:
    """Stream decode logs and compute the nearest-rank P90 of #running-req values."""
    if not _is_sglang_disagg(log_dir):
        return None

    counts: Counter[int] = Counter()
    total = 0

    for decode_log in sorted(log_dir.glob("*decode*.out")):
        try:
            with decode_log.open("r", errors="replace") as f:
                for line in f:
                    match = RUNNING_REQ_PATTERN.search(line)
                    if not match:
                        continue
                    value = int(match.group(1))
                    counts[value] += 1
                    total += 1
        except OSError as exc:
            print(f"Failed to read {decode_log}: {exc}", file=sys.stderr)

    if total == 0:
        return None

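    # Nearest-rank percentile: walk the sorted histogram until the cumulative
    # count reaches ceil(0.9 * total).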
    rank = math.ceil(total * 0.9)
    cumulative = 0
    for value in sorted(counts):
        cumulative += counts[value]
        if cumulative >= rank:
            return value

    return None


def _safe_ratio(numerator: float | int | None, denominator: float | int | None) -> float | None:
    """Return numerator / denominator when both values are valid and denominator != 0."""
    if numerator is None or denominator in (None, 0):
        return None
    return float(numerator) / float(denominator)


def _format_csv_value(value: object) -> str:
    """Format CSV values with at most three decimal places for numeric fields."""
    if value is None:
        return ""
    if isinstance(value, int):
        return str(value)
    if isinstance(value, float):
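        # Trim trailing zeros so 12.300 renders as 12.3 and 5.000 as 5.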
        return f"{value:.3f}".rstrip("0").rstrip(".")
    return str(value)


def _build_csv_row(
    data: dict[str, object],
    config_name: str,
    gpu_num: int | None,
    decode_gpu_count: int | None,
    p90_decode_running_requests: int | None,
) -> dict[str, object]:
    """Build one CSV row from a parsed sa-bench result."""
    total_token_throughput = data.get("total_token_throughput")
    median_tpot = data.get("median_tpot_ms")
    row = {
        "Config": config_name,
        "Total GPU Count": gpu_num,
        "Decode GPU Count": decode_gpu_count,
        "Concurrency": data.get("max_concurrency"),
        "Total Token Throughput": total_token_throughput,
        "Output Token Throughput": data.get("output_throughput"),
        "Median TTFT": data.get("median_ttft_ms"),
        "Median TPOT": median_tpot,
        "Median ITL": data.get("median_itl_ms"),
        "P90 Decode Running Requests": p90_decode_running_requests,
308+ "Output Token Throughput per User" : _safe_ratio (1000.0 , median_tpot ),
309+ "Total Token Throughput per GPU" : _safe_ratio (total_token_throughput , gpu_num ),
310+ }
311+ return {key : _format_csv_value (value ) for key , value in row .items ()}
312+
313+
23314def main (log_dir : Path ) -> None :
24- """Generate benchmark-rollup.json from sa-bench result files."""
315+ """Generate benchmark-rollup.json and benchmark-rollup.csv from sa-bench result files."""
25316 result_files = sorted (log_dir .glob ("sa-bench_*/results_*.json" ))
26317 if not result_files :
27318 print ("No sa-bench results found" , file = sys .stderr )
28319 return
29320
30321 runs = []
322+ csv_rows = []
31323 config = {}
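    # Resolve run-level metadata once; it is shared by every CSV row.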
    config_name = _read_config_name(log_dir)
    first_gpu_num = _extract_gpu_num(result_files[0]) if result_files else None
    decode_gpu_count = _extract_decode_gpu_count(log_dir, first_gpu_num)
    p90_decode_running_requests = _extract_p90_decode_running_requests(log_dir)

    for result_file in result_files:
        try:
            data = json.loads(result_file.read_text())
        except json.JSONDecodeError as exc:
            print(f"Failed to parse {result_file}: {exc}", file=sys.stderr)
            continue

        if not config:
            config = {
                "model": data.get("model_id"),
                # ... (remaining config fields elided in this excerpt) ...
            }

        runs.append({
            # ... (per-run metrics elided in this excerpt) ...
            "total_output_tokens": data.get("total_output"),
        })
63358
359+ csv_rows .append (
360+ _build_csv_row (
361+ data = data ,
362+ config_name = config_name or str (data .get ("model_id" ) or "unknown" ),
363+ gpu_num = _extract_gpu_num (result_file ),
364+ decode_gpu_count = decode_gpu_count ,
365+ p90_decode_running_requests = p90_decode_running_requests ,
366+ )
367+ )
368+
369+ if not runs :
370+ print ("No valid sa-bench results found" , file = sys .stderr )
371+ return
372+
64373 rollup = {
65374 "benchmark_type" : "sa-bench" ,
66375 "timestamp" : datetime .now (timezone .utc ).isoformat ().replace ("+00:00" , "Z" ),
67376 "config" : config ,
68377 "runs" : runs ,
69378 }
70379
71- output_path = log_dir / "benchmark-rollup.json"
72- output_path .write_text (json .dumps (rollup , indent = 2 ))
73- print (f"Wrote { output_path } " )
380+ json_path = log_dir / "benchmark-rollup.json"
381+ json_path .write_text (json .dumps (rollup , indent = 2 ))
382+ print (f"Wrote { json_path } " )
383+
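    # Sort by concurrency; rows missing a concurrency value sort first (key -1).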
    csv_rows.sort(key=lambda row: int(row["Concurrency"]) if row["Concurrency"] else -1)
    csv_path = log_dir / "benchmark-rollup.csv"
    with csv_path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=OUTPUT_FIELDS)
        writer.writeheader()
        writer.writerows(csv_rows)
    print(f"Wrote {csv_path}")


if __name__ == "__main__":
    # Assumed invocation: the benchmark log directory is passed as the sole CLI argument.
    main(Path(sys.argv[1]))