
Commit fc4f1ca

address reasonable comments
1 parent 73aae0f commit fc4f1ca

File tree

8 files changed (+90, -100 lines)

src/cloudai/workloads/aiconfig/aiconfigurator.py

Lines changed: 7 additions & 1 deletion
@@ -18,7 +18,7 @@

 from typing import List, Optional, Union

-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator

 from cloudai.core import CmdArgs, Installable, TestDefinition

@@ -74,6 +74,12 @@ class AiconfiguratorCmdArgs(CmdArgs):
     agg: Optional[Agg] = None
     disagg: Optional[Disagg] = None

+    @model_validator(mode="after")
+    def _validate_agg_disagg(self) -> "AiconfiguratorCmdArgs":
+        if self.agg is not None and self.disagg is not None:
+            raise ValueError("Only one of 'agg' or 'disagg' may be specified.")
+        return self
+

 class AiconfiguratorTestDefinition(TestDefinition):
     """Test object for running Aiconfigurator predictor as a workload."""

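The added validator makes agg and disagg mutually exclusive at model construction time. For context, a minimal standalone sketch of the same pydantic v2 pattern (simplified stand-in fields, not the project's real Agg/Disagg classes) showing how a mode="after" validator surfaces as a ValidationError:

from typing import Optional

from pydantic import BaseModel, ValidationError, model_validator


class ExampleArgs(BaseModel):
    # Hypothetical stand-ins for the real Agg/Disagg sub-configs.
    agg: Optional[dict] = None
    disagg: Optional[dict] = None

    @model_validator(mode="after")
    def _validate_agg_disagg(self) -> "ExampleArgs":
        if self.agg is not None and self.disagg is not None:
            raise ValueError("Only one of 'agg' or 'disagg' may be specified.")
        return self


try:
    ExampleArgs(agg={}, disagg={})  # both set: pydantic wraps the ValueError
except ValidationError as err:
    print(err)
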
src/cloudai/workloads/aiconfig/predictor.py

Lines changed: 22 additions & 67 deletions
@@ -16,9 +16,15 @@

 from __future__ import annotations

-import sys
 from typing import Any, Dict, Optional, cast

+from aiconfigurator.sdk import common
+from aiconfigurator.sdk import config as aic_config
+from aiconfigurator.sdk import inference_session as aic_inference_session
+from aiconfigurator.sdk import models as aic_models
+from aiconfigurator.sdk import perf_database as aic_perf_database
+from aiconfigurator.sdk.backends import factory as aic_backends_factory
+

 def _to_enum(enum_cls: Any, value_or_name: Any) -> Any:
     """
@@ -39,40 +45,6 @@ def _validate_nextn(nextn: int, nextn_accept_rates: Optional[list[float]]) -> li
     return nextn_accept_rates or []


-def _ensure_aiconfigurator_available(*, need_inference_session: bool) -> dict[str, Any]:
-    """
-    Import required aiconfigurator symbols or raise a consistent ModuleNotFoundError.
-
-    Returns a dict of imported symbols so call sites can stay concise.
-    """
-    try:
-        from aiconfigurator.sdk import common
-        from aiconfigurator.sdk.backends.factory import get_backend
-        from aiconfigurator.sdk.config import ModelConfig, RuntimeConfig
-        from aiconfigurator.sdk.models import get_model
-        from aiconfigurator.sdk.perf_database import get_database
-
-        if need_inference_session:
-            from aiconfigurator.sdk.inference_session import InferenceSession
-        else:
-            InferenceSession = None  # type: ignore[assignment]
-    except ModuleNotFoundError as e:
-        raise ModuleNotFoundError(
-            "Missing dependency 'aiconfigurator'. Install it in the Python environment used for this test. "
-            f"(python={sys.executable})"
-        ) from e
-
-    return {
-        "common": common,
-        "get_backend": get_backend,
-        "ModelConfig": ModelConfig,
-        "RuntimeConfig": RuntimeConfig,
-        "get_model": get_model,
-        "get_database": get_database,
-        "InferenceSession": InferenceSession,
-    }
-
-
 def predict_ifb_single(
     *,
     model_name: str,
@@ -103,22 +75,14 @@ def predict_ifb_single(
     overwrite_num_layers: int = 0,
 ) -> Dict[str, Any]:
     """Predict metrics for a single IFB configuration using the aiconfigurator SDK primitives."""
-    syms = _ensure_aiconfigurator_available(need_inference_session=False)
-    common = syms["common"]
-    get_backend = syms["get_backend"]
-    ModelConfig = syms["ModelConfig"]
-    RuntimeConfig = syms["RuntimeConfig"]
-    get_model = syms["get_model"]
-    get_database = syms["get_database"]
-
-    database = get_database(system=system, backend=backend, version=version)
+    database = aic_perf_database.get_database(system=system, backend=backend, version=version)
     if database is None:
         raise ValueError(f"No perf database found for system={system} backend={backend} version={version}")
-    backend_impl = cast(Any, get_backend(backend))
+    backend_impl = cast(Any, aic_backends_factory.get_backend(backend))

     accept_rates = _validate_nextn(nextn, nextn_accept_rates)

-    mc = ModelConfig(
+    mc = aic_config.ModelConfig(
         tp_size=tp,
         pp_size=pp,
         attention_dp_size=dp,
@@ -133,9 +97,9 @@ def predict_ifb_single(
         nextn_accept_rates=accept_rates,
         overwrite_num_layers=overwrite_num_layers,
     )
-    model = get_model(model_name, mc, backend)
+    model = aic_models.get_model(model_name, mc, backend)

-    rc = RuntimeConfig(batch_size=batch_size, isl=isl, osl=osl)
+    rc = aic_config.RuntimeConfig(batch_size=batch_size, isl=isl, osl=osl)
     summary = backend_impl.run_ifb(model=model, database=database, runtime_config=rc, ctx_tokens=ctx_tokens)
     df = summary.get_summary_df()
     if df is None or df.empty:
@@ -197,24 +161,15 @@ def predict_disagg_single(
     decode_correction_scale: float = 1.0,
 ) -> Dict[str, Any]:
     """Predict metrics for a single disaggregated configuration (explicit prefill/decode workers)."""
-    syms = _ensure_aiconfigurator_available(need_inference_session=True)
-    common = syms["common"]
-    get_backend = syms["get_backend"]
-    ModelConfig = syms["ModelConfig"]
-    RuntimeConfig = syms["RuntimeConfig"]
-    get_model = syms["get_model"]
-    get_database = syms["get_database"]
-    InferenceSession = syms["InferenceSession"]
-
-    perf_db = get_database(system=system, backend=backend, version=version)
+    perf_db = aic_perf_database.get_database(system=system, backend=backend, version=version)
     if perf_db is None:
         raise ValueError(f"No perf database found for system={system} backend={backend} version={version}")

-    perf_backend = cast(Any, get_backend(backend))
+    perf_backend = cast(Any, aic_backends_factory.get_backend(backend))

     accept_rates = _validate_nextn(nextn, nextn_accept_rates)

-    p_mc = ModelConfig(
+    p_mc = aic_config.ModelConfig(
         tp_size=p_tp,
         pp_size=p_pp,
         attention_dp_size=p_dp,
@@ -229,7 +184,7 @@ def predict_disagg_single(
         nextn_accept_rates=accept_rates,
         overwrite_num_layers=overwrite_num_layers,
     )
-    d_mc = ModelConfig(
+    d_mc = aic_config.ModelConfig(
         tp_size=d_tp,
         pp_size=d_pp,
         attention_dp_size=d_dp,
@@ -245,14 +200,14 @@ def predict_disagg_single(
         nextn_accept_rates=accept_rates,
         overwrite_num_layers=overwrite_num_layers,
     )
-    rc_prefill = RuntimeConfig(batch_size=p_bs, isl=isl, osl=osl)
-    rc_decode = RuntimeConfig(batch_size=d_bs, isl=isl, osl=osl)
+    rc_prefill = aic_config.RuntimeConfig(batch_size=p_bs, isl=isl, osl=osl)
+    rc_decode = aic_config.RuntimeConfig(batch_size=d_bs, isl=isl, osl=osl)

-    prefill_model = get_model(model_name, p_mc, backend)
-    decode_model = get_model(model_name, d_mc, backend)
+    prefill_model = aic_models.get_model(model_name, p_mc, backend)
+    decode_model = aic_models.get_model(model_name, d_mc, backend)

-    prefill_sess = InferenceSession(prefill_model, perf_db, perf_backend)
-    decode_sess = InferenceSession(decode_model, perf_db, perf_backend)
+    prefill_sess = aic_inference_session.InferenceSession(prefill_model, perf_db, perf_backend)
+    decode_sess = aic_inference_session.InferenceSession(decode_model, perf_db, perf_backend)

     prefill_summary = prefill_sess.run_static(mode="static_ctx", runtime_config=rc_prefill)
     decode_summary = decode_sess.run_static(mode="static_gen", runtime_config=rc_decode)

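With the lazy-import helper removed, a missing aiconfigurator package now fails when predictor.py is first imported rather than inside the predict functions. A minimal sketch (hypothetical caller, not part of this change) of how an entry point could still report that clearly:

import sys

try:
    # Importing the module now pulls in the aiconfigurator SDK eagerly.
    from cloudai.workloads.aiconfig import predictor  # noqa: F401
except ModuleNotFoundError as err:
    # Roughly the message the removed in-module helper used to raise.
    sys.exit(f"Missing dependency 'aiconfigurator' (python={sys.executable}): {err}")
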
src/cloudai/workloads/aiconfig/report_generation_strategy.py

Lines changed: 3 additions & 2 deletions
@@ -38,11 +38,12 @@ class AiconfiguratorReportGenerationStrategy(ReportGenerationStrategy):

     def can_handle_directory(self) -> bool:
         return isinstance(self.test_run.test, AiconfiguratorTestDefinition) and (
-            (self.test_run.output_path / "report.txt").is_file() or (self.test_run.output_path / "stdout.txt").is_file()
+            (self.test_run.output_path / "report.json").is_file()
+            or (self.test_run.output_path / "stdout.txt").is_file()
         )

     def _load_results(self) -> Optional[dict]:
-        result_path = self.test_run.output_path / "report.txt"
+        result_path = self.test_run.output_path / "report.json"
         if result_path.is_file():
             try:
                 with result_path.open("r", encoding="utf-8") as f:

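For illustration, a small standalone sketch of the JSON shape the strategy now reads from report.json; the payload keys mirror the test fixtures added in this commit, and the paths are placeholders:

import json
from pathlib import Path

output_path = Path("results/aiconfig")  # placeholder output directory
output_path.mkdir(parents=True, exist_ok=True)

payload = {"ttft_ms": 10.0, "tpot_ms": 2.0, "tokens_per_s_per_gpu": 3.0, "tokens_per_s_per_user": 4.0, "oom": False}
(output_path / "report.json").write_text(json.dumps(payload), encoding="utf-8")

with (output_path / "report.json").open("r", encoding="utf-8") as f:
    results = json.load(f)
print(results["tokens_per_s_per_gpu"])
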
src/cloudai/workloads/aiconfig/simple_predictor.py

Lines changed: 6 additions & 1 deletion
@@ -65,7 +65,12 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument("--decode-correction-scale", type=float, default=1.0)

     # output
-    parser.add_argument("--output", required=True, type=Path, help="Path to write report.txt")
+    parser.add_argument(
+        "--output",
+        required=True,
+        type=Path,
+        help="Path to write predictor JSON output (filename is user-specified).",
+    )

     # optional quantization and features (strings to be converted by SDK)
     parser.add_argument("--gemm-quant-mode", default="fp8_block")

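A short sketch (hypothetical result dict and argv, not taken from simple_predictor.py) of how a Path-typed --output argument pairs with writing the JSON result:

import argparse
import json
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument(
    "--output",
    required=True,
    type=Path,
    help="Path to write predictor JSON output (filename is user-specified).",
)
args = parser.parse_args(["--output", "out/report.json"])  # example argv

result = {"oom": False}  # hypothetical; the real script fills this from the predictor
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(json.dumps(result, indent=2), encoding="utf-8")
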
src/cloudai/workloads/aiconfig/standalone_command_gen_strategy.py

Lines changed: 2 additions & 2 deletions
@@ -38,7 +38,7 @@ def gen_exec_command(self) -> str:
         args: AiconfiguratorCmdArgs = tdef.cmd_args
         out_dir = Path(self.test_run.output_path).resolve()

-        report_txt = Path(out_dir) / "report.txt"
+        report_json = Path(out_dir) / "report.json"
         stdout_txt = Path(out_dir) / "stdout.txt"
         stderr_txt = Path(out_dir) / "stderr.txt"

@@ -117,7 +117,7 @@ def gen_exec_command(self) -> str:
         else:
             cmd = [*base_cmd, "--mode", "agg"]

-        cmd.extend(["--output", str(report_txt)])
+        cmd.extend(["--output", str(report_json)])

         cmd_str = " ".join(shlex.quote(str(x)) for x in cmd)
         full_cmd = f"{cmd_str} 1> {shlex.quote(str(stdout_txt))} 2> {shlex.quote(str(stderr_txt))}"

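For reference, a stripped-down sketch of the quoting and redirection pattern the strategy uses (the command and paths here are placeholders, not the real generated command):

import shlex
from pathlib import Path

out_dir = Path("/tmp/aiconfig-out")  # placeholder output directory
report_json = out_dir / "report.json"
stdout_txt = out_dir / "stdout.txt"
stderr_txt = out_dir / "stderr.txt"

cmd = ["python", "simple_predictor.py", "--mode", "agg", "--output", str(report_json)]
cmd_str = " ".join(shlex.quote(str(x)) for x in cmd)
full_cmd = f"{cmd_str} 1> {shlex.quote(str(stdout_txt))} 2> {shlex.quote(str(stderr_txt))}"
print(full_cmd)
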
tests/conftest.py

Lines changed: 11 additions & 0 deletions
@@ -27,6 +27,7 @@
 from cloudai.systems.kubernetes import KubernetesSystem
 from cloudai.systems.runai import RunAISystem
 from cloudai.systems.slurm import SlurmGroup, SlurmPartition, SlurmSystem
+from cloudai.systems.standalone import StandaloneSystem
 from cloudai.workloads.nccl_test.nccl import NCCLCmdArgs, NCCLTestDefinition


@@ -115,6 +116,16 @@ def runai_system(tmp_path: Path) -> RunAISystem:
     return system


+@pytest.fixture
+def standalone_system(tmp_path: Path) -> StandaloneSystem:
+    return StandaloneSystem(
+        name="standalone",
+        scheduler="standalone",
+        install_path=tmp_path / "install",
+        output_path=tmp_path / "output",
+    )
+
+
 @pytest.fixture
 def base_tr(slurm_system: SlurmSystem) -> TestRun:
     return TestRun(

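With the fixture promoted to tests/conftest.py, any test module can request it by name without defining it locally. A minimal sketch of such a test (hypothetical test name):

from cloudai.systems.standalone import StandaloneSystem


def test_uses_shared_fixture(standalone_system: StandaloneSystem) -> None:
    # pytest injects the standalone_system fixture from tests/conftest.py.
    assert standalone_system.name == "standalone"
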
tests/report_generation_strategy/test_aiconfigurator_report_generation_strategy.py

Lines changed: 5 additions & 15 deletions
@@ -31,16 +31,6 @@
 from cloudai.workloads.aiconfig.aiconfigurator import Agg


-@pytest.fixture
-def standalone_system(tmp_path: Path) -> StandaloneSystem:
-    return StandaloneSystem(
-        name="standalone",
-        scheduler="standalone",
-        install_path=tmp_path / "install",
-        output_path=tmp_path / "output",
-    )
-
-
 def _make_tr(tmp_path: Path) -> TestRun:
     tdef = AiconfiguratorTestDefinition(
         name="aiconfig",
@@ -60,7 +50,7 @@ def _make_tr(tmp_path: Path) -> TestRun:
 def test_can_handle_directory_when_report_exists(tmp_path: Path, standalone_system: StandaloneSystem) -> None:
     tr = _make_tr(tmp_path)
     tr.output_path.mkdir(parents=True, exist_ok=True)
-    (tr.output_path / "report.txt").write_text("{}", encoding="utf-8")
+    (tr.output_path / "report.json").write_text("{}", encoding="utf-8")

     strategy = AiconfiguratorReportGenerationStrategy(standalone_system, tr)
     assert strategy.can_handle_directory() is True
@@ -70,7 +60,7 @@ def test_generate_report_writes_summary(tmp_path: Path, standalone_system: Stand
     tr = _make_tr(tmp_path)
     tr.output_path.mkdir(parents=True, exist_ok=True)
     payload = {"ttft_ms": 10.0, "tpot_ms": 2.0, "tokens_per_s_per_gpu": 3.0, "tokens_per_s_per_user": 4.0, "oom": False}
-    (tr.output_path / "report.txt").write_text(json.dumps(payload), encoding="utf-8")
+    (tr.output_path / "report.json").write_text(json.dumps(payload), encoding="utf-8")

     strategy = AiconfiguratorReportGenerationStrategy(standalone_system, tr)
     strategy.generate_report()
@@ -86,7 +76,7 @@ def test_generate_report_writes_summary(tmp_path: Path, standalone_system: Stand
 def test_get_metric_default_prefers_throughput(tmp_path: Path, standalone_system: StandaloneSystem) -> None:
     tr = _make_tr(tmp_path)
     tr.output_path.mkdir(parents=True, exist_ok=True)
-    (tr.output_path / "report.txt").write_text(json.dumps({"tokens_per_s_per_gpu": 123.0}), encoding="utf-8")
+    (tr.output_path / "report.json").write_text(json.dumps({"tokens_per_s_per_gpu": 123.0}), encoding="utf-8")

     strategy = AiconfiguratorReportGenerationStrategy(standalone_system, tr)
     assert strategy.get_metric("default") == 123.0
@@ -95,7 +85,7 @@ def test_get_metric_default_prefers_throughput(tmp_path: Path, standalone_system
 def test_get_metric_default_falls_back_to_inverse_latency(tmp_path: Path, standalone_system: StandaloneSystem) -> None:
     tr = _make_tr(tmp_path)
     tr.output_path.mkdir(parents=True, exist_ok=True)
-    (tr.output_path / "report.txt").write_text(json.dumps({"tpot_ms": 2.0}), encoding="utf-8")
+    (tr.output_path / "report.json").write_text(json.dumps({"tpot_ms": 2.0}), encoding="utf-8")

     strategy = AiconfiguratorReportGenerationStrategy(standalone_system, tr)
     assert pytest.approx(strategy.get_metric("default"), rel=1e-6) == 0.5
@@ -113,7 +103,7 @@ def test_load_results_falls_back_to_stdout_last_json(tmp_path: Path, standalone_
 def test_get_metric_unknown_returns_error(tmp_path: Path, standalone_system: StandaloneSystem) -> None:
     tr = _make_tr(tmp_path)
     tr.output_path.mkdir(parents=True, exist_ok=True)
-    (tr.output_path / "report.txt").write_text(json.dumps({"ttft_ms": 1.0}), encoding="utf-8")
+    (tr.output_path / "report.json").write_text(json.dumps({"ttft_ms": 1.0}), encoding="utf-8")

     strategy = AiconfiguratorReportGenerationStrategy(standalone_system, tr)
     assert strategy.get_metric("nonexistent") == METRIC_ERROR

tests/standalone_command_gen_strategy/test_aiconfigurator_standalone_command_gen_strategy.py

Lines changed: 34 additions & 12 deletions
@@ -27,17 +27,7 @@
     AiconfiguratorStandaloneCommandGenStrategy,
     AiconfiguratorTestDefinition,
 )
-from cloudai.workloads.aiconfig.aiconfigurator import Disagg
-
-
-@pytest.fixture
-def standalone_system(tmp_path: Path) -> StandaloneSystem:
-    return StandaloneSystem(
-        name="standalone",
-        scheduler="standalone",
-        install_path=tmp_path / "install",
-        output_path=tmp_path / "output",
-    )
+from cloudai.workloads.aiconfig.aiconfigurator import Agg, Disagg


 def test_gen_exec_command_writes_repro_script_and_returns_bash(
@@ -94,6 +84,38 @@ def test_gen_exec_command_writes_repro_script_and_returns_bash(
     assert "--mode" in content and "disagg" in content
     assert "--d-bs" in content and "8" in content

-    assert str((out_dir.resolve() / "report.txt")) in content
+    assert str((out_dir.resolve() / "report.json")) in content
     assert str((out_dir.resolve() / "stdout.txt")) in content
     assert str((out_dir.resolve() / "stderr.txt")) in content
+
+
+def test_gen_exec_command_agg_branch(
+    tmp_path: Path, standalone_system: StandaloneSystem, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    monkeypatch.setattr("sys.executable", "/tmp/python")
+
+    tdef = AiconfiguratorTestDefinition(
+        name="aiconfig",
+        description="desc",
+        test_template_name="Aiconfigurator",
+        cmd_args=AiconfiguratorCmdArgs(
+            model_name="LLAMA3.1_70B",
+            system="h200_sxm",
+            backend="trtllm",
+            version="0.20.0",
+            isl=4000,
+            osl=500,
+            agg=Agg(batch_size=8, ctx_tokens=16, tp=1, pp=1, dp=1, moe_tp=1, moe_ep=1),
+        ),
+    )
+    out_dir = tmp_path / "out-agg"
+    tr = TestRun(name="tr", test=tdef, num_nodes=1, nodes=[], output_path=out_dir)
+
+    strategy = AiconfiguratorStandaloneCommandGenStrategy(standalone_system, tr)
+    cmd = strategy.gen_exec_command()
+    assert cmd.startswith("bash ")
+
+    content = (out_dir.resolve() / "run_simple_predictor.sh").read_text(encoding="utf-8")
+    assert "--mode" in content and "agg" in content
+    assert "--batch-size" in content and "8" in content
+    assert "--ctx-tokens" in content and "16" in content
