NVIDIA
diff --git a/src/cloudai/_core/base_runner.py b/src/cloudai/_core/base_runner.py
Lines changed: 7 additions & 3 deletions
diff --git a/src/cloudai/_core/command_gen_strategy.py b/src/cloudai/_core/command_gen_strategy.py
Lines changed: 5 additions & 0 deletions
diff --git a/src/cloudai/_core/registry.py b/src/cloudai/_core/registry.py
Lines changed: 24 additions & 0 deletions
diff --git a/src/cloudai/_core/test_template.py b/src/cloudai/_core/test_template.py
Lines changed: 0 additions & 34 deletions
diff --git a/src/cloudai/registration.py b/src/cloudai/registration.py
Lines changed: 18 additions & 35 deletions
diff --git a/src/cloudai/systems/lsf/lsf_command_gen_strategy.py b/src/cloudai/systems/lsf/lsf_command_gen_strategy.py
Lines changed: 6 additions & 5 deletions
diff --git a/src/cloudai/systems/lsf/lsf_runner.py b/src/cloudai/systems/lsf/lsf_runner.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/cloudai/systems/slurm/single_sbatch_runner.py b/src/cloudai/systems/slurm/single_sbatch_runner.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/cloudai/systems/slurm/slurm_command_gen_strategy.py b/src/cloudai/systems/slurm/slurm_command_gen_strategy.py
Lines changed: 7 additions & 9 deletions
diff --git a/src/cloudai/systems/slurm/slurm_runner.py b/src/cloudai/systems/slurm/slurm_runner.py
Lines changed: 6 additions & 2 deletions
@@ -22,8 +22,10 @@
 from typing import Dict, List
 
 from .base_job import BaseJob
+from .command_gen_strategy import CommandGenStrategy
 from .exceptions import JobFailureError, JobSubmissionError
 from .job_status_result import JobStatusResult
+from .registry import Registry
 from .system import System
 from .test_scenario import TestRun, TestScenario
 
@@ -113,9 +115,7 @@ async def submit_test(self, tr: TestRun):
             exit(1)
 
     def on_job_submit(self, tr: TestRun) -> None:
-        if tr.test.test_template._command_gen_strategy is not None:
-            cmd_gen = tr.test.test_template.command_gen_strategy
-            cmd_gen.store_test_run(tr)
+        """
+        Do nothing for the given test run.
+
+        This implementation no longer stores the test run through the test template's command generation
+        strategy; it returns immediately.
+
+        Args:
+            tr (TestRun): The test run passed to this hook; unused here.
+        """
+        # NOTE(review): presumably runner subclasses override this hook when they need to persist the test run —
+        # confirm against the concrete runners.
+        return
 
     async def delayed_submit_test(self, tr: TestRun, delay: int = 5):
         """
@@ -372,3 +372,7 @@ async def delayed_kill_job(self, job: BaseJob, delay: int = 0):
         await asyncio.sleep(delay)
         job.terminated_by_dependency = True
         self.system.kill(job)
+
+    def get_cmd_gen_strategy(self, system: System, test_run: TestRun) -> CommandGenStrategy:
+        """
+        Create the command generation strategy for a test run on the given system.
+
+        The strategy class is looked up in the Registry using the concrete type of ``system`` and the concrete
+        type of the test run's test definition, and is then instantiated with ``system`` and ``test_run``.
+
+        Args:
+            system (System): System whose type selects the strategy; also passed to the strategy constructor.
+            test_run (TestRun): Test run whose test definition type selects the strategy; also passed to the
+                strategy constructor.
+
+        Returns:
+            CommandGenStrategy: A newly constructed strategy instance (a new object on every call).
+        """
+        strategy_cls = Registry().get_command_gen_strategy(type(system), type(test_run.test.test_definition))
+        return strategy_cls(system, test_run)
@@ -16,6 +16,7 @@
 
 from abc import ABC, abstractmethod
 
+from .system import System
 from .test_scenario import TestRun
 from .test_template_strategy import TestTemplateStrategy
 
@@ -25,6 +26,10 @@ class CommandGenStrategy(TestTemplateStrategy, ABC):
 
     TEST_RUN_DUMP_FILE_NAME: str = "test-run.toml"
 
+    def __init__(self, system: System, test_run: TestRun) -> None:
+        """
+        Initialize the strategy with a system and the test run it is created for.
+
+        Args:
+            system (System): Forwarded to the parent class initializer.
+            test_run (TestRun): Stored on the instance as ``test_run``.
+        """
+        super().__init__(system)
+        self.test_run = test_run
+
     @abstractmethod
     def gen_exec_command(self, tr: TestRun) -> str:
         """
 
@@ -25,6 +25,7 @@
     from ..reporter import Reporter
     from .base_installer import BaseInstaller
     from .base_runner import BaseRunner
+    from .command_gen_strategy import CommandGenStrategy
     from .grading_strategy import GradingStrategy
     from .report_generation_strategy import ReportGenerationStrategy
     from .system import System
@@ -66,6 +67,7 @@ class Registry(metaclass=Singleton):
     scenario_reports: ClassVar[dict[str, type[Reporter]]] = {}
     report_configs: ClassVar[dict[str, ReportConfig]] = {}
     reward_functions_map: ClassVar[dict[str, RewardFunction]] = {}
+    command_gen_strategies_map: ClassVar[dict[tuple[Type[System], Type[TestDefinition]], Type[CommandGenStrategy]]] = {}
 
     def add_runner(self, name: str, value: Type[BaseRunner]) -> None:
         """
@@ -250,3 +252,25 @@ def get_reward_function(self, name: str) -> RewardFunction:
                 f"Reward function '{name}' not found. Available functions: {list(self.reward_functions_map.keys())}"
             )
         return self.reward_functions_map[name]
+
+    def add_command_gen_strategy(
+        self, system_type: Type[System], tdef_type: Type[TestDefinition], value: Type[CommandGenStrategy]
+    ) -> None:
+        """
+        Register a command generation strategy class for a (system type, test definition type) pair.
+
+        Args:
+            system_type (Type[System]): System class forming the first half of the lookup key.
+            tdef_type (Type[TestDefinition]): Test definition class forming the second half of the lookup key.
+            value (Type[CommandGenStrategy]): Strategy class to register for that pair.
+
+        Raises:
+            ValueError: If a strategy is already registered for the same pair.
+        """
+        if (system_type, tdef_type) in self.command_gen_strategies_map:
+            # NOTE(review): the message points at 'update()', while the replacement method visible here is
+            # update_command_gen_strategy() — confirm the wording is intentional.
+            raise ValueError(
+                f"Duplicating implementation for '{system_type.__name__}, {tdef_type.__name__}', use 'update()' "
+                "for replacement."
+            )
+        self.update_command_gen_strategy(system_type, tdef_type, value)
+
+    def update_command_gen_strategy(
+        self, system_type: Type[System], tdef_type: Type[TestDefinition], value: Type[CommandGenStrategy]
+    ) -> None:
+        """
+        Set the command generation strategy class for a (system type, test definition type) pair.
+
+        Unlike ``add_command_gen_strategy``, this performs no duplicate check: an existing entry for the same
+        pair is overwritten.
+
+        Args:
+            system_type (Type[System]): System class forming the first half of the lookup key.
+            tdef_type (Type[TestDefinition]): Test definition class forming the second half of the lookup key.
+            value (Type[CommandGenStrategy]): Strategy class to store for that pair.
+        """
+        self.command_gen_strategies_map[(system_type, tdef_type)] = value
+
+    def get_command_gen_strategy(
+        self, system_type: Type[System], tdef_type: Type[TestDefinition]
+    ) -> Type[CommandGenStrategy]:
+        """
+        Return the command generation strategy class registered for a (system type, test definition type) pair.
+
+        The lookup uses the exact pair of classes as the dictionary key.
+
+        Args:
+            system_type (Type[System]): System class forming the first half of the lookup key.
+            tdef_type (Type[TestDefinition]): Test definition class forming the second half of the lookup key.
+
+        Returns:
+            Type[CommandGenStrategy]: The registered strategy class (not an instance).
+
+        Raises:
+            KeyError: If no strategy is registered for the pair.
+        """
+        if (system_type, tdef_type) not in self.command_gen_strategies_map:
+            raise KeyError(f"Command gen strategy for '{system_type.__name__}, {tdef_type.__name__}' not found.")
+        return self.command_gen_strategies_map[(system_type, tdef_type)]
@@ -17,7 +17,6 @@
 from pathlib import Path
 from typing import Any, Dict, Optional
 
-from .command_gen_strategy import CommandGenStrategy
 from .grading_strategy import GradingStrategy
 from .json_gen_strategy import JsonGenStrategy
 from .system import System
@@ -30,13 +29,6 @@ class TestTemplate:
 
     Providing a framework for test execution, including installation, uninstallation, and execution command generation
     based on system configurations and test parameters.
-
-    Attributes
-        cmd_args (Dict[str, Any]): Default command-line arguments.
-        logger (logging.Logger): Logger for the test template.
-        command_gen_strategy (CommandGenStrategy): Strategy for generating execution commands.
-        json_gen_strategy (JsonGenStrategy): Strategy for generating json string.
-        grading_strategy (GradingStrategy): Strategy for grading performance based on test outcomes.
     """
 
     __test__ = False
@@ -49,23 +41,9 @@ def __init__(self, system: System) -> None:
             system (System): System configuration for the test template.
         """
         self.system = system
-        self._command_gen_strategy: Optional[CommandGenStrategy] = None
         self._json_gen_strategy: Optional[JsonGenStrategy] = None
         self.grading_strategy: Optional[GradingStrategy] = None
 
-    @property
-    def command_gen_strategy(self) -> CommandGenStrategy:
-        if self._command_gen_strategy is None:
-            raise ValueError(
-                "command_gen_strategy is missing. Ensure the strategy is registered in the Registry "
-                "by calling the appropriate registration function for the system type."
-            )
-        return self._command_gen_strategy
-
-    @command_gen_strategy.setter
-    def command_gen_strategy(self, value: CommandGenStrategy) -> None:
-        self._command_gen_strategy = value
-
     @property
     def json_gen_strategy(self) -> JsonGenStrategy:
         if self._json_gen_strategy is None:
@@ -79,18 +57,6 @@ def json_gen_strategy(self) -> JsonGenStrategy:
     def json_gen_strategy(self, value: JsonGenStrategy) -> None:
         self._json_gen_strategy = value
 
-    def gen_exec_command(self, tr: TestRun) -> str:
-        """
-        Generate an execution command for a test using this template.
-
-        Args:
-            tr (TestRun): Contains the test and its run-specific configurations.
-
-        Returns:
-            str: The generated execution command.
-        """
-        return self.command_gen_strategy.gen_exec_command(tr)
-
     def gen_json(self, tr: TestRun) -> Dict[Any, Any]:
         """
         Generate a JSON string representing the Kubernetes job specification for this test using this template.
 
@@ -23,7 +23,7 @@ def register_all():
         inverse_reward,
         negative_reward,
     )
-    from cloudai.core import CommandGenStrategy, GradingStrategy, JsonGenStrategy, Registry
+    from cloudai.core import GradingStrategy, JsonGenStrategy, Registry
     from cloudai.models.scenario import ReportConfig
     from cloudai.reporter import PerTestReporter, StatusReporter, TarballReporter
 
@@ -117,31 +117,23 @@ def register_all():
     Registry().add_runner("lsf", LSFRunner)
     Registry().add_runner("runai", RunAIRunner)
 
-    Registry().add_strategy(
-        CommandGenStrategy, [StandaloneSystem], [SleepTestDefinition], SleepStandaloneCommandGenStrategy
-    )
-    Registry().add_strategy(CommandGenStrategy, [LSFSystem], [SleepTestDefinition], SleepLSFCommandGenStrategy)
-    Registry().add_strategy(CommandGenStrategy, [SlurmSystem], [SleepTestDefinition], SleepSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(StandaloneSystem, SleepTestDefinition, SleepStandaloneCommandGenStrategy)
+    Registry().add_command_gen_strategy(LSFSystem, SleepTestDefinition, SleepLSFCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, SleepTestDefinition, SleepSlurmCommandGenStrategy)
     Registry().add_strategy(JsonGenStrategy, [KubernetesSystem], [SleepTestDefinition], SleepKubernetesJsonGenStrategy)
     Registry().add_strategy(
         JsonGenStrategy, [KubernetesSystem], [NCCLTestDefinition], NcclTestKubernetesJsonGenStrategy
     )
     Registry().add_strategy(JsonGenStrategy, [RunAISystem], [NCCLTestDefinition], NcclTestRunAIJsonGenStrategy)
     Registry().add_strategy(GradingStrategy, [SlurmSystem], [NCCLTestDefinition], NcclTestGradingStrategy)
 
-    Registry().add_strategy(
-        CommandGenStrategy, [SlurmSystem], [MegatronRunTestDefinition], MegatronRunSlurmCommandGenStrategy
-    )
-    Registry().add_strategy(CommandGenStrategy, [SlurmSystem], [NCCLTestDefinition], NcclTestSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, MegatronRunTestDefinition, MegatronRunSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, NCCLTestDefinition, NcclTestSlurmCommandGenStrategy)
     Registry().add_strategy(GradingStrategy, [SlurmSystem], [SleepTestDefinition], SleepGradingStrategy)
 
-    Registry().add_strategy(
-        CommandGenStrategy, [SlurmSystem], [NeMoLauncherTestDefinition], NeMoLauncherSlurmCommandGenStrategy
-    )
-    Registry().add_strategy(CommandGenStrategy, [SlurmSystem], [NeMoRunTestDefinition], NeMoRunSlurmCommandGenStrategy)
-    Registry().add_strategy(
-        CommandGenStrategy, [SlurmSystem], [NIXLBenchTestDefinition], NIXLBenchSlurmCommandGenStrategy
-    )
+    Registry().add_command_gen_strategy(SlurmSystem, NeMoLauncherTestDefinition, NeMoLauncherSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, NeMoRunTestDefinition, NeMoRunSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, NIXLBenchTestDefinition, NIXLBenchSlurmCommandGenStrategy)
 
     Registry().add_strategy(GradingStrategy, [SlurmSystem], [NeMoLauncherTestDefinition], NeMoLauncherGradingStrategy)
     Registry().add_strategy(
@@ -151,29 +143,20 @@ def register_all():
         JaxToolboxGradingStrategy,
     )
     Registry().add_strategy(GradingStrategy, [SlurmSystem], [UCCTestDefinition], UCCTestGradingStrategy)
-    Registry().add_strategy(
-        CommandGenStrategy,
-        [SlurmSystem],
-        [GPTTestDefinition, GrokTestDefinition, NemotronTestDefinition],
-        JaxToolboxSlurmCommandGenStrategy,
-    )
+    Registry().add_command_gen_strategy(SlurmSystem, GPTTestDefinition, JaxToolboxSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, GrokTestDefinition, JaxToolboxSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, NemotronTestDefinition, JaxToolboxSlurmCommandGenStrategy)
 
-    Registry().add_strategy(CommandGenStrategy, [SlurmSystem], [UCCTestDefinition], UCCTestSlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, UCCTestDefinition, UCCTestSlurmCommandGenStrategy)
 
     Registry().add_strategy(GradingStrategy, [SlurmSystem], [ChakraReplayTestDefinition], ChakraReplayGradingStrategy)
-    Registry().add_strategy(
-        CommandGenStrategy, [SlurmSystem], [ChakraReplayTestDefinition], ChakraReplaySlurmCommandGenStrategy
-    )
-    Registry().add_strategy(
-        CommandGenStrategy, [SlurmSystem], [SlurmContainerTestDefinition], SlurmContainerCommandGenStrategy
-    )
-    Registry().add_strategy(
-        CommandGenStrategy, [SlurmSystem], [TritonInferenceTestDefinition], TritonInferenceSlurmCommandGenStrategy
+    Registry().add_command_gen_strategy(SlurmSystem, ChakraReplayTestDefinition, ChakraReplaySlurmCommandGenStrategy)
+    Registry().add_command_gen_strategy(SlurmSystem, SlurmContainerTestDefinition, SlurmContainerCommandGenStrategy)
+    Registry().add_command_gen_strategy(
+        SlurmSystem, TritonInferenceTestDefinition, TritonInferenceSlurmCommandGenStrategy
     )
 
-    Registry().add_strategy(
-        CommandGenStrategy, [SlurmSystem], [AIDynamoTestDefinition], AIDynamoSlurmCommandGenStrategy
-    )
+    Registry().add_command_gen_strategy(SlurmSystem, AIDynamoTestDefinition, AIDynamoSlurmCommandGenStrategy)
 
     Registry().add_installer("slurm", SlurmInstaller)
     Registry().add_installer("standalone", StandaloneInstaller)
 
@@ -15,9 +15,9 @@
 # limitations under the License.
 
 from datetime import datetime
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Union, cast
 
-from cloudai.core import CommandGenStrategy, TestRun
+from cloudai.core import CommandGenStrategy, System, TestRun
 
 from .lsf_system import LSFSystem
 
@@ -31,15 +31,16 @@ class LSFCommandGenStrategy(CommandGenStrategy):
             properties and methods.
     """
 
-    def __init__(self, system: LSFSystem) -> None:
+    def __init__(self, system: System, test_run: TestRun) -> None:
         """
         Initialize a new LSFCommandGenStrategy instance.
 
         Args:
             system (LSFSystem): The system schema object.
+            test_run (TestRun): The test run object.
         """
-        super().__init__(system)
-        self.system = system
+        super().__init__(system, test_run)
+        self.system = cast(LSFSystem, system)
 
     def gen_exec_command(self, tr: TestRun) -> str:
         """
 
@@ -54,7 +54,7 @@ def get_job_id(self, stdout: str, stderr: str) -> int | None:
     def _submit_test(self, tr: TestRun) -> LSFJob:
         logging.info(f"Running test: {tr.name}")
         tr.output_path = self.get_job_output_path(tr)
-        exec_cmd = tr.test.test_template.gen_exec_command(tr)
+        exec_cmd = self.get_cmd_gen_strategy(self.system, tr).gen_exec_command(tr)
         logging.debug(f"Executing command for test {tr.name}: {exec_cmd}")
         job_id = 0
         if self.mode == "run":
 
@@ -108,11 +108,11 @@ def aux_commands(self) -> list[str]:
         tr.output_path = self.scenario_root
         max_nodes, _ = self.extract_sbatch_nodes_spec()
         tr.num_nodes = max_nodes
-        cmd_gen = cast(SlurmCommandGenStrategy, tr.test.test_template.command_gen_strategy)
+        cmd_gen = cast(SlurmCommandGenStrategy, self.get_cmd_gen_strategy(self.system, tr))
         return [cmd_gen._metadata_cmd(tr), cmd_gen._ranks_mapping_cmd(tr)]
 
     def get_single_tr_block(self, tr: TestRun) -> str:
-        cmd_gen = cast(SlurmCommandGenStrategy, tr.test.test_template.command_gen_strategy)
+        cmd_gen = cast(SlurmCommandGenStrategy, self.get_cmd_gen_strategy(self.system, tr))
         srun_cmd = cmd_gen.gen_srun_command(tr)
         nnodes, node_list = self.system.get_nodes_by_spec(tr.nnodes, tr.nodes)
         node_arg = f"--nodelist={','.join(node_list)}" if node_list else f"-N{nnodes}"
@@ -161,7 +161,7 @@ def gen_sbatch_content(self) -> str:
 
     def add_pre_tests(self, pre_tc: TestScenario, base_tr: TestRun) -> str:
         content = []
-        cmd_gen = cast(SlurmCommandGenStrategy, base_tr.test.test_template.command_gen_strategy)
+        cmd_gen = cast(SlurmCommandGenStrategy, self.get_cmd_gen_strategy(self.system, base_tr))
         content.append(cmd_gen.gen_pre_test(pre_tc, self.scenario_root))
         content.append("if [ $PRE_TEST_SUCCESS -ne 1 ]; then")
         content.append("    exit 1")
 
@@ -23,7 +23,7 @@
 
 import toml
 
-from cloudai.core import CommandGenStrategy, Registry, TestRun, TestScenario
+from cloudai.core import CommandGenStrategy, Registry, System, TestRun, TestScenario
 from cloudai.models.scenario import TestRunDetails
 
 from .slurm_system import SlurmSystem
@@ -38,15 +38,16 @@ class SlurmCommandGenStrategy(CommandGenStrategy):
             properties and methods.
     """
 
-    def __init__(self, system: SlurmSystem) -> None:
+    def __init__(self, system: System, test_run: TestRun) -> None:
         """
         Initialize a new SlurmCommandGenStrategy instance.
 
         Args:
             system (SlurmSystem): The system schema object.
+            test_run (TestRun): The test run object.
         """
-        super().__init__(system)
-        self.system = system
+        super().__init__(system, test_run)
+        self.system = cast(SlurmSystem, system)
 
         self._node_spec_cache: dict[str, tuple[int, list[str]]] = {}
 
@@ -143,11 +144,8 @@ def _get_cmd_gen_strategy(self, tr: TestRun) -> "SlurmCommandGenStrategy":
         Returns:
             CommandGenStrategy: The strategy instance.
         """
-        registry = Registry()
-        key = (CommandGenStrategy, type(self.system), type(tr.test.test_definition))
-        strategy_cls = registry.strategies_map[key]
-        strategy_cls_typed = cast(type[SlurmCommandGenStrategy], strategy_cls)
-        strategy = strategy_cls_typed(self.system)
+        strategy_cls = Registry().get_command_gen_strategy(type(self.system), type(tr.test.test_definition))
+        strategy = cast(SlurmCommandGenStrategy, strategy_cls(self.system, tr))
         return strategy
 
     def _set_hook_output_path(self, tr: TestRun, base_output_path: Path) -> None:
 
@@ -56,7 +56,7 @@ def get_job_id(self, stdout: str, stderr: str) -> int | None:
 
     def _submit_test(self, tr: TestRun) -> SlurmJob:
         logging.info(f"Running test: {tr.name}")
-        exec_cmd = tr.test.test_template.gen_exec_command(tr)
+        exec_cmd = self.get_cmd_gen_strategy(self.system, tr).gen_exec_command(tr)
         logging.debug(f"Executing command for test {tr.name}: {exec_cmd}")
         job_id = 0
         if self.mode == "run":
@@ -73,6 +73,10 @@ def _submit_test(self, tr: TestRun) -> SlurmJob:
         logging.info(f"Submitted slurm job: {job_id}")
         return SlurmJob(tr, id=job_id)
 
+    def on_job_submit(self, tr: TestRun) -> None:
+        """
+        Call ``store_test_run`` on the command generation strategy resolved for the test run.
+
+        A strategy instance is obtained via ``get_cmd_gen_strategy`` for this runner's system and ``tr``.
+
+        Args:
+            tr (TestRun): The test run handed to the strategy's ``store_test_run``.
+        """
+        cmd_gen = self.get_cmd_gen_strategy(self.system, tr)
+        cmd_gen.store_test_run(tr)
+
     def on_job_completion(self, job: BaseJob) -> None:
         logging.debug(f"Job completion callback for job {job.id}")
         self.system.complete_job(cast(SlurmJob, job))
@@ -94,7 +98,7 @@ def _mock_job_metadata(self) -> SlurmStepMetadata:
     def _get_job_metadata(
         self, job: SlurmJob, steps_metadata: list[SlurmStepMetadata]
     ) -> tuple[Path, SlurmJobMetadata]:
-        cmd_gen = cast(SlurmCommandGenStrategy, job.test_run.test.test_template.command_gen_strategy)
+        cmd_gen = cast(SlurmCommandGenStrategy, self.get_cmd_gen_strategy(self.system, job.test_run))
         return job.test_run.output_path / "slurm-job.toml", SlurmJobMetadata(
             job_id=int(job.id),
             name=steps_metadata[0].name,