|
48 | 48 | from ._core.test_template_strategy import TestTemplateStrategy |
49 | 49 | from .installer.kubernetes_installer import KubernetesInstaller |
50 | 50 | from .installer.lsf_installer import LSFInstaller |
| 51 | +from .installer.runai_installer import RunAIInstaller |
51 | 52 | from .installer.slurm_installer import SlurmInstaller |
52 | 53 | from .installer.standalone_installer import StandaloneInstaller |
53 | 54 | from .parser import Parser |
54 | 55 | from .runner.kubernetes.kubernetes_runner import KubernetesRunner |
55 | 56 | from .runner.lsf.lsf_runner import LSFRunner |
| 57 | +from .runner.runai.runai_runner import RunAIRunner |
56 | 58 | from .runner.slurm.slurm_runner import SlurmRunner |
57 | 59 | from .runner.standalone.standalone_runner import StandaloneRunner |
58 | 60 | from .systems.kubernetes.kubernetes_system import KubernetesSystem |
59 | 61 | from .systems.lsf.lsf_system import LSFSystem |
| 62 | +from .systems.runai.runai_system import RunAISystem |
60 | 63 | from .systems.slurm.slurm_system import SlurmSystem |
61 | 64 | from .systems.standalone_system import StandaloneSystem |
62 | 65 | from .workloads.chakra_replay import ( |
|
91 | 94 | NcclTestJobStatusRetrievalStrategy, |
92 | 95 | NcclTestKubernetesJsonGenStrategy, |
93 | 96 | NcclTestPerformanceReportGenerationStrategy, |
| 97 | + NcclTestRunAIJsonGenStrategy, |
94 | 98 | NcclTestSlurmCommandGenStrategy, |
95 | 99 | ) |
96 | 100 | from .workloads.nemo_launcher import ( |
|
126 | 130 | Registry().add_runner("kubernetes", KubernetesRunner) |
127 | 131 | Registry().add_runner("standalone", StandaloneRunner) |
128 | 132 | Registry().add_runner("lsf", LSFRunner) |
| 133 | +Registry().add_runner("runai", RunAIRunner) |
129 | 134 |
|
130 | 135 | Registry().add_strategy( |
131 | 136 | CommandGenStrategy, [StandaloneSystem], [SleepTestDefinition], SleepStandaloneCommandGenStrategy |
|
134 | 139 | Registry().add_strategy(CommandGenStrategy, [SlurmSystem], [SleepTestDefinition], SleepSlurmCommandGenStrategy) |
135 | 140 | Registry().add_strategy(JsonGenStrategy, [KubernetesSystem], [SleepTestDefinition], SleepKubernetesJsonGenStrategy) |
136 | 141 | Registry().add_strategy(JsonGenStrategy, [KubernetesSystem], [NCCLTestDefinition], NcclTestKubernetesJsonGenStrategy) |
| 142 | +Registry().add_strategy(JsonGenStrategy, [RunAISystem], [NCCLTestDefinition], NcclTestRunAIJsonGenStrategy) |
137 | 143 | Registry().add_strategy(GradingStrategy, [SlurmSystem], [NCCLTestDefinition], NcclTestGradingStrategy) |
138 | 144 |
|
139 | 145 | Registry().add_strategy( |
|
164 | 170 | [GPTTestDefinition, GrokTestDefinition, NemotronTestDefinition], |
165 | 171 | JaxToolboxSlurmCommandGenStrategy, |
166 | 172 | ) |
| 173 | + |
167 | 174 | Registry().add_strategy( |
168 | 175 | JobIdRetrievalStrategy, |
169 | 176 | [SlurmSystem], |
|
184 | 191 | Registry().add_strategy( |
185 | 192 | JobIdRetrievalStrategy, [StandaloneSystem], [SleepTestDefinition], StandaloneJobIdRetrievalStrategy |
186 | 193 | ) |
187 | | - |
188 | 194 | Registry().add_strategy(JobIdRetrievalStrategy, [LSFSystem], [SleepTestDefinition], LSFJobIdRetrievalStrategy) |
| 195 | + |
189 | 196 | Registry().add_strategy( |
190 | 197 | JobStatusRetrievalStrategy, |
191 | 198 | [KubernetesSystem], |
|
221 | 228 | Registry().add_strategy( |
222 | 229 | JobStatusRetrievalStrategy, [StandaloneSystem], [SleepTestDefinition], DefaultJobStatusRetrievalStrategy |
223 | 230 | ) |
224 | | - |
225 | 231 | Registry().add_strategy( |
226 | 232 | JobStatusRetrievalStrategy, [LSFSystem], [SleepTestDefinition], DefaultJobStatusRetrievalStrategy |
227 | 233 | ) |
| 234 | +Registry().add_strategy( |
| 235 | + JobStatusRetrievalStrategy, |
| 236 | + [RunAISystem], |
| 237 | + [NCCLTestDefinition], |
| 238 | + DefaultJobStatusRetrievalStrategy, |
| 239 | +) |
| 240 | + |
228 | 241 | Registry().add_strategy(CommandGenStrategy, [SlurmSystem], [UCCTestDefinition], UCCTestSlurmCommandGenStrategy) |
229 | 242 |
|
230 | 243 | Registry().add_strategy(GradingStrategy, [SlurmSystem], [ChakraReplayTestDefinition], ChakraReplayGradingStrategy) |
|
239 | 252 | Registry().add_installer("standalone", StandaloneInstaller) |
240 | 253 | Registry().add_installer("kubernetes", KubernetesInstaller) |
241 | 254 | Registry().add_installer("lsf", LSFInstaller) |
| 255 | +Registry().add_installer("runai", RunAIInstaller) |
242 | 256 |
|
243 | 257 | Registry().add_system("slurm", SlurmSystem) |
244 | 258 | Registry().add_system("standalone", StandaloneSystem) |
245 | 259 | Registry().add_system("kubernetes", KubernetesSystem) |
246 | 260 | Registry().add_system("lsf", LSFSystem) |
| 261 | +Registry().add_system("runai", RunAISystem) |
247 | 262 |
|
248 | 263 | Registry().add_test_definition("UCCTest", UCCTestDefinition) |
249 | 264 | Registry().add_test_definition("NcclTest", NCCLTestDefinition) |
|
298 | 313 | "PythonExecutable", |
299 | 314 | "ReportGenerationStrategy", |
300 | 315 | "Reporter", |
| 316 | + "RunAISystem", |
301 | 317 | "Runner", |
302 | 318 | "System", |
303 | 319 | "SystemConfigParsingError", |
|
0 commit comments