Skip to content

Commit c6ffbf8

Browse files
authored
Mount NCCL_TOPO_FILE in NCCL test (#337)
1 parent 242680b commit c6ffbf8

File tree

2 files changed

+6
-7
lines changed

2 files changed

+6
-7
lines changed

src/cloudai/schema/test_template/nccl_test/slurm_command_gen_strategy.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,9 @@ def _parse_slurm_args(
3131
base_args = super()._parse_slurm_args(job_name_prefix, env_vars, cmd_args, tr)
3232

3333
container_mounts = ""
34-
if "NCCL_TOPO_FILE" in env_vars and "DOCKER_NCCL_TOPO_FILE" in env_vars:
35-
nccl_graph_path = Path(env_vars["NCCL_TOPO_FILE"]).resolve()
36-
nccl_graph_file = env_vars["DOCKER_NCCL_TOPO_FILE"]
37-
container_mounts = f"{nccl_graph_path}:{nccl_graph_file}"
34+
if "NCCL_TOPO_FILE" in env_vars:
35+
nccl_topo_file = Path(env_vars["NCCL_TOPO_FILE"]).resolve()
36+
container_mounts = f"{nccl_topo_file}:{nccl_topo_file}"
3837
elif "NCCL_TOPO_FILE" in env_vars:
3938
del env_vars["NCCL_TOPO_FILE"]
4039

tests/slurm_command_gen_strategy/test_nccl_slurm_command_gen_strategy.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,12 +35,12 @@ def cmd_gen_strategy(self, slurm_system: SlurmSystem) -> NcclTestSlurmCommandGen
3535
[
3636
(
3737
"nccl_test",
38-
{"NCCL_TOPO_FILE": "/path/to/topo", "DOCKER_NCCL_TOPO_FILE": "/docker/topo"},
38+
{"NCCL_TOPO_FILE": "/path/to/topo"},
3939
{"subtest_name": "all_reduce_perf", "docker_image_url": "fake_image_url"},
4040
2,
4141
["node1", "node2"],
4242
{
43-
"container_mounts": "/path/to/topo:/docker/topo",
43+
"container_mounts": "/path/to/topo:/path/to/topo",
4444
},
4545
),
4646
(
@@ -50,7 +50,7 @@ def cmd_gen_strategy(self, slurm_system: SlurmSystem) -> NcclTestSlurmCommandGen
5050
1,
5151
["node1"],
5252
{
53-
"container_mounts": "",
53+
"container_mounts": "/path/to/topo:/path/to/topo",
5454
},
5555
),
5656
],

0 commit comments

Comments
 (0)