Skip to content

Commit 2b6181b

Browse files
authored
Remove assigning null when the value is null (NeMo launcher) (#250)
* Remove assigning null when the value is null * Reflect Andrei's comments
1 parent 7455f42 commit 2b6181b

File tree

3 files changed

+5
-36
lines changed

3 files changed

+5
-36
lines changed

src/cloudai/schema/test_template/nemo_launcher/slurm_command_gen_strategy.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -135,18 +135,9 @@ def _set_node_config(self, nodes: List[str], num_nodes: int) -> None:
135135
self.final_cmd_args["training.trainer.num_nodes"] = str(len(nodes)) if nodes else num_nodes
136136

137137
def _validate_data_config(self) -> None:
138-
"""Validate the data directory and prefix configuration for non-mock environments."""
138+
"""Validate the data prefix configuration for non-mock environments."""
139139
if self.final_cmd_args.get("training.model.data.data_impl") != "mock":
140-
data_dir = self.final_cmd_args.get("data_dir")
141140
data_prefix = self.final_cmd_args.get("training.model.data.data_prefix")
142-
143-
if not data_dir or data_dir == "~":
144-
raise ValueError(
145-
"The 'data_dir' field of the NeMo launcher test contains an invalid placeholder '~'. "
146-
"Please provide a valid path to the dataset in the test schema TOML file. "
147-
"The 'data_dir' field must point to an actual dataset location."
148-
)
149-
150141
if data_prefix == "[]":
151142
raise ValueError(
152143
"The 'data_prefix' field of the NeMo launcher test is missing or empty. "
@@ -198,10 +189,7 @@ def _generate_cmd_args_str(self, args: Dict[str, str], nodes: List[str]) -> str:
198189
value = f"\\'{value}\\'"
199190
env_var_str_parts.append(f"+{key}={value}")
200191
else:
201-
if value == "~":
202-
cmd_arg_str_parts.append(f"~{key}=null")
203-
else:
204-
cmd_arg_str_parts.append(f"{key}={value}")
192+
cmd_arg_str_parts.append(f"{key}={value}")
205193

206194
if nodes:
207195
nodes_str = ",".join(nodes)

src/cloudai/test_definitions/nemo_launcher.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ class NeMoLauncherCmdArgs(CmdArgs):
9797
repository_commit_hash: str = "cf411a9ede3b466677df8ee672bcc6c396e71e1a"
9898
docker_image_url: str = "nvcr.io/nvidia/nemo:24.01.01"
9999
stages: str = '["training"]'
100-
data_dir: str = "~"
101100
numa_mapping: NumaMapping = Field(default_factory=NumaMapping)
102101
cluster: Cluster = Field(default_factory=Cluster)
103102
training: Training = Field(default_factory=Training)

tests/slurm_command_gen_strategy/test_nemo_launcher.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -135,40 +135,22 @@ def test_gpus_per_node_value(self, nemo_cmd_gen: NeMoLauncherSlurmCommandGenStra
135135

136136
assert "cluster.gpus_per_node=null" in cmd
137137

138-
def test_argument_with_tilde_value(self, nemo_cmd_gen: NeMoLauncherSlurmCommandGenStrategy, nemo_test_run: TestRun):
139-
tdef: NeMoLauncherTestDefinition = cast(NeMoLauncherTestDefinition, nemo_test_run.test.test_definition)
140-
tdef.cmd_args.training.model.micro_batch_size = "~" # type: ignore
141-
142-
cmd = nemo_cmd_gen.gen_exec_command(nemo_test_run)
143-
assert "~training.model.micro_batch_size=null" in cmd
144-
145138
def test_data_impl_mock_skips_checks(
146139
self, nemo_cmd_gen: NeMoLauncherSlurmCommandGenStrategy, nemo_test_run: TestRun
147140
):
148141
tdef: NeMoLauncherTestDefinition = cast(NeMoLauncherTestDefinition, nemo_test_run.test.test_definition)
149-
tdef.cmd_args.data_dir = "DATA_DIR"
150-
142+
tdef.extra_cmd_args = {"data_dir": "DATA_DIR"}
151143
cmd = nemo_cmd_gen.gen_exec_command(nemo_test_run)
152-
assert "data_dir" in cmd
144+
assert "data_dir=DATA_DIR" in cmd
153145

154146
def test_data_dir_and_data_prefix_validation(
155147
self, nemo_cmd_gen: NeMoLauncherSlurmCommandGenStrategy, nemo_test_run: TestRun
156148
):
157149
tdef: NeMoLauncherTestDefinition = cast(NeMoLauncherTestDefinition, nemo_test_run.test.test_definition)
158150
tdef.cmd_args.training.model.data.data_impl = "not_mock"
159151
tdef.cmd_args.training.model.data.data_prefix = "[]"
152+
tdef.extra_cmd_args = {"data_dir": "DATA_DIR"}
160153

161-
with pytest.raises(
162-
ValueError,
163-
match=(
164-
"The 'data_dir' field of the NeMo launcher test contains an invalid placeholder '~'. "
165-
"Please provide a valid path to the dataset in the test schema TOML file. "
166-
"The 'data_dir' field must point to an actual dataset location."
167-
),
168-
):
169-
nemo_cmd_gen.gen_exec_command(nemo_test_run)
170-
171-
tdef.cmd_args.data_dir = "/fake/data_dir"
172154
with pytest.raises(ValueError, match="The 'data_prefix' field of the NeMo launcher test is missing or empty."):
173155
nemo_cmd_gen.gen_exec_command(nemo_test_run)
174156

0 commit comments

Comments
 (0)