Skip to content

Commit ddf3795

Browse files
authored
Fixes advisory [GHSA-rghg-q7wp-9767](GHSA-rghg-q7wp-9767). ### Description A few sentences describing the changes proposed in this pull request. ### Types of changes <!--- Put an `x` in all the boxes that apply, and remove the not applicable items --> - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [ ] Breaking change (fix or new feature that would cause existing functionality to change). - [ ] New tests added to cover the changes. - [ ] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [ ] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. - [ ] In-line docstrings updated. - [ ] Documentation updated, tested `make html` command in the `docs/` folder. --------- Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>
1 parent 24da7c7 commit ddf3795

3 files changed

Lines changed: 124 additions & 13 deletions

File tree

monai/apps/nnunet/nnunetv2_runner.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import glob
1616
import os
17+
import re
1718
import shlex
1819
import subprocess
1920
from typing import Any
@@ -34,6 +35,8 @@
3435

3536
__all__ = ["nnUNetV2Runner"]
3637

38+
DATASET_ID_FORMAT = r"Dataset[0-9]{3}|[0-9]+" # regex format for a valid nnUnet dataset name
39+
3740

3841
class nnUNetV2Runner: # noqa: N801
3942
"""
@@ -195,6 +198,13 @@ def __init__(
195198

196199
# dataset_name_or_id has to be a string
197200
self.dataset_name_or_id = str(self.input_info.pop("dataset_name_or_id", 1))
201+
self.dataset_name: str | None = None
202+
203+
# ensure the dataset name is a single identifier/number, this prevents code injection when composing commands
204+
if re.fullmatch(DATASET_ID_FORMAT, self.dataset_name_or_id) is None:
205+
raise ValueError(
206+
f"Value for dataset_name_or_id `{self.dataset_name_or_id}` not a valid dataset name or ID."
207+
)
198208

199209
try:
200210
from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name
@@ -239,7 +249,7 @@ def convert_dataset(self):
239249

240250
from nnunetv2.utilities.dataset_name_id_conversion import maybe_convert_to_dataset_name
241251

242-
self.dataset_name = maybe_convert_to_dataset_name(int(self.dataset_name_or_id))
252+
self.dataset_name = maybe_convert_to_dataset_name(self.dataset_name_or_id)
243253

244254
datalist_json = ConfigParser.load_config_file(self.input_info.pop("datalist"))
245255

@@ -548,7 +558,7 @@ def train_single_model_command(
548558
Raises:
549559
ValueError: If gpu_id is an empty tuple or list.
550560
"""
551-
env = os.environ.copy()
561+
env: dict[str, str] = os.environ.copy()
552562
device_setting: str = "0"
553563
num_gpus = 1
554564
if isinstance(gpu_id, str):
@@ -574,22 +584,25 @@ def train_single_model_command(
574584

575585
cmd = [
576586
"nnUNetv2_train",
577-
f"{self.dataset_name_or_id}",
578-
f"{config}",
579-
f"{fold}",
587+
self.dataset_name_or_id,
588+
config,
589+
fold,
580590
"-tr",
581-
f"{self.trainer_class_name}",
591+
self.trainer_class_name,
582592
"-num_gpus",
583-
f"{num_gpus}",
593+
num_gpus,
584594
]
595+
585596
if self.export_validation_probabilities:
586597
cmd.append("--npz")
598+
587599
for _key, _value in kwargs.items():
588-
if _key == "p" or _key == "pretrained_weights":
589-
cmd.extend([f"-{_key}", f"{_value}"])
590-
else:
591-
cmd.extend([f"--{_key}", f"{_value}"])
592-
return cmd, env
600+
prefix = "-" if _key in {"p", "pretrained_weights"} else "--"
601+
cmd += [f"{prefix}{_key}", str(_value)]
602+
603+
cmd_str: list[str] = [str(c) for c in cmd]
604+
605+
return cmd_str, env
593606

594607
def train(
595608
self,
@@ -641,7 +654,14 @@ def train_parallel_cmd(
641654
None (all available GPUs).
642655
kwargs: this optional parameter allows you to specify additional arguments defined in the
643656
``train_single_model`` method.
657+
658+
Raises:
659+
ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one.
644660
"""
661+
662+
if self.dataset_name is None:
663+
raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.")
664+
645665
# unpack compressed files
646666
folder_names = []
647667
for root, _, files in os.walk(os.path.join(self.nnunet_preprocessed, self.dataset_name)):
@@ -696,7 +716,14 @@ def train_parallel(
696716
None (all available GPUs).
697717
kwargs: this optional parameter allows you to specify additional arguments defined in the
698718
``train_single_model`` method.
719+
720+
Raises:
721+
ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one.
699722
"""
723+
724+
if self.dataset_name is None:
725+
raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.")
726+
700727
all_cmds = self.train_parallel_cmd(configs=configs, gpu_id_for_all=gpu_id_for_all, **kwargs)
701728
for s, cmds in enumerate(all_cmds):
702729
for gpu_id, gpu_cmd in cmds.items():
@@ -908,7 +935,14 @@ def predict_ensemble_postprocessing(
908935
run_postprocessing: whether to conduct post-processing
909936
kwargs: this optional parameter allows you to specify additional arguments defined in the
910937
``predict`` method.
938+
939+
Raises:
940+
ValueError: self.dataset_name must have a value, ie. when using an existing dataset or after creating one.
911941
"""
942+
943+
if self.dataset_name is None:
944+
raise ValueError(f"A valid dataset name must be given in {self.dataset_name=}.")
945+
912946
from nnunetv2.ensembling.ensemble import ensemble_folders
913947
from nnunetv2.postprocessing.remove_connected_components import apply_postprocessing_to_folder
914948
from nnunetv2.utilities.file_path_utilities import get_output_folder

tests/integration/test_integration_nnunet_bundle.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,10 @@ def test_nnunet_bundle(self) -> None:
9797
with skip_if_downloading_fails():
9898
runner.run(run_train=False, run_find_best_configuration=False, run_predict_ensemble_postprocessing=False)
9999

100+
self.assertIsNotNone(runner.dataset_name)
101+
100102
nnunet_trainer = get_nnunet_trainer(
101-
dataset_name_or_id=runner.dataset_name, fold=0, configuration="3d_fullres"
103+
dataset_name_or_id=str(runner.dataset_name), fold=0, configuration="3d_fullres"
102104
)
103105

104106
print("Max Epochs: ", nnunet_trainer.num_epochs)

tests/integration/test_integration_nnunetv2_runner.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@
1111

1212
from __future__ import annotations
1313

14+
import logging
1415
import os
1516
import tempfile
1617
import unittest
18+
from textwrap import dedent
1719

1820
import nibabel as nib
1921
import numpy as np
2022

23+
import monai.apps.nnunet.nnunetv2_runner
2124
from monai.apps.nnunet import nnUNetV2Runner
2225
from monai.bundle.config_parser import ConfigParser
2326
from monai.data import create_test_image_3d
@@ -27,6 +30,8 @@
2730
_, has_tb = optional_import("torch.utils.tensorboard", name="SummaryWriter")
2831
_, has_nnunet = optional_import("nnunetv2")
2932

33+
monai.apps.nnunet.nnunetv2_runner.logger.setLevel(logging.ERROR) # suppress warning logging to clean up test output
34+
3035
sim_datalist: dict[str, list[dict]] = {
3136
"testing": [{"image": "val_001.fake.nii.gz"}, {"image": "val_002.fake.nii.gz"}],
3237
"training": [
@@ -91,5 +96,75 @@ def tearDown(self) -> None:
9196
self.test_dir.cleanup()
9297

9398

99+
@skip_if_quick
100+
@unittest.skipIf(not has_nnunet, "no nnunetv2")
101+
class TestnnUNetV2RunnerSecurity(unittest.TestCase):
102+
def setUp(self) -> None:
103+
self.test_dir = tempfile.TemporaryDirectory()
104+
test_path = self.test_dir.name
105+
106+
self.good_yml1 = os.path.join(test_path, "good1.yml")
107+
self.good_yml2 = os.path.join(test_path, "good2.yml")
108+
self.inject_yml = os.path.join(test_path, "test.yml")
109+
110+
good_yml_content1 = """
111+
dataset_name_or_id: Dataset123
112+
dataroot: ./data
113+
datalist: ./lists/task4.json
114+
work_dir: ./work
115+
nnunet_raw: ./nnUNet_raw
116+
nnunet_preprocessed: ./nnUNet_preprocessed
117+
nnunet_results: ./nnUNet_results
118+
"""
119+
120+
with open(self.good_yml1, "w") as o:
121+
o.write(dedent(good_yml_content1))
122+
123+
good_yml_content2 = """
124+
dataset_name_or_id: 123
125+
dataroot: ./data
126+
datalist: ./lists/task4.json
127+
work_dir: ./work
128+
nnunet_raw: ./nnUNet_raw
129+
nnunet_preprocessed: ./nnUNet_preprocessed
130+
nnunet_results: ./nnUNet_results
131+
"""
132+
133+
with open(self.good_yml2, "w") as o:
134+
o.write(dedent(good_yml_content2))
135+
136+
# define a config file with code-injecting dataset name
137+
injecting_yml_content = """
138+
dataset_name_or_id: '4 & echo "This is exploited" > "./test.txt" & rem'
139+
dataroot: ./data
140+
datalist: ./lists/task4.json
141+
work_dir: ./work
142+
nnunet_raw: ./nnUNet_raw
143+
nnunet_preprocessed: ./nnUNet_preprocessed
144+
nnunet_results: ./nnUNet_results
145+
"""
146+
147+
with open(self.inject_yml, "w") as o:
148+
o.write(dedent(injecting_yml_content))
149+
150+
def test_nnunetv2runner_good_dataset_name(self) -> None:
151+
"""
152+
Test the dataset name given must conform to the nnUNet requirement of being an int or "Dataset###".
153+
"""
154+
for ds in [self.good_yml1, self.good_yml2]:
155+
with self.subTest(f"Testing {os.path.basename(ds)}"):
156+
nnUNetV2Runner(input_config=ds, trainer_class_name="nnUNetTrainer")
157+
158+
def test_nnunetv2runner_bad_dataset_name(self) -> None:
159+
"""
160+
Test the dataset name given must conform to the nnUNet requirement of being an int or "Dataset###".
161+
"""
162+
with self.assertRaises(ValueError):
163+
nnUNetV2Runner(input_config=self.inject_yml, trainer_class_name="nnUNetTrainer")
164+
165+
def tearDown(self) -> None:
166+
self.test_dir.cleanup()
167+
168+
94169
if __name__ == "__main__":
95170
unittest.main()

0 commit comments

Comments
 (0)