Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit 5b21840

Browse files
authored
ENH: Upgrading package versions for security patches (#757)
* 🚧 πŸ’₯ Update to secure versions of packages * ⬆️ Upgrade hi-ml version * 🏷️ Fix mypy and update windows env * βœ… Update current epoch value in lightning tests * πŸ› Fix wnidows line endings * βœ… Remove checkpoint load epoch check * πŸ“Œ Lock env anew * 🎨 πŸ› Add merge_conda_files() to common_util * βœ… Add logging to tests * 🚧 Update VarINetWithImageLogging logger syntax * βœ… Fix Train2Nodes tests * βœ… Remove cwd change, update CIFAR SSL metrics * ⚰️ Remove unnecessary PL backwards compatibility * πŸ“Œ Lock env * πŸ“Œ Upgrade to hi-ml v0.2.5 * πŸ“Œ Testing lightning 1.6.5 * ♻️ Resolve PR comments
1 parent 7894498 commit 5b21840

15 files changed

+210
-142
lines changed

β€ŽInnerEye/Common/common_util.py

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,14 @@
1212
from enum import Enum
1313
from functools import wraps
1414
from pathlib import Path
15-
from typing import Any, Callable, Generator, Iterable, List, Optional, Union
15+
from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Union
16+
17+
import conda_merge
18+
import ruamel.yaml
19+
from health_azure.utils import (
20+
CONDA_CHANNELS, CONDA_DEPENDENCIES, CONDA_NAME, CONDA_PIP, CondaDependencies, PinnedOperator,
21+
_log_conda_dependencies_stats, _retrieve_unique_deps, is_conda_file_with_pip_include, is_pip_include_dependency
22+
)
1623

1724
from InnerEye.Common.fixed_paths import repository_root_directory
1825
from InnerEye.Common.type_annotations import PathOrString
@@ -427,3 +434,75 @@ def change_working_directory(path_or_str: PathOrString) -> Generator:
427434
os.chdir(new_path)
428435
yield
429436
os.chdir(old_path)
437+
438+
439+
def merge_conda_files(
440+
conda_files: List[Path],
441+
result_file: Path,
442+
pip_files: Optional[List[Path]] = None,
443+
) -> None:
444+
"""
445+
Merges the given Conda environment files using the conda_merge package, optionally adds any
446+
dependencies from pip requirements files, and writes the merged file to disk.
447+
448+
:param conda_files: The Conda environment files to read.
449+
:param result_file: The location where the merge results should be written.
450+
:param pip_files: An optional list of one or more pip requirements files including extra dependencies.
451+
"""
452+
env_definitions: List[Any] = []
453+
for file in conda_files:
454+
_, pip_without_include = is_conda_file_with_pip_include(file)
455+
env_definitions.append(pip_without_include)
456+
unified_definition = {}
457+
458+
extra_pip_deps = []
459+
for pip_file in pip_files or []:
460+
additional_pip_deps = [d for d in pip_file.read_text().split("\n") if d and not is_pip_include_dependency(d)]
461+
extra_pip_deps.extend(additional_pip_deps)
462+
463+
name = conda_merge.merge_names(env.get(CONDA_NAME) for env in env_definitions)
464+
if name:
465+
unified_definition[CONDA_NAME] = name
466+
467+
try:
468+
channels = conda_merge.merge_channels(env.get(CONDA_CHANNELS) for env in env_definitions)
469+
except conda_merge.MergeError:
470+
logging.error("Failed to merge channel priorities.")
471+
raise
472+
if channels:
473+
unified_definition[CONDA_CHANNELS] = channels
474+
475+
try:
476+
deps_to_merge = [env.get(CONDA_DEPENDENCIES) for env in env_definitions]
477+
if len(extra_pip_deps) > 0:
478+
deps_to_merge.append([{CONDA_PIP: extra_pip_deps}])
479+
deps = conda_merge.merge_dependencies(deps_to_merge)
480+
481+
# Get conda dependencies and pip dependencies from specification
482+
pip_deps_entries = [d for d in deps if isinstance(d, dict) and CONDA_PIP in d] # type: ignore
483+
if len(pip_deps_entries) == 0:
484+
raise ValueError("Didn't find a dictionary with the key 'pip' in the list of dependencies")
485+
pip_deps_entry: Dict[str, List[str]] = pip_deps_entries[0]
486+
pip_deps = pip_deps_entry[CONDA_PIP]
487+
# temporarily remove pip dependencies from deps to be added back after deduplicaton
488+
deps.remove(pip_deps_entry)
489+
490+
# remove all non-pip duplicates from the list of dependencies
491+
unique_deps = _retrieve_unique_deps(deps, PinnedOperator.CONDA)
492+
493+
unique_pip_deps = sorted(_retrieve_unique_deps(pip_deps, PinnedOperator.PIP))
494+
495+
# finally add back the deduplicated list of dependencies
496+
unique_deps.append({CONDA_PIP: unique_pip_deps}) # type: ignore
497+
498+
except conda_merge.MergeError:
499+
logging.error("Failed to merge dependencies.")
500+
raise
501+
if unique_deps:
502+
unified_definition[CONDA_DEPENDENCIES] = unique_deps
503+
else:
504+
raise ValueError("No dependencies found in any of the conda files.")
505+
506+
with result_file.open("w") as f:
507+
ruamel.yaml.dump(unified_definition, f, indent=2, default_flow_style=False)
508+
_log_conda_dependencies_stats(CondaDependencies(result_file), "Merged Conda environment")

β€ŽInnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,16 +97,10 @@ def on_pretrain_routine_start(self, trainer: pl.Trainer, pl_module: pl.Lightning
9797
p=self.drop_p,
9898
n_hidden=self.hidden_dim)
9999
self.evaluator.to(pl_module.device)
100-
if hasattr(trainer, "accelerator_connector"):
101-
# This works with Lightning 1.3.8
102-
accelerator = trainer.accelerator_connector
103-
elif hasattr(trainer, "_accelerator_connector"):
104-
# This works with Lightning 1.5.5
105-
accelerator = trainer._accelerator_connector
106-
else:
107-
raise ValueError("Unable to retrieve the accelerator information")
100+
accelerator = trainer._accelerator_connector
101+
108102
if accelerator.is_distributed:
109-
if accelerator.use_ddp:
103+
if accelerator.strategy.strategy_name == "ddp":
110104
self.evaluator = SyncBatchNorm.convert_sync_batchnorm(self.evaluator)
111105
self.evaluator = DistributedDataParallel(self.evaluator, device_ids=[pl_module.device]) # type: ignore
112106
else:

β€ŽInnerEye/ML/configs/other/fastmri_varnet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ class VarNetWithImageLogging(VarNetModule):
3232
"""
3333

3434
def log_image(self, name: str, image: torch.Tensor) -> None:
35-
experiments = self.logger.experiment if isinstance(self.logger.experiment, list) \
36-
else [self.logger.experiment]
35+
experiments = self.loggers[0].experiment if isinstance(self.loggers[0].experiment, list) \
36+
else [self.loggers[0].experiment]
3737
for experiment in experiments:
3838
if isinstance(experiment, SummaryWriter):
3939
experiment.add_image(name, image, global_step=self.global_step)

β€ŽInnerEye/ML/lightning_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def on_train_end(self) -> None:
289289
This hook is called at the very end of training. Use that to write the very last set of training and
290290
validation metrics from the StoringLogger to disk.
291291
"""
292-
self.read_epoch_results_from_logger_and_store(epoch=self.current_epoch)
292+
self.read_epoch_results_from_logger_and_store(epoch=self.current_epoch-1)
293293

294294
@rank_zero_only
295295
def read_epoch_results_from_logger_and_store(self, epoch: int) -> None:

β€ŽInnerEye/ML/model_training.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def create_lightning_trainer(container: LightningContainer,
120120
save_top_k=0)
121121
recovery_checkpoint_callback = ModelCheckpoint(dirpath=str(container.checkpoint_folder),
122122
filename=AUTOSAVE_CHECKPOINT_FILE_NAME,
123-
every_n_val_epochs=container.autosave_every_n_val_epochs,
123+
every_n_epochs=container.autosave_every_n_val_epochs,
124124
save_last=False)
125125
callbacks: List[Callback] = [
126126
last_checkpoint_callback,
@@ -264,11 +264,10 @@ def model_train(checkpoint_path: Optional[Path],
264264
lightning_model.storing_logger = storing_logger
265265

266266
logging.info("Starting training")
267-
# When training models that are not built-in InnerEye models, we have no guarantee that they write
268-
# files to the right folder. Best guess is to change the current working directory to where files should go.
269-
with change_working_directory(container.outputs_folder):
270-
trainer.fit(lightning_model, datamodule=data_module)
271-
trainer.logger.close() # type: ignore
267+
268+
trainer.fit(lightning_model, datamodule=data_module)
269+
trainer.logger.close() # type: ignore
270+
272271
world_size = getattr(trainer, "world_size", 0)
273272
is_azureml_run = not is_offline_run_context(RUN_CONTEXT)
274273
# Per-subject model outputs for regression models are written per rank, and need to be aggregated here.

β€ŽInnerEye/ML/run_ml.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,33 +15,37 @@
1515
import torch.multiprocessing
1616
from azureml._restclient.constants import RunStatus
1717
from azureml.core import Model, Run, model
18+
from health_azure import AzureRunInfo
19+
from health_azure.utils import ENVIRONMENT_VERSION, create_run_recovery_id, is_global_rank_zero
1820
from pytorch_lightning import LightningModule, seed_everything
1921
from pytorch_lightning.core.datamodule import LightningDataModule
2022
from torch.utils.data import DataLoader
2123

2224
from InnerEye.Azure import azure_util
2325
from InnerEye.Azure.azure_config import AzureConfig
2426
from InnerEye.Azure.azure_runner import ENV_OMPI_COMM_WORLD_RANK, get_git_tags
25-
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, \
26-
EFFECTIVE_RANDOM_SEED_KEY_NAME, IS_ENSEMBLE_KEY_NAME, MODEL_ID_KEY_NAME, PARENT_RUN_CONTEXT, \
27-
PARENT_RUN_ID_KEY_NAME, RUN_CONTEXT, RUN_RECOVERY_FROM_ID_KEY_NAME, RUN_RECOVERY_ID_KEY_NAME, \
28-
get_all_environment_files, is_offline_run_context
27+
from InnerEye.Azure.azure_util import (
28+
CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, EFFECTIVE_RANDOM_SEED_KEY_NAME,
29+
IS_ENSEMBLE_KEY_NAME, MODEL_ID_KEY_NAME, PARENT_RUN_CONTEXT, PARENT_RUN_ID_KEY_NAME, RUN_CONTEXT,
30+
RUN_RECOVERY_FROM_ID_KEY_NAME, RUN_RECOVERY_ID_KEY_NAME, get_all_environment_files, is_offline_run_context
31+
)
2932
from InnerEye.Common import fixed_paths
30-
from InnerEye.Common.common_util import (BASELINE_COMPARISONS_FOLDER, BASELINE_WILCOXON_RESULTS_FILE,
31-
CROSSVAL_RESULTS_FOLDER, ENSEMBLE_SPLIT_NAME, FULL_METRICS_DATAFRAME_FILE,
32-
METRICS_AGGREGATES_FILE, ModelProcessing,
33-
OTHER_RUNS_SUBDIR_NAME, SCATTERPLOTS_SUBDIR_NAME, SUBJECT_METRICS_FILE_NAME,
34-
change_working_directory, get_best_epoch_results_path, is_windows,
35-
logging_section, print_exception, remove_file_or_directory)
33+
from InnerEye.Common.common_util import (
34+
BASELINE_COMPARISONS_FOLDER, BASELINE_WILCOXON_RESULTS_FILE, CROSSVAL_RESULTS_FOLDER, ENSEMBLE_SPLIT_NAME,
35+
FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, OTHER_RUNS_SUBDIR_NAME, SCATTERPLOTS_SUBDIR_NAME,
36+
SUBJECT_METRICS_FILE_NAME, ModelProcessing, change_working_directory, get_best_epoch_results_path,
37+
is_windows, logging_section, merge_conda_files, print_exception, remove_file_or_directory
38+
)
3639
from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, PYTHON_ENVIRONMENT_NAME
3740
from InnerEye.Common.type_annotations import PathOrString
3841
from InnerEye.ML.baselines_util import compare_folders_and_run_outputs
39-
from InnerEye.ML.common import CHECKPOINT_FOLDER, EXTRA_RUN_SUBFOLDER, FINAL_ENSEMBLE_MODEL_FOLDER, \
40-
FINAL_MODEL_FOLDER, \
41-
ModelExecutionMode
42+
from InnerEye.ML.common import (
43+
CHECKPOINT_FOLDER, EXTRA_RUN_SUBFOLDER, FINAL_ENSEMBLE_MODEL_FOLDER, FINAL_MODEL_FOLDER, ModelExecutionMode
44+
)
4245
from InnerEye.ML.config import SegmentationModelBase
43-
from InnerEye.ML.deep_learning_config import DeepLearningConfig, ModelCategory, MultiprocessingStartMethod, \
44-
load_checkpoint
46+
from InnerEye.ML.deep_learning_config import (
47+
DeepLearningConfig, ModelCategory, MultiprocessingStartMethod, load_checkpoint
48+
)
4549
from InnerEye.ML.lightning_base import InnerEyeContainer
4650
from InnerEye.ML.lightning_container import InnerEyeInference, LightningContainer
4751
from InnerEye.ML.lightning_loggers import StoringLogger
@@ -50,16 +54,16 @@
5054
from InnerEye.ML.model_inference_config import ModelInferenceConfig
5155
from InnerEye.ML.model_testing import model_test
5256
from InnerEye.ML.model_training import create_lightning_trainer, model_train
53-
from InnerEye.ML.reports.notebook_report import generate_classification_crossval_notebook, \
54-
generate_classification_multilabel_notebook, generate_classification_notebook, generate_segmentation_notebook, \
55-
get_ipynb_report_name, reports_folder
57+
from InnerEye.ML.reports.notebook_report import (
58+
generate_classification_crossval_notebook, generate_classification_multilabel_notebook,
59+
generate_classification_notebook, generate_segmentation_notebook, get_ipynb_report_name, reports_folder
60+
)
5661
from InnerEye.ML.scalar_config import ScalarModelBase
5762
from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler, download_all_checkpoints_from_run
5863
from InnerEye.ML.visualizers import activation_maps
59-
from InnerEye.ML.visualizers.plot_cross_validation import \
64+
from InnerEye.ML.visualizers.plot_cross_validation import (
6065
get_config_and_results_for_offline_runs, plot_cross_validation_from_files
61-
from health_azure import AzureRunInfo
62-
from health_azure.utils import ENVIRONMENT_VERSION, create_run_recovery_id, is_global_rank_zero, merge_conda_files
66+
)
6367

6468
ModelDeploymentHookSignature = Callable[[LightningContainer, AzureConfig, Model, ModelProcessing], Any]
6569
PostCrossValidationHookSignature = Callable[[ModelConfigBase, Path], None]
@@ -797,8 +801,10 @@ def create_ensemble_model_and_run_inference(self) -> None:
797801
remove_file_or_directory(other_runs_dir)
798802

799803
def plot_cross_validation_and_upload_results(self) -> Path:
800-
from InnerEye.ML.visualizers.plot_cross_validation import crossval_config_from_model_config, \
801-
plot_cross_validation, unroll_aggregate_metrics
804+
from InnerEye.ML.visualizers.plot_cross_validation import (
805+
crossval_config_from_model_config, plot_cross_validation, unroll_aggregate_metrics
806+
)
807+
802808
# perform aggregation as cross val splits are now ready
803809
plot_crossval_config = crossval_config_from_model_config(self.innereye_config)
804810
plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]

β€ŽInnerEye/ML/runner.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,36 +24,36 @@
2424
# in a submodule
2525
fixed_paths.add_submodules_to_path()
2626

27+
import matplotlib
2728
from azureml._base_sdk_common import user_agent
2829
from azureml._restclient.constants import RunStatus
2930
from azureml.core import Run, ScriptRunConfig
3031
from health_azure import AzureRunInfo, submit_to_azure_if_needed
31-
from health_azure.utils import create_run_recovery_id, is_global_rank_zero, is_local_rank_zero, merge_conda_files, \
32-
to_azure_friendly_string
33-
import matplotlib
32+
from health_azure.utils import create_run_recovery_id, is_global_rank_zero, is_local_rank_zero, to_azure_friendly_string
3433

35-
from InnerEye.Azure.tensorboard_monitor import AMLTensorBoardMonitorConfig, monitor
3634
from InnerEye.Azure import azure_util
3735
from InnerEye.Azure.azure_config import AzureConfig, ParserResult, SourceConfig
38-
from InnerEye.Azure.azure_runner import (DEFAULT_DOCKER_BASE_IMAGE, create_dataset_configs, create_experiment_name,
39-
create_runner_parser,
40-
get_git_tags,
41-
parse_args_and_add_yaml_variables,
42-
parse_arguments, additional_run_tags,
43-
set_environment_variables_for_multi_node)
44-
from InnerEye.Azure.azure_util import (RUN_CONTEXT, RUN_RECOVERY_ID_KEY_NAME, get_all_environment_files,
45-
is_offline_run_context)
36+
from InnerEye.Azure.azure_runner import (
37+
DEFAULT_DOCKER_BASE_IMAGE, additional_run_tags, create_dataset_configs,
38+
create_experiment_name, create_runner_parser, get_git_tags,
39+
parse_args_and_add_yaml_variables, parse_arguments, set_environment_variables_for_multi_node
40+
)
41+
from InnerEye.Azure.azure_util import (
42+
RUN_CONTEXT, RUN_RECOVERY_ID_KEY_NAME, get_all_environment_files, is_offline_run_context
43+
)
4644
from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
47-
from InnerEye.Common.common_util import (FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE,
48-
is_linux, logging_to_stdout)
45+
from InnerEye.Azure.tensorboard_monitor import AMLTensorBoardMonitorConfig, monitor
46+
from InnerEye.Common.common_util import (
47+
FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, is_linux, logging_to_stdout, merge_conda_files
48+
)
4949
from InnerEye.Common.generic_parsing import GenericConfig
5050
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
5151
from InnerEye.ML.deep_learning_config import DeepLearningConfig
5252
from InnerEye.ML.lightning_base import InnerEyeContainer
53+
from InnerEye.ML.lightning_container import LightningContainer
5354
from InnerEye.ML.model_config_base import ModelConfigBase
5455
from InnerEye.ML.run_ml import MLRunner, ModelDeploymentHookSignature, PostCrossValidationHookSignature
5556
from InnerEye.ML.utils.config_loader import ModelConfigLoader
56-
from InnerEye.ML.lightning_container import LightningContainer
5757

5858
# We change the current working directory before starting the actual training. However, this throws off starting
5959
# the child training threads because sys.argv[0] is a relative path when running in AzureML. Turn that into an absolute

0 commit comments

Comments
Β (0)