
Commit 9239215

Save model config to W&B (#225)
* 👽 Write the model_config to the files subdirectory and tell W&B to save it
* ✅ Fix tests to deal with new config location
* 🚚 Move get_wandb_run method into utils as it might be more widely useful
1 parent 4652ad9 commit 9239215

3 files changed: 30 additions & 16 deletions


icenet_mp/model_service.py

Lines changed: 11 additions & 14 deletions
@@ -8,16 +8,14 @@
 from lightning import Callback, Trainer
 from lightning.fabric.utilities import suggested_max_num_workers
 from lightning.pytorch.callbacks import ModelCheckpoint
-from lightning.pytorch.loggers import WandbLogger
 from omegaconf import DictConfig, OmegaConf
 from wandb.sdk.lib.runid import generate_id
-from wandb.wandb_run import Run
 
 from icenet_mp.callbacks import UnconditionalCheckpoint
 from icenet_mp.data_loaders import CommonDataModule
 from icenet_mp.models.base_model import BaseModel
 from icenet_mp.types import SupportsMetadata
-from icenet_mp.utils import get_device_name, get_timestamp
+from icenet_mp.utils import get_device_name, get_timestamp, get_wandb_run
 
 if TYPE_CHECKING:
     from lightning.pytorch.loggers import Logger as LightningLogger
@@ -79,7 +77,7 @@ def from_checkpoint(
         # Build a combined model configuration where the command line config takes
         # precedence except for the "model", "predict" and "train" keys which are
         # related to training the model.
-        config_path = checkpoint_path.parent.parent / "model_config.yaml"
+        config_path = checkpoint_path.parent.parent / "files" / "model_config.yaml"
         try:
             # Load the model configuration from the checkpoint directory
             ckpt_config = DictConfig(OmegaConf.load(config_path))
@@ -132,16 +130,12 @@ def model(self) -> BaseModel:
 
     @property
     def run_directory(self) -> Path:
-        """Get run directory from wandb logger or generate one in the same format."""
+        """Get run directory from Wandb or generate one in the same format."""
         if not self.run_directory_:
-            # Get the run directory from the WandbLogger if it exists
-            for lightning_logger in self.trainer.loggers:
-                if not isinstance(lightning_logger, WandbLogger):
-                    continue
-                if not isinstance(experiment := lightning_logger.experiment, Run):
-                    continue
-                self.run_directory_ = Path(experiment._settings.sync_dir)
-                break
+            # Get the run directory from Wandb if it exists
+            wandb_run = get_wandb_run(self.trainer)
+            if wandb_run:
+                self.run_directory_ = Path(wandb_run._settings.sync_dir)
 
         # Otherwise generate a new run directory
         if not self.run_directory_:
@@ -233,7 +227,10 @@ def configure_trainer(
             callback.dirpath = self.run_directory / "checkpoints"
 
         # Save model config to the run directory
-        OmegaConf.save(self.config, self.run_directory / "model_config.yaml")
+        model_config_path = self.run_directory / "files" / "model_config.yaml"
+        OmegaConf.save(self.config, model_config_path)
+        if wandb_run := get_wandb_run(self.trainer):
+            wandb_run.save(model_config_path, base_path=model_config_path.parent)
 
     def evaluate(self) -> None:
         """Evaluate a trained model."""

icenet_mp/utils.py

Lines changed: 13 additions & 0 deletions
@@ -2,6 +2,9 @@
 
 import numpy as np
 import torch
+from lightning import Trainer
+from lightning.pytorch.loggers import WandbLogger
+from wandb.wandb_run import Run
 
 
 def datetime_from_npdatetime(dt: np.datetime64) -> datetime:
@@ -31,6 +34,16 @@ def get_timestamp() -> str:
     return datetime.now(tz=UTC).strftime(r"%Y%m%d_%H%M%S")
 
 
+def get_wandb_run(trainer: Trainer) -> Run | None:
+    """Get the Wandb Run instance if it exists."""
+    for lightning_logger in trainer.loggers:
+        if isinstance(lightning_logger, WandbLogger) and isinstance(
+            experiment := lightning_logger.experiment, Run
+        ):
+            return experiment
+    return None
+
+
 def normalise_date(np_datetime: np.datetime64) -> np.datetime64:
     """Normalise a datetime to midnight."""
     dt: datetime = np_datetime.astype("datetime64[ms]").astype(datetime)
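
Because `get_wandb_run` now lives in `icenet_mp.utils`, other components can reuse the same guard. The callback below is a hypothetical example, not part of this commit.

```python
# Hypothetical use of get_wandb_run outside model_service.py: a callback that
# only touches the W&B run when a WandbLogger is actually attached.
from lightning import Callback, LightningModule, Trainer

from icenet_mp.utils import get_wandb_run


class TagWandbRun(Callback):
    """Illustrative callback, not part of this commit."""

    def on_fit_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
        if wandb_run := get_wandb_run(trainer):
            # Run.tags accepts reassignment, so append a tag non-destructively.
            wandb_run.tags = (*(wandb_run.tags or ()), "icenet_mp")
```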

tests/test_model_service.py

Lines changed: 6 additions & 2 deletions
@@ -63,7 +63,9 @@ def test_from_checkpoint_loads_model(
     checkpoint_path = checkpoints_dir / "model.ckpt"
     checkpoint_path.write_text("checkpoint")
 
-    OmegaConf.save(cfg_model_service, tmp_path / "model_config.yaml")
+    files_dir = tmp_path / "files"
+    files_dir.mkdir(parents=True)
+    OmegaConf.save(cfg_model_service, files_dir / "model_config.yaml")
 
     with pytest.MonkeyPatch.context() as mp:
         mp.setattr(
@@ -83,7 +85,9 @@ def test_from_checkpoint_config_overloads(
     checkpoint_path = checkpoints_dir / "model.ckpt"
     checkpoint_path.write_text("checkpoint")
 
-    OmegaConf.save(cfg_model_service, tmp_path / "model_config.yaml")
+    files_dir = tmp_path / "files"
+    files_dir.mkdir(parents=True)
+    OmegaConf.save(cfg_model_service, files_dir / "model_config.yaml")
 
     with pytest.MonkeyPatch.context() as mp:
         mp.setattr(
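
Both tests now build the directory layout that `from_checkpoint` expects: `checkpoints/` alongside `files/model_config.yaml`. A hypothetical fixture capturing that layout (illustrative only, not part of this commit) could look like this:

```python
# Hypothetical fixture sketching the run-directory layout the updated tests
# construct inline: checkpoints/ next to files/model_config.yaml.
from pathlib import Path

import pytest
from omegaconf import OmegaConf


@pytest.fixture
def run_dir(tmp_path: Path) -> Path:
    checkpoints_dir = tmp_path / "checkpoints"
    checkpoints_dir.mkdir()
    (checkpoints_dir / "model.ckpt").write_text("checkpoint")

    files_dir = tmp_path / "files"
    files_dir.mkdir()
    OmegaConf.save(OmegaConf.create({"model": {}}), files_dir / "model_config.yaml")
    return tmp_path
```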
