alan-turing-institute · jemrobinson · Mar 25, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/README.md b/README.md
@@ -50,6 +50,9 @@ You can then run this with, e.g.:
 ```bash
 uv run imp <command> --config-name <your local config>.yaml
 ```
+
+This will run using the default model setup (rescaling encoder, small UNet, rescaling decoder), which is sufficient for quick tests but not appropriate for larger training runs.
+
 You can also use this config to override other options in the `base.yaml` file, as shown below:
 
 ```yaml
@@ -74,7 +77,7 @@ uv run imp <command> ++base_path=/local/path/to/my/data
 
 See `config/demo_north.yaml` for an example of this.
 
-Note that `base_persistence.yaml` overrides the specific options in `base.yaml` needed to run the `Persistence` model.
+:warning: Note that `base_persistence.yaml` overrides the specific options in `base.yaml` needed to run the `Persistence` model.
 
 ### HPC-specific configurations
 

diff --git a/icenet_mp/callbacks/plotting_callback.py b/icenet_mp/callbacks/plotting_callback.py
@@ -8,6 +8,7 @@
 from torch import Tensor
 
 from icenet_mp.data_loaders import CombinedDataset
+from icenet_mp.models import BaseModel
 from icenet_mp.types import ModelTestOutput, PlotSpec
 from icenet_mp.utils import datetime_from_npdatetime
 from icenet_mp.visualisations import DEFAULT_SIC_SPEC, Plotter
@@ -56,7 +57,7 @@ def set_metadata(self, config: DictConfig, model_name: str) -> None:
     def on_test_batch_end(
         self,
         trainer: Trainer,
-        pl_module: LightningModule,  # noqa: ARG002
+        pl_module: LightningModule,
         outputs: Tensor | Mapping[str, Any] | None,
         batch: Any,  # noqa: ANN401, ARG002
         batch_idx: int,
@@ -93,7 +94,11 @@ def on_test_batch_end(
             map(datetime_from_npdatetime, dataset.get_forecast_steps(start_date))
         )
         # Set hemisphere for plotting based on dataset
-        self.plotter.set_hemisphere(dataset.hemisphere)
+        if not isinstance(pl_module, BaseModel):
+            msg = f"Lightning module is of type {type(pl_module)}, skipping plotting."
+            logger.warning(msg)
+            return
+        self.plotter.set_hemisphere(pl_module.hemisphere)
 
         # Get loggers that support image and video logging
         image_loggers = [ll for ll in trainer.loggers if hasattr(ll, "log_image")]

diff --git a/icenet_mp/cli/hydra.py b/icenet_mp/cli/hydra.py
@@ -32,7 +32,7 @@ def wrapper(
         config_name: Annotated[
             str | None,
             Option(help="Specify the name of a file to load from the config directory"),
-        ] = "base",
+        ] = "sample",
         *args: Param.args,
         **kwargs: Param.kwargs,
     ) -> RetType:

diff --git a/icenet_mp/config/base.yaml b/icenet_mp/config/base.yaml
@@ -5,7 +5,7 @@ defaults:
   - loggers:
     - wandb
   - model: naive_unet_naive
-  - predict: sic-icenet
+  - predict: sic-icenet-2d
   - train: default
   - _self_
 

diff --git a/icenet_mp/config/demo_north.yaml b/icenet_mp/config/demo_north.yaml
@@ -1,5 +1,5 @@
 defaults:
   - base
   - override /data: demo
-  - override /predict: osisaf-north
+  - override /predict: sic-icenet-2d
   - _self_
diff --git a/icenet_mp/config/model/cnn_null_cnn.yaml b/icenet_mp/config/model/cnn_null_cnn.yaml
@@ -4,15 +4,11 @@ name: cnn-null-cnn
 
 encoder:
   _target_: icenet_mp.models.encoders.CNNEncoder
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  latent_space: [128, 128] # Shape of the latent space
-  n_layers: 3 # Number of convolutional layers
+  latent_space: [144, 144] # Shape of the latent space
 
 processor:
   _target_: icenet_mp.models.processors.NullProcessor
 
 decoder:
   _target_: icenet_mp.models.decoders.CNNDecoder
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  n_layers: 3 # Number of convolutional layers
   bounded: false # Whether to bound the output between 0 and 1
diff --git a/icenet_mp/config/model/cnn_unet_cnn.yaml b/icenet_mp/config/model/cnn_unet_cnn.yaml
@@ -4,17 +4,11 @@ name: cnn-unet-cnn
 
 encoder:
   _target_: icenet_mp.models.encoders.CNNEncoder
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  latent_space: [128, 128] # Shape of the latent space
-  n_layers: 3 # Number of convolutional layers
+  latent_space: [144, 144] # Shape of the latent space
 
 processor:
   _target_: icenet_mp.models.processors.UNetProcessor
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  start_out_channels: 64 # Initial number of channels for the first convolutional layer
 
 decoder:
   _target_: icenet_mp.models.decoders.CNNDecoder
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  n_layers: 3 # Number of convolutional layers
   bounded: false # Whether to bound the output between 0 and 1
diff --git a/icenet_mp/config/model/cnn_vit_cnn.yaml b/icenet_mp/config/model/cnn_vit_cnn.yaml
@@ -4,21 +4,11 @@ name: cnn-vit-cnn
 
 encoder:
   _target_: icenet_mp.models.encoders.CNNEncoder
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  latent_space: [192, 192] # Shape of the latent space
-  n_layers: 3 # Number of convolutional layers
+  latent_space: [144, 144] # Shape of the latent space
 
 processor:
   _target_: icenet_mp.models.processors.VitProcessor
-  patch_size: 16
-  emb_dim: 128
-  depth: 3
-  heads: 4
-  mlp_dim: 256
-  dropout: 0.3
 
 decoder:
   _target_: icenet_mp.models.decoders.CNNDecoder
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  n_layers: 3 # Number of convolutional layers
   bounded: false # Whether to bound the output between 0 and 1
diff --git a/icenet_mp/config/model/ddpm.yaml b/icenet_mp/config/model/ddpm.yaml
@@ -1,13 +1,4 @@
 _target_: icenet_mp.models.ddpm.DDPM
 
+# Run DDPM model with default settings
 name: ddpm
-
-# DDPM parameters
-timesteps: 1000
-learning_rate: 5e-4
-start_out_channels: 32
-kernel_size: 3
-activation: "SiLU"
-normalization: "groupnorm"
-time_embed_dim : 256
-dropout_rate: 0.1
diff --git a/icenet_mp/config/model/naive_null_naive.yaml b/icenet_mp/config/model/naive_null_naive.yaml
@@ -4,7 +4,7 @@ name: naive-null-naive
 
 encoder:
   _target_: icenet_mp.models.encoders.NaiveLinearEncoder
-  latent_space: [128, 128] # Shape of the latent space
+  latent_space: [432, 432] # Shape of the latent space
 
 processor:
   _target_: icenet_mp.models.processors.NullProcessor

diff --git a/icenet_mp/config/model/naive_unet_naive.yaml b/icenet_mp/config/model/naive_unet_naive.yaml
@@ -4,12 +4,11 @@ name: naive-unet-naive
 
 encoder:
   _target_: icenet_mp.models.encoders.NaiveLinearEncoder
-  latent_space: [128, 128] # Shape of the latent space
+  latent_space: [432, 432] # Shape of the latent space
 
 processor:
   _target_: icenet_mp.models.processors.UNetProcessor
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  start_out_channels: 64 # Initial number of channels for the first convolutional layer
+  start_out_channels: 100 # Reduce number of channels to support 21-day forecasts
 
 decoder:
   _target_: icenet_mp.models.decoders.NaiveLinearDecoder

diff --git a/icenet_mp/config/model/naive_vit_naive.yaml b/icenet_mp/config/model/naive_vit_naive.yaml
@@ -4,16 +4,10 @@ name: naive-vit-naive
 
 encoder:
   _target_: icenet_mp.models.encoders.NaiveLinearEncoder
-  latent_space: [192, 192] # Shape of the latent space
+  latent_space: [432, 432] # Shape of the latent space
 
 processor:
   _target_: icenet_mp.models.processors.VitProcessor
-  patch_size: 16
-  emb_dim: 128
-  depth: 3
-  heads: 4
-  mlp_dim: 256
-  dropout: 0.3
 
 decoder:
   _target_: icenet_mp.models.decoders.NaiveLinearDecoder

diff --git a/icenet_mp/config/model/piecewise_null_piecewise.yaml b/icenet_mp/config/model/piecewise_null_piecewise.yaml
@@ -0,0 +1,13 @@
+_target_: icenet_mp.models.EncodeProcessDecode
+
+name: piecewise-null-piecewise
+
+encoder:
+  _target_: icenet_mp.models.encoders.PiecewiseEncoder
+  latent_space: [192, 192] # Shape of the latent space
+
+processor:
+  _target_: icenet_mp.models.processors.NullProcessor
+
+decoder:
+  _target_: icenet_mp.models.decoders.PiecewiseDecoder
diff --git a/icenet_mp/config/model/piecewise_unet_piecewise.yaml b/icenet_mp/config/model/piecewise_unet_piecewise.yaml
@@ -4,15 +4,10 @@ name: piecewise-unet-piecewise
 
 encoder:
   _target_: icenet_mp.models.encoders.PiecewiseEncoder
-  latent_space: [128, 128] # Shape of the latent space
-  n_conv_blocks: 3 # Number of convolutional blocks to add after encoding
+  latent_space: [192, 192] # Shape of the latent space
 
 processor:
   _target_: icenet_mp.models.processors.UNetProcessor
-  kernel_size: 3 # Size of the kernel for convolutional layers
-  start_out_channels: 64 # Initial number of channels for the first convolutional layer
 
 decoder:
   _target_: icenet_mp.models.decoders.PiecewiseDecoder
-  restrict_range: clamp # Method for restricting output range (e.g., clamp, sigmoid, tanh)
-  n_conv_blocks: 3 # Number of convolutional blocks to add before decoding
diff --git a/icenet_mp/config/model/piecewise_vit_piecewise.yaml b/icenet_mp/config/model/piecewise_vit_piecewise.yaml
@@ -0,0 +1,13 @@
+_target_: icenet_mp.models.EncodeProcessDecode
+
+name: piecewise-vit-piecewise
+
+encoder:
+  _target_: icenet_mp.models.encoders.PiecewiseEncoder
+  latent_space: [192, 192] # Shape of the latent space
+
+processor:
+  _target_: icenet_mp.models.processors.VitProcessor
+
+decoder:
+  _target_: icenet_mp.models.decoders.PiecewiseDecoder
diff --git a/icenet_mp/config/model/quick_test.yaml b/icenet_mp/config/model/quick_test.yaml
@@ -0,0 +1,14 @@
+_target_: icenet_mp.models.EncodeProcessDecode
+
+name: sample
+
+encoder:
+  _target_: icenet_mp.models.encoders.NaiveLinearEncoder
+  latent_space: [128, 128]
+
+processor:
+  _target_: icenet_mp.models.processors.UNetProcessor
+  start_out_channels: 64
+
+decoder:
+  _target_: icenet_mp.models.decoders.NaiveLinearDecoder
diff --git a/icenet_mp/config/predict/sic-icenet-14d.yaml b/icenet_mp/config/predict/sic-icenet-14d.yaml
@@ -0,0 +1,9 @@
+# Name of the dataset group containing our prediction target
+target:
+  group_name: sic-icenet
+
+# Number of future steps to predict
+n_forecast_steps: 14
+
+# Number of history steps to use when predicting
+n_history_steps: 3
diff --git a/icenet_mp/config/predict/sic-icenet-21d.yaml b/icenet_mp/config/predict/sic-icenet-21d.yaml
@@ -0,0 +1,9 @@
+# Name of the dataset group containing our prediction target
+target:
+  group_name: sic-icenet
+
+# Number of future steps to predict
+n_forecast_steps: 21
+
+# Number of history steps to use when predicting
+n_history_steps: 3
diff --git a/icenet_mp/config/predict/sic-icenet.yaml → icenet_mp/config/predict/sic-icenet-2d.yaml b/icenet_mp/config/predict/sic-icenet.yaml → icenet_mp/config/predict/sic-icenet-2d.yaml
diff --git a/icenet_mp/config/predict/sic-ssmis-14d.yaml b/icenet_mp/config/predict/sic-ssmis-14d.yaml
@@ -0,0 +1,11 @@
+# Name of the dataset group containing our prediction target
+target:
+  group_name: sic-ssmis
+  variables:
+    - ice_conc
+
+# Number of future steps to predict
+n_forecast_steps: 14
+
+# Number of history steps to use when predicting
+n_history_steps: 3
diff --git a/icenet_mp/config/predict/sic-ssmis-21d.yaml b/icenet_mp/config/predict/sic-ssmis-21d.yaml
@@ -0,0 +1,11 @@
+# Name of the dataset group containing our prediction target
+target:
+  group_name: sic-ssmis
+  variables:
+    - ice_conc
+
+# Number of future steps to predict
+n_forecast_steps: 21
+
+# Number of history steps to use when predicting
+n_history_steps: 3
diff --git a/icenet_mp/config/predict/sic-ssmis.yaml → icenet_mp/config/predict/sic-ssmis-2d.yaml b/icenet_mp/config/predict/sic-ssmis.yaml → icenet_mp/config/predict/sic-ssmis-2d.yaml
diff --git a/icenet_mp/config/sample.yaml b/icenet_mp/config/sample.yaml
@@ -0,0 +1,4 @@
+defaults:
+  - base
+  - override /model: quick_test
+  - _self_
diff --git a/icenet_mp/data_loaders/combined_dataset.py b/icenet_mp/data_loaders/combined_dataset.py
@@ -1,5 +1,4 @@
 from collections.abc import Sequence
-from typing import Literal
 
 import numpy as np
 from torch.utils.data import Dataset
@@ -87,17 +86,6 @@ def start_date(self) -> np.datetime64:
         """Return the start date of the dataset."""
         return self.dates[0]
 
-    @property
-    def hemisphere(self) -> Literal["north", "south"]:
-        """Return the hemisphere of the dataset."""
-        hemisphere: set[Literal["north", "south"]] = {
-            ds.hemisphere for ds in self.inputs
-        }
-        if len(hemisphere) != 1:
-            msg = f"Found {len(hemisphere)} different hemisphere indicators across {len(self.inputs)} datasets."
-            raise ValueError(msg)
-        return hemisphere.pop()
-
     def __len__(self) -> int:
         """Return the total length of the dataset."""
         return len(self.dates)

diff --git a/icenet_mp/data_loaders/common_data_module.py b/icenet_mp/data_loaders/common_data_module.py
@@ -7,7 +7,7 @@
 from omegaconf import DictConfig
 from torch.utils.data import DataLoader
 
-from icenet_mp.types import ArrayTCHW, DataloaderArgs, DataSpace
+from icenet_mp.types import ArrayTCHW, DataloaderArgs, DataSpace, Hemisphere
 
 from .combined_dataset import CombinedDataset
 from .single_dataset import SingleDataset
@@ -35,9 +35,11 @@ def __init__(self, config: DictConfig) -> None:
                     self.base_path / "data" / "anemoi" / f"{dataset['name']}.zarr"
                 ).resolve()
             )
-        logger.info("Found %d dataset_groups.", len(self.dataset_groups))
-        for dataset_group in self.dataset_groups:
-            logger.debug("... %s.", dataset_group)
+        logger.info("Found %d dataset groups.", len(self.dataset_groups))
+        for idx, (name, paths) in enumerate(self.dataset_groups.items(), start=1):
+            logger.info("%d) %s:", idx, name)
+            for path in paths:
+                logger.info("%s - %s", " " * (len(str(idx)) + 1), path)
 
         # Check prediction target
         self.target_group_name = config["predict"]["target"]["group_name"]
@@ -82,6 +84,18 @@ def __init__(self, config: DictConfig) -> None:
             worker_init_fn=None,
         )
 
+    @property
+    def hemisphere(self) -> Hemisphere:
+        """Return the hemisphere of the dataset."""
+        hemisphere: set[Hemisphere] = {
+            SingleDataset(name, paths).hemisphere
+            for name, paths in self.dataset_groups.items()
+        }
+        if len(hemisphere) != 1:
+            msg = f"Found {len(hemisphere)} different hemisphere indicators across {len(self.dataset_groups)} dataset groups."
+            raise ValueError(msg)
+        return hemisphere.pop()
+
     @cached_property
     def input_spaces(self) -> list[DataSpace]:
         """Return the data space for each input."""

diff --git a/icenet_mp/data_loaders/single_dataset.py b/icenet_mp/data_loaders/single_dataset.py
@@ -1,14 +1,13 @@
 from collections.abc import Sequence
 from functools import cached_property
 from pathlib import Path
-from typing import Literal
 
 import numpy as np
 from anemoi.datasets.data import open_dataset
 from anemoi.datasets.data.dataset import Dataset as AnemoiDataset
 from torch.utils.data import Dataset
 
-from icenet_mp.types import ArrayCHW, ArrayTCHW, DataSpace
+from icenet_mp.types import ArrayCHW, ArrayTCHW, DataSpace, Hemisphere
 from icenet_mp.utils import normalise_date
 
 
@@ -31,7 +30,7 @@ def __init__(
         self._date_ranges = sorted(
             date_ranges, key=lambda dr: "" if dr["start"] is None else dr["start"]
         )
-        self.hemisphere: Literal["north", "south"] = (
+        self.hemisphere: Hemisphere = (
             "north"
             if any("north" in str(input_file).lower() for input_file in input_files)
             else "south"

diff --git a/icenet_mp/model_service.py b/icenet_mp/model_service.py
@@ -44,6 +44,7 @@ def from_config(cls, config: DictConfig) -> "ModelService":
         builder.model_ = hydra.utils.instantiate(
             dict(
                 {
+                    "hemisphere": builder.data_module.hemisphere,
                     "input_spaces": [
                         s.to_dict() for s in builder.data_module.input_spaces
                     ],