
Commit 38b2202

➕ Use jaxtyping to annotate types of tensor and array being used in different functions

1 parent 3717f84

13 files changed: 97 additions & 59 deletions
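Every file in this commit imports its new aliases from ice_station_zebra.types; the diff for that module is not shown in this excerpt. As a rough sketch of what jaxtyping-based aliases of this kind can look like (the alias names come from the diffs below, the axis labels are assumptions):

```python
# Hypothetical sketch of the aliases used throughout this commit; the
# actual ice_station_zebra/types.py diff is not shown in this excerpt.
import numpy as np
from jaxtyping import Float
from torch import Tensor

# Floating-point NumPy arrays with named axes
ArrayCHW = Float[np.ndarray, "channels height width"]
ArrayTCHW = Float[np.ndarray, "time channels height width"]

# Floating-point torch Tensors with a leading batch axis
TensorNCHW = Float[Tensor, "batch channels height width"]
TensorNTCHW = Float[Tensor, "batch time channels height width"]
```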

ice_station_zebra/data/lightning/combined_dataset.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@
 from torch.utils.data import Dataset

 from .zebra_dataset import ZebraDataset
-from ice_station_zebra.types import CombinedNumpyBatch
+from ice_station_zebra.types import ArrayTCHW


 class CombinedDataset(Dataset):
@@ -59,7 +59,7 @@ def __len__(self) -> int:
         """Return the total length of the dataset"""
         return len(self.available_dates)

-    def __getitem__(self, idx: int) -> CombinedNumpyBatch:
+    def __getitem__(self, idx: int) -> dict[str, ArrayTCHW]:
         """Return the data for a single timestep as a dictionary

         Returns:
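For illustration only, a sample from __getitem__ maps each dataset name to a float32 array in [T, C, H, W] layout; the key names and shapes below are invented:

```python
import numpy as np

# Invented keys and shapes, purely to show the [T, C, H, W] layout
sample: dict[str, np.ndarray] = {
    "era5": np.zeros((2, 10, 128, 128), dtype=np.float32),
    "target": np.zeros((1, 1, 128, 128), dtype=np.float32),
}
for array in sample.values():
    assert array.ndim == 4  # time, channels, height, width
```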

ice_station_zebra/data/lightning/zebra_data_module.py

Lines changed: 4 additions & 6 deletions
@@ -3,13 +3,11 @@
 from functools import cached_property
 from pathlib import Path

-import numpy as np
 from lightning import LightningDataModule
-from numpy.typing import NDArray
 from omegaconf import DictConfig
 from torch.utils.data import DataLoader

-from ice_station_zebra.types import DataloaderArgs, DataSpace
+from ice_station_zebra.types import ArrayTCHW, DataloaderArgs, DataSpace

 from .combined_dataset import CombinedDataset
 from .zebra_dataset import ZebraDataset
@@ -89,7 +87,7 @@ def output_space(self) -> DataSpace:

     def train_dataloader(
         self,
-    ) -> DataLoader[tuple[NDArray[np.float32], NDArray[np.float32]]]:
+    ) -> DataLoader[dict[str, ArrayTCHW]]:
         """Construct train dataloader"""
         dataset = CombinedDataset(
             [
@@ -115,7 +113,7 @@ def train_dataloader(

     def val_dataloader(
         self,
-    ) -> DataLoader[tuple[NDArray[np.float32], NDArray[np.float32]]]:
+    ) -> DataLoader[dict[str, ArrayTCHW]]:
         """Construct validation dataloader"""
         dataset = CombinedDataset(
             [
@@ -141,7 +139,7 @@ def val_dataloader(

     def test_dataloader(
         self,
-    ) -> DataLoader[tuple[NDArray[np.float32], NDArray[np.float32]]]:
+    ) -> DataLoader[dict[str, ArrayTCHW]]:
         """Construct test dataloader"""
         dataset = CombinedDataset(
             [
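This element type plays well with PyTorch's default collation: a list of dict samples holding [T, C, H, W] arrays collates into a dict of [N, T, C, H, W] tensors, which matches the dict[str, TensorNTCHW] inputs the models below accept. A minimal sketch:

```python
import numpy as np
from torch.utils.data import default_collate

# Four samples, each a dict of [T, C, H, W] float32 arrays
samples = [{"x": np.zeros((2, 3, 8, 8), dtype=np.float32)} for _ in range(4)]
batch = default_collate(samples)
assert batch["x"].shape == (4, 2, 3, 8, 8)  # a [N, T, C, H, W] torch.Tensor
```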

ice_station_zebra/data/lightning/zebra_dataset.py

Lines changed: 6 additions & 6 deletions
@@ -1,12 +1,12 @@
 from pathlib import Path
+from collections.abc import Sequence

 import numpy as np
 from anemoi.datasets.data import open_dataset
 from cachetools import LRUCache, cachedmethod
-from numpy.typing import NDArray
 from torch.utils.data import Dataset

-from ice_station_zebra.types import DataSpace
+from ice_station_zebra.types import ArrayCHW, ArrayTCHW, DataSpace


 class ZebraDataset(Dataset):
@@ -57,8 +57,8 @@ def __len__(self) -> int:
         """Return the total length of the dataset"""
         return len(self.dataset)

-    def __getitem__(self, idx: int) -> NDArray[np.float32]:
-        """Return a single timestep after reshaping to [C, H, W]"""
+    def __getitem__(self, idx: int) -> ArrayCHW:
+        """Return the data for a single timestep in [C, H, W] format"""
         return self.dataset[idx].reshape(self.chw)

     @cachedmethod(lambda self: self._cache)
@@ -67,8 +67,8 @@ def index_from_date(self, date: np.datetime64) -> int:
         idx, _, _ = self.dataset.to_index(date, 0)
         return idx

-    def get_tchw(self, dates: list[np.datetime64]) -> NDArray[np.float32]:
-        """Return the data for a given set of dates in [T, C, H, W] format"""
+    def get_tchw(self, dates: Sequence[np.datetime64]) -> ArrayTCHW:
+        """Return the data for a series of timesteps in [T, C, H, W] format"""
         return np.stack(
             [self[self.index_from_date(target_date)] for target_date in dates], axis=0
         )
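get_tchw builds the time axis with np.stack: stacking T arrays of shape [C, H, W] along a new leading axis yields a [T, C, H, W] array. For example:

```python
import numpy as np

# Three [C, H, W] timesteps stacked along a new leading time axis
frames = [np.zeros((5, 64, 64), dtype=np.float32) for _ in range(3)]
tchw = np.stack(frames, axis=0)
assert tchw.shape == (3, 5, 64, 64)  # [T, C, H, W]
```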
ice_station_zebra/models/decoders/base_decoder.py

Lines changed: 7 additions & 6 deletions
@@ -1,32 +1,33 @@
 from abc import ABC, abstractmethod

 import torch.nn as nn
-from torch import Tensor
+
+from ice_station_zebra.types import TensorNCHW, TensorNTCHW


 class BaseDecoder(nn.Module, ABC):
     """
     Decoder that takes data in a latent space and translates it to a larger output space

     Latent space:
-        Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+        TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)

     Output space:
-        Tensor[NTCHW] with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
+        TensorNTCHW with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
     """

     def __init__(self, *, n_forecast_steps: int) -> None:
         super().__init__()
         self.n_forecast_steps = n_forecast_steps

     @abstractmethod
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: TensorNCHW) -> TensorNTCHW:
         """
         Transformation summary

         Args:
-            x: Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+            x: TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)

         Returns:
-            Tensor[NTCHW] with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
+            TensorNTCHW with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
         """

ice_station_zebra/models/decoders/naive_latent_space_decoder.py

Lines changed: 6 additions & 7 deletions
@@ -2,9 +2,8 @@
 from typing import Any

 import torch.nn as nn
-from torch import Tensor

-from ice_station_zebra.types import DataSpace
+from ice_station_zebra.types import DataSpace, TensorNCHW, TensorNTCHW
 from .base_decoder import BaseDecoder


@@ -13,10 +12,10 @@ class NaiveLatentSpaceDecoder(BaseDecoder):
     Naive, linear decoder that takes data in a latent space and translates it to a larger output space

     Latent space:
-        Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+        TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)

     Output space:
-        Tensor[NTCHW] with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
+        TensorNTCHW with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
     """

     def __init__(
@@ -54,14 +53,14 @@ def __init__(
         # Combine the layers sequentially
         self.model = nn.Sequential(*layers)

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: TensorNCHW) -> TensorNTCHW:
         """
         Transformation summary

         Args:
-            x: Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+            x: TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)

         Returns:
-            Tensor[NTCHW] with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
+            TensorNTCHW with (batch_size, n_forecast_steps, output_channels, output_height, output_width)
         """
         return self.model(x)
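The layers behind self.model are defined in an __init__ hunk that is mostly elided here. As an illustration of the NCHW-to-NTCHW mapping only (not the actual NaiveLatentSpaceDecoder), channels can be widened and then split into separate time and channel axes:

```python
import torch
import torch.nn as nn

# Illustrative only: widen channels with a 1x1 conv, then split the
# channel axis into (n_forecast_steps, output_channels)
n, c_latent, h, w = 2, 8, 16, 16
n_forecast_steps, c_out = 3, 5

expand = nn.Conv2d(c_latent, n_forecast_steps * c_out, kernel_size=1)
x = torch.randn(n, c_latent, h, w)                    # NCHW
y = expand(x).view(n, n_forecast_steps, c_out, h, w)  # NTCHW
assert y.shape == (n, 3, 5, h, w)
```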

ice_station_zebra/models/encode_process_decode.py

Lines changed: 6 additions & 7 deletions
@@ -3,9 +3,8 @@
 import hydra
 import torch
 from omegaconf import DictConfig
-from torch import Tensor

-from ice_station_zebra.types import CombinedTensorBatch, DataSpace
+from ice_station_zebra.types import DataSpace, TensorNCHW, TensorNTCHW
 from ice_station_zebra.models.encoders import BaseEncoder

 from .zebra_model import ZebraModel
@@ -62,7 +61,7 @@ def __init__(
         self.model_list.append(self.processor)
         self.model_list.append(self.decoder)

-    def forward(self, inputs: CombinedTensorBatch) -> torch.Tensor:
+    def forward(self, inputs: dict[str, TensorNTCHW]) -> TensorNTCHW:
         """Forward step of the model

         - start with multiple [NTCHW] inputs each with shape [batch, n_history_steps, C_input_k, H_input_k, W_input_k]
@@ -72,18 +71,18 @@ def forward(self, inputs: CombinedTensorBatch) -> torch.Tensor:
         - decode back to [NTCHW] output space [batch, n_forecast_steps, C_output, H_output, W_output]
         """
         # Encode inputs into latent space: list of tensors with (batch_size, variables, latent_height, latent_width)
-        latent_inputs: list[Tensor] = [
+        latent_inputs: list[TensorNCHW] = [
             encoder(inputs[encoder.name]) for encoder in self.encoders
         ]

         # Combine in the variable dimension: tensor with (batch_size, all_variables, latent_height, latent_width)
-        latent_input_combined = torch.cat(latent_inputs, dim=1)
+        latent_input_combined: TensorNCHW = torch.cat(latent_inputs, dim=1)

         # Process in latent space: tensor with (batch_size, all_variables, latent_height, latent_width)
-        latent_output: Tensor = self.processor(latent_input_combined)
+        latent_output: TensorNCHW = self.processor(latent_input_combined)

         # Decode to output space: tensor with (batch_size, output_variables, output_height, output_width)
-        output: Tensor = self.decoder(latent_output)
+        output: TensorNTCHW = self.decoder(latent_output)

         # Return
         return output
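The combine step hinges on torch.cat along dim=1: every encoder must emit the same batch and latent spatial sizes, while channel counts are free to differ. For example:

```python
import torch

# Two latent encodings: same batch and spatial dims, different channels
a = torch.randn(4, 8, 16, 16)   # (batch, C_a, latent_H, latent_W)
b = torch.randn(4, 12, 16, 16)  # (batch, C_b, latent_H, latent_W)
combined = torch.cat([a, b], dim=1)
assert combined.shape == (4, 20, 16, 16)  # channels are concatenated
```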
ice_station_zebra/models/encoders/base_encoder.py

Lines changed: 6 additions & 6 deletions
@@ -1,18 +1,18 @@
 from abc import ABC, abstractmethod

 import torch.nn as nn
-from torch import Tensor
+from ice_station_zebra.types import TensorNCHW, TensorNTCHW


 class BaseEncoder(nn.Module, ABC):
     """
     Encoder that takes data in an input space and translates it to a smaller latent space

     Input space:
-        Tensor[NTCHW] with (batch_size, n_history_steps, input_channels, input_height, input_width)
+        TensorNTCHW with (batch_size, n_history_steps, input_channels, input_height, input_width)

     Latent space:
-        Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+        TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)
     """

     def __init__(self, *, name: str, n_history_steps: int) -> None:
@@ -21,13 +21,13 @@ def __init__(self, *, name: str, n_history_steps: int) -> None:
         self.n_history_steps = n_history_steps

     @abstractmethod
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: TensorNTCHW) -> TensorNCHW:
         """
         Transformation summary

         Args:
-            x: Tensor[NTCHW] with (batch_size, n_history_steps, input_channels, input_height, input_width)
+            x: TensorNTCHW with (batch_size, n_history_steps, input_channels, input_height, input_width)

         Returns:
-            Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+            TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)
         """

ice_station_zebra/models/encoders/naive_latent_space_encoder.py

Lines changed: 6 additions & 7 deletions
@@ -2,9 +2,8 @@
 from typing import Any

 import torch.nn as nn
-from torch import Tensor

-from ice_station_zebra.types import DataSpace
+from ice_station_zebra.types import DataSpace, TensorNCHW, TensorNTCHW
 from .base_encoder import BaseEncoder


@@ -13,10 +12,10 @@ class NaiveLatentSpaceEncoder(BaseEncoder):
     Naive, linear encoder that takes data in an input space and translates it to a smaller latent space

     Input space:
-        Tensor[NTCHW] with (batch_size, n_history_steps, input_channels, input_height, input_width)
+        TensorNTCHW with (batch_size, n_history_steps, input_channels, input_height, input_width)

     Latent space:
-        Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+        TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)
     """

     def __init__(
@@ -52,14 +51,14 @@ def __init__(
         # Combine the layers sequentially
         self.model = nn.Sequential(*layers)

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: TensorNTCHW) -> TensorNCHW:
         """
         Transformation summary

         Args:
-            x: Tensor[NTCHW] with (batch_size, n_history_steps, input_channels, input_height, input_width)
+            x: TensorNTCHW with (batch_size, n_history_steps, input_channels, input_height, input_width)

         Returns:
-            Tensor[NCHW] with (batch_size, latent_channels, latent_height, latent_width)
+            TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)
         """
         return self.model(x)
Lines changed: 16 additions & 3 deletions
@@ -1,14 +1,27 @@
 import torch.nn as nn
-from torch import Tensor
+from ice_station_zebra.types import TensorNCHW


 class NullProcessor(nn.Module):
-    """Null model that simply returns input"""
+    """Null model that simply returns input
+
+    Operations all occur in latent space:
+        TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)
+    """

     def __init__(self, n_latent_channels: int) -> None:
         super().__init__()
         self.n_latent_channels = n_latent_channels
         self.model = nn.Identity()

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: TensorNCHW) -> TensorNCHW:
+        """
+        Transformation summary
+
+        Args:
+            x: TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)
+
+        Returns:
+            TensorNCHW with (batch_size, latent_channels, latent_height, latent_width)
+        """
         return self.model(x)

ice_station_zebra/models/zebra_model.py

Lines changed: 8 additions & 6 deletions
@@ -7,7 +7,7 @@
 from omegaconf import DictConfig
 from torch.optim import Optimizer

-from ice_station_zebra.types import CombinedTensorBatch, DataSpace
+from ice_station_zebra.types import DataSpace, TensorNTCHW


 class ZebraModel(LightningModule, ABC):
@@ -46,7 +46,7 @@ def __init__(
         self.save_hyperparameters()

     @abstractmethod
-    def forward(self, inputs: CombinedTensorBatch) -> torch.Tensor:
+    def forward(self, inputs: dict[str, TensorNTCHW]) -> TensorNTCHW:
         """Forward step of the model

         - start with multiple [NTCHW] inputs each with shape [batch, n_history_steps, C_input_k, H_input_k, W_input_k]
@@ -62,11 +62,11 @@ def configure_optimizers(self) -> Optimizer:
             dict(**self.optimizer_cfg) | {"params": self.model_list.parameters()}
         )

-    def loss(self, output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
+    def loss(self, output: TensorNTCHW, target: TensorNTCHW) -> torch.Tensor:
         return torch.nn.functional.l1_loss(output, target)

     def test_step(
-        self, batch: CombinedTensorBatch, batch_idx: int
+        self, batch: dict[str, TensorNTCHW], batch_idx: int
     ) -> dict[str, torch.Tensor]:
         """Run the test step, in PyTorch eval model (i.e. no gradients)

@@ -82,7 +82,9 @@ def test_step(
         loss = self.loss(output, target)
         return {"output": output, "target": target, "loss": loss}

-    def training_step(self, batch: CombinedTensorBatch, batch_idx: int) -> torch.Tensor:
+    def training_step(
+        self, batch: dict[str, TensorNTCHW], batch_idx: int
+    ) -> torch.Tensor:
         """Run the training step

         A batch contains one tensor for each input dataset and one for the target
@@ -97,7 +99,7 @@ def training_step(self, batch: CombinedTensorBatch, batch_idx: int) -> torch.Ten
         return self.loss(output, target)

     def validation_step(
-        self, batch: CombinedTensorBatch, batch_idx: int
+        self, batch: dict[str, TensorNTCHW], batch_idx: int
     ) -> torch.Tensor:
         """Run the validation step
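The loss defined here is plain mean absolute error. For example:

```python
import torch

output = torch.tensor([[1.0, 2.0]])
target = torch.tensor([[1.5, 1.0]])
loss = torch.nn.functional.l1_loss(output, target)
assert torch.isclose(loss, torch.tensor(0.75))  # mean of |0.5| and |1.0|
```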
