Skip to content

Commit 2c31a32

Browse files
committed
Merge remote-tracking branch 'origin/main' into 45-encoder-decoder
2 parents ce109d4 + f5dfad9 commit 2c31a32

17 files changed

Lines changed: 1421 additions & 170 deletions

notebooks/00_exploration.ipynb

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "0",
6+
"metadata": {},
7+
"source": [
8+
"## AutoCast encoder-processor-decoder model API Exploration\n",
9+
"\n",
10+
"This notebook aims to explore the end-to-end API.\n"
11+
]
12+
},
13+
{
14+
"cell_type": "markdown",
15+
"id": "1",
16+
"metadata": {},
17+
"source": [
18+
"### Example dataaset\n",
19+
"\n",
20+
"We use the `AdvectionDiffusion` dataset as an example dataset to illustrate training and evaluation of models. This dataset simulates the advection-diffusion equation in 2D."
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": null,
26+
"id": "2",
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"\n",
31+
"from autoemulate.simulations.reaction_diffusion import ReactionDiffusion as Sim\n",
32+
"\n",
33+
"sim = Sim(return_timeseries=True, log_level=\"error\")\n",
34+
"\n",
35+
"def generate_split(simulator: Sim, n_train: int = 1, n_valid: int = 1, n_test: int = 1):\n",
36+
" \"\"\"Generate training, validation, and test splits from the simulator.\"\"\"\n",
37+
" train = simulator.forward_samples_spatiotemporal(n_train)\n",
38+
" valid = simulator.forward_samples_spatiotemporal(n_valid)\n",
39+
" test = simulator.forward_samples_spatiotemporal(n_test)\n",
40+
" return {\"train\": train, \"valid\": valid, \"test\": test}\n",
41+
"\n",
42+
"\n",
43+
"combined_data = generate_split(sim)"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"id": "3",
49+
"metadata": {},
50+
"source": [
51+
"### Read combined data into datamodule\n"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"id": "4",
58+
"metadata": {},
59+
"outputs": [],
60+
"source": [
61+
"from auto_cast.data.datamodule import SpatioTemporalDataModule\n",
62+
"\n",
63+
"n_steps_input = 4\n",
64+
"n_steps_output = 1\n",
65+
"datamodule = SpatioTemporalDataModule(\n",
66+
" data=combined_data, data_path=None, n_steps_input=n_steps_input, n_steps_output=n_steps_output, batch_size=16\n",
67+
")"
68+
]
69+
},
70+
{
71+
"cell_type": "markdown",
72+
"id": "5",
73+
"metadata": {},
74+
"source": [
75+
"### Example batch\n"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"id": "6",
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"batch = next(iter(datamodule.train_dataloader()))\n",
86+
"\n",
87+
"# batch"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": null,
93+
"id": "7",
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"from auto_cast.decoders.channels_last import ChannelsLast\n",
98+
"from auto_cast.encoders.permute_concat import PermuteConcat\n",
99+
"from auto_cast.models.encoder_decoder import EncoderDecoder\n",
100+
"from auto_cast.models.encoder_processor_decoder import EncoderProcessorDecoder\n",
101+
"from auto_cast.nn.fno import FNOProcessor\n",
102+
"\n",
103+
"batch = next(iter(datamodule.train_dataloader()))\n",
104+
"n_channels = batch.input_fields.shape[-1]\n",
105+
"processor = FNOProcessor(\n",
106+
" in_channels=n_channels * n_steps_input,\n",
107+
" out_channels=n_channels * n_steps_output,\n",
108+
" n_modes=(16, 16),\n",
109+
" hidden_channels=64,\n",
110+
")\n",
111+
"encoder = PermuteConcat(with_constants=False)\n",
112+
"decoder = ChannelsLast(output_channels=n_channels, time_steps=n_steps_output)\n",
113+
"\n",
114+
"model = EncoderProcessorDecoder.from_encoder_processor_decoder(\n",
115+
" encoder_decoder=EncoderDecoder(encoder=encoder, decoder=decoder),\n",
116+
" processor=processor,\n",
117+
")"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": null,
123+
"id": "8",
124+
"metadata": {},
125+
"outputs": [],
126+
"source": [
127+
"model(batch).shape"
128+
]
129+
},
130+
{
131+
"cell_type": "markdown",
132+
"id": "9",
133+
"metadata": {},
134+
"source": [
135+
"### Run trainer\n"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"id": "10",
142+
"metadata": {},
143+
"outputs": [],
144+
"source": [
145+
"import lightning as L\n",
146+
"\n",
147+
"# device = \"mps\" # \"cpu\"\n",
148+
"device = \"cpu\"\n",
149+
"trainer = L.Trainer(max_epochs=1, accelerator=device, log_every_n_steps=10)\n",
150+
"trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())"
151+
]
152+
},
153+
{
154+
"cell_type": "markdown",
155+
"id": "11",
156+
"metadata": {},
157+
"source": [
158+
"### Run the evaluation"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": null,
164+
"id": "12",
165+
"metadata": {},
166+
"outputs": [],
167+
"source": [
168+
"trainer.test(model, datamodule.test_dataloader())"
169+
]
170+
},
171+
{
172+
"cell_type": "markdown",
173+
"id": "13",
174+
"metadata": {},
175+
"source": [
176+
"### Example rollout"
177+
]
178+
},
179+
{
180+
"cell_type": "code",
181+
"execution_count": null,
182+
"id": "14",
183+
"metadata": {},
184+
"outputs": [],
185+
"source": [
186+
"# A single element is the full trajectory\n",
187+
"batch = next(iter(datamodule.rollout_test_dataloader()))"
188+
]
189+
},
190+
{
191+
"cell_type": "code",
192+
"execution_count": null,
193+
"id": "15",
194+
"metadata": {},
195+
"outputs": [],
196+
"source": [
197+
"# First n_steps_input are inputs\n",
198+
"print(batch.input_fields.shape)\n",
199+
"# Remaining n_steps_output are outputs\n",
200+
"print(batch.output_fields.shape)"
201+
]
202+
},
203+
{
204+
"cell_type": "code",
205+
"execution_count": null,
206+
"id": "16",
207+
"metadata": {},
208+
"outputs": [],
209+
"source": [
210+
"# Run rollout on one trajectory\n",
211+
"preds, trues = model.rollout(batch)"
212+
]
213+
},
214+
{
215+
"cell_type": "code",
216+
"execution_count": null,
217+
"id": "17",
218+
"metadata": {},
219+
"outputs": [],
220+
"source": [
221+
"print(preds.shape)"
222+
]
223+
},
224+
{
225+
"cell_type": "code",
226+
"execution_count": null,
227+
"id": "18",
228+
"metadata": {},
229+
"outputs": [],
230+
"source": [
231+
"print(trues.shape)\n"
232+
]
233+
}
234+
],
235+
"metadata": {
236+
"kernelspec": {
237+
"display_name": ".venv",
238+
"language": "python",
239+
"name": "python3"
240+
},
241+
"language_info": {
242+
"codemirror_mode": {
243+
"name": "ipython",
244+
"version": 3
245+
},
246+
"file_extension": ".py",
247+
"mimetype": "text/x-python",
248+
"name": "python",
249+
"nbconvert_exporter": "python",
250+
"pygments_lexer": "ipython3",
251+
"version": "3.12.12"
252+
}
253+
},
254+
"nbformat": 4,
255+
"nbformat_minor": 5
256+
}

pyproject.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies = [
1414
"h5py>=3.15.1",
1515
"jaxtyping>=0.3.3",
1616
"lightning>=2.5.6",
17+
"neuraloperator>=2.0.0",
1718
"the-well>=1.1.0",
1819
"torch>=2.9.1",
1920
]
@@ -95,3 +96,14 @@ convention = "numpy"
9596

9697
[tool.ruff.lint.per-file-ignores]
9798
"tests/*.py" = ["D"]
99+
100+
[tool.uv.sources]
101+
autoemulate = { git = "https://github.com/alan-turing-institute/autoemulate.git" }
102+
103+
[tool.pytest.ini_options]
104+
filterwarnings = [
105+
# Ignore Lightning warnings that are expected/benign in test environment
106+
"ignore:You are trying to `self.log\\(\\)` but the `self.trainer` reference is not registered:UserWarning",
107+
"ignore:GPU available but not used:UserWarning",
108+
"ignore:The '.*_dataloader' does not have many workers:UserWarning",
109+
]
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from einops import rearrange
2+
3+
from auto_cast.decoders.base import Decoder
4+
from auto_cast.types import Tensor
5+
6+
7+
class ChannelsLast(Decoder):
    """Undo the (channel*time) merge and move channels to the last axis.

    Inverse of an encoder that folded C channels over T time steps into a
    single (C*T) axis: restores separate time and channel axes and emits
    the channels-last layout expected downstream.
    """

    def __init__(self, output_channels: int, time_steps: int = 1) -> None:
        """Initialize the ChannelsLast decoder.

        Parameters
        ----------
        output_channels: int
            Number of channels (C) to recover from the merged axis.
        time_steps: int
            Number of time steps (T) that were folded into the channel
            axis during encoding.
        """
        super().__init__()
        self.output_channels = output_channels
        self.time_steps = time_steps

    def forward(self, x: Tensor) -> Tensor:
        """Map a (B, C*T, W, H) input to a (B, T, W, H, C) output.

        The axis split and the reorder are expressed as a single einops
        pattern: "(c t)" is unpacked using the configured sizes and the
        channel axis is moved last in one pass.
        """
        return rearrange(
            x,
            "b (c t) w h -> b t w h c",
            c=self.output_channels,
            t=self.time_steps,
        )
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import torch
2+
from einops import rearrange
3+
4+
from auto_cast.encoders.base import Encoder
5+
from auto_cast.types import Batch, Tensor
6+
7+
8+
class PermuteConcat(Encoder):
    """Permute and concatenate Encoder.

    Moves the batch's channels-last fields to channels-first, optionally
    appends constant fields/scalars as additional channels, and merges the
    channel and time axes into a single (C*T) axis for channels-first
    processors.
    """

    def __init__(self, with_constants: bool = False) -> None:
        """Initialize the encoder.

        Parameters
        ----------
        with_constants: bool
            If True, append ``batch.constant_fields`` and
            ``batch.constant_scalars`` (when present) as extra channels.
        """
        super().__init__()
        self.with_constants = with_constants

    def forward(self, batch: Batch) -> Tensor:
        """Encode a batch into a (B, C*T, W, H) tensor.

        Assumes ``batch.input_fields`` is shaped (B, T, W, H, C).
        """
        # Destructure batch, time, space, channels
        b, t, w, h, _ = batch.input_fields.shape  # TODO: generalize beyond 2D spatial
        x = batch.input_fields
        x = rearrange(x, "b t w h c -> b c t w h")
        if self.with_constants and batch.constant_fields is not None:
            constants = batch.constant_fields
            constants = rearrange(constants, "b w h c -> b c 1 w h")
            # Broadcast the static fields across the time axis so the channel
            # concatenation is shape-compatible: torch.cat requires all
            # non-cat dims to match, and the previous singleton time dim only
            # worked when t == 1 (scalars below were already expanded).
            constants = constants.expand(b, -1, t, w, h)
            x = torch.cat([x, constants], dim=1)
        if self.with_constants and batch.constant_scalars is not None:
            scalars = batch.constant_scalars
            scalars = rearrange(scalars, "b c -> b c 1 1 1")
            scalars = scalars.expand(b, -1, t, w, h)
            x = torch.cat([x, scalars], dim=1)
        return rearrange(x, "b c t w h -> b (c t) w h")

src/auto_cast/models/encoder_decoder.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@ def forward_with_latent(self, batch: Batch) -> tuple[TensorBTSPlusC, TensorBMSta
2727
return decoded, encoded
2828

2929
def training_step(self, batch: Batch, batch_idx: int) -> Tensor: # noqa: ARG002
30-
output = self(batch)
30+
if self.loss_func is None:
31+
msg = "Loss function not defined for EncoderDecoder model."
32+
raise ValueError(msg)
33+
x = self(batch)
34+
output = self.decoder(x)
3135
loss = self.loss_func(output, batch.output_fields)
3236
self.log(
3337
"train_loss", loss, prog_bar=True, batch_size=batch.input_fields.shape[0]
@@ -54,3 +58,18 @@ def decode(self, z: TensorBMStarL) -> TensorBTSPlusC:
5458
def configure_optimizers(self):
    """Build the optimizer used by the Lightning trainer.

    Returns an Adam optimizer over all model parameters, using the
    model's configured learning rate.
    """
    optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
    return optimizer  # noqa: RET504
61+
62+
63+
class VAE(EncoderDecoder):
    """Variational Autoencoder Model."""

    def forward(self, batch: Batch) -> Tensor:
        """Encode the batch, sample a latent via reparametrization, decode."""
        mu, log_var = self.encoder(batch)
        latent = self.reparametrize(mu, log_var)
        return self.decoder(latent)

    def reparametrize(self, mu: Tensor, log_var: Tensor) -> Tensor:
        """Differentiable sample from N(mu, sigma^2): mu + sigma * eps."""
        sigma = (0.5 * log_var).exp()
        noise = torch.randn_like(sigma)
        return noise * sigma + mu

0 commit comments

Comments
 (0)