Merge pull request #101 from alan-turing-institute/77-flow-matching-and-diffusion-config-remove-backbone-kwargs

sgreenbury · web-flow · commit 6fe4886f27ff · 2025-12-12T09:52:32.000Z
Remove obsolete backbone kwargs, update config (#77)
diff --git a/configs/data/reaction_diffusion.yaml b/configs/data/reaction_diffusion.yaml
@@ -1,5 +1,5 @@
-data_path: null
-use_simulator: true
+data_path: ./datasets/reaction_diffusion/
+use_simulator: false
 split:
   n_train: 4
   n_valid: 2
diff --git a/configs/processor/flow_matching.yaml b/configs/processor/flow_matching.yaml
@@ -6,7 +6,11 @@ learning_rate: 0.0001
 flow_ode_steps: 4
 n_steps_output: null
 n_channels_out: null
-backbone_kwargs:
+backbone:
+  _target_: autocast.nn.unet.TemporalUNetBackbone
+  in_channels: null
+  out_channels: null
+  cond_channels: null
   mod_features: 256
   hid_channels: [32, 64, 128]
   hid_blocks: [2, 2, 2]
diff --git a/src/autocast/data/dataset.py b/src/autocast/data/dataset.py
@@ -180,6 +180,8 @@ def _from_f(self, f):
         self.constant_scalars = (
             torch.Tensor(f["constant_scalars"][:]).to(self.dtype)  # type: ignore  # noqa: PGH003
             if "constant_scalars" in f
+            and f["constant_scalars"] is not None
+            and f["constant_scalars"] != {}
             else None
         )  # [N, C]
 
@@ -188,7 +190,9 @@ def _from_f(self, f):
             torch.Tensor(f["constant_fields"][:]).to(  # type: ignore # noqa: PGH003
                 self.dtype
             )  # [N, W, H, C]
-            if "constant_fields" in f and f["constant_fields"] != {}
+            if "constant_fields" in f
+            and f["constant_fields"] is not None
+            and f["constant_fields"] != {}
             else None
         )
 
diff --git a/src/autocast/eval/processor.py b/src/autocast/eval/processor.py
@@ -30,7 +30,6 @@
 )
 from autocast.models.encoder_decoder import EncoderDecoder
 from autocast.models.encoder_processor_decoder import EncoderProcessorDecoder
-from autocast.processors.utils import initialize_flow_matching_backbone
 from autocast.train.configuration import (
     compose_training_config,
     configure_module_dimensions,
@@ -340,21 +339,12 @@ def _load_state_dict(checkpoint_path: Path) -> OrderedDict[str, torch.Tensor]:
 def _load_model(
     cfg: DictConfig,
     checkpoint_path: Path,
-    n_steps_input: int,
-    channel_count: int,
-    spatial_shape: Sequence[int],
 ) -> EncoderProcessorDecoder:
     model_cfg = cfg.get("model") or cfg
     encoder = instantiate(model_cfg.encoder)
     decoder = instantiate(model_cfg.decoder)
     encoder_decoder = EncoderDecoder(encoder=encoder, decoder=decoder)
     processor = instantiate(model_cfg.processor)
-    initialize_flow_matching_backbone(
-        processor,
-        n_steps_input,
-        channel_count,
-        spatial_shape,
-    )
     epd_cfg = model_cfg
     learning_rate = epd_cfg.get("learning_rate", 1e-3)
     training_cfg = cfg.get("training") or {}
@@ -486,7 +476,7 @@ def main() -> None:
         channel_count,
         inferred_n_steps_input,
         inferred_n_steps_output,
-        input_shape,
+        _,
         _,
     ) = prepare_datamodule(cfg)
 
@@ -500,13 +490,9 @@ def main() -> None:
 
     metrics = _build_metrics(args.metrics or ("mse", "rmse"))
 
-    spatial_shape = tuple(input_shape[2:-1])
     model = _load_model(
         cfg,
         args.checkpoint,
-        inferred_n_steps_input,
-        channel_count,
-        spatial_shape,
     )
     device = _resolve_device(args.device)
     model.to(device)
diff --git a/src/autocast/nn/unet.py b/src/autocast/nn/unet.py
@@ -1,3 +1,5 @@
+from collections.abc import Sequence
+
 from azula.nn.embedding import SineEncoding
 from azula.nn.unet import UNet
 from einops import rearrange
@@ -15,8 +17,8 @@ def __init__(
         out_channels: int = 1,
         cond_channels: int = 1,
         mod_features: int = 256,
-        hid_channels: tuple = (32, 64, 128),
-        hid_blocks: tuple = (2, 2, 2),
+        hid_channels: Sequence[int] = (32, 64, 128),
+        hid_blocks: Sequence[int] = (2, 2, 2),
         spatial: int = 2,
         periodic: bool = False,
     ):
diff --git a/src/autocast/processors/flow_matching.py b/src/autocast/processors/flow_matching.py
@@ -3,10 +3,8 @@
 from typing import Any
 
 import torch
-from omegaconf import DictConfig, OmegaConf
 from torch import nn
 
-from autocast.nn.unet import TemporalUNetBackbone
 from autocast.processors.base import Processor
 from autocast.types import EncodedBatch, Tensor
 
@@ -17,8 +15,7 @@ class FlowMatchingProcessor(Processor):
     def __init__(
         self,
         *,
-        flow_matching_model: nn.Module | None = None,
-        backbone: nn.Module | None = None,
+        backbone: nn.Module,
         schedule: Any | None = None,
         denoiser_type: str | None = None,
         stride: int = 1,
@@ -29,7 +26,6 @@ def __init__(
         flow_ode_steps: int = 1,
         n_steps_output: int = 4,
         n_channels_out: int = 1,
-        backbone_kwargs: dict[str, Any] | DictConfig | None = None,
         **kwargs: Any,
     ) -> None:
         # Store core hyperparameters and optional prebuilt backbone.
@@ -40,44 +36,13 @@ def __init__(
             loss_func=loss_func or nn.MSELoss(),
             **kwargs,
         )
-        self.flow_matching_model = flow_matching_model or backbone
+        self.flow_matching_model = backbone
         self.schedule = schedule  # accepted for API compatibility
         self.denoiser_type = denoiser_type
         self.learning_rate = learning_rate
         self.flow_ode_steps = max(flow_ode_steps, 1)
         self.n_steps_output = n_steps_output
         self.n_channels_out = n_channels_out
-        processed_kwargs: dict[str, Any] = {}
-        raw_kwargs: Any | None
-        if isinstance(backbone_kwargs, DictConfig):
-            raw_kwargs = OmegaConf.to_container(backbone_kwargs, resolve=True)
-        else:
-            raw_kwargs = backbone_kwargs
-        if isinstance(raw_kwargs, dict):
-            processed_kwargs = {str(k): v for k, v in raw_kwargs.items()}
-            for field in ("hid_channels", "hid_blocks"):
-                value = processed_kwargs.get(field)
-                if isinstance(value, list):
-                    processed_kwargs[field] = tuple(value)
-        self.backbone_kwargs = processed_kwargs
-
-    def _maybe_build_backbone(self, x: Tensor) -> None:
-        """Lazily build TemporalUNetBackbone when no model is provided."""
-        if self.flow_matching_model is not None:
-            return
-
-        # Infer in/out channels from configured temporal/channel counts.
-        t_in = x.shape[1]
-        c_in = x.shape[-1]
-        t_out = self.n_steps_output
-        c_out = self.n_channels_out
-
-        self.flow_matching_model = TemporalUNetBackbone(
-            in_channels=t_out * c_out,
-            out_channels=t_out * c_out,
-            cond_channels=t_in * c_in,
-            **self.backbone_kwargs,
-        )
 
     def flow_field(self, z: Tensor, t: Tensor, x: Tensor) -> Tensor:
         """Flow matching vector field.
@@ -94,8 +59,6 @@ def flow_field(self, z: Tensor, t: Tensor, x: Tensor) -> Tensor:
         -------
             Time derivative of output states with the same shape as `z`.
         """
-        self._maybe_build_backbone(x)
-        assert self.flow_matching_model is not None  # for type checkers
         return self.flow_matching_model(z, t, x)
 
     def forward(self, x: Tensor) -> Tensor:
@@ -146,8 +109,6 @@ def loss(self, batch: EncodedBatch) -> Tensor:
             )
             raise ValueError(msg)
 
-        self._maybe_build_backbone(input_states)
-
         batch_size = target_states.shape[0]
 
         z0 = torch.randn_like(target_states, requires_grad=True)
diff --git a/src/autocast/processors/utils.py b/src/autocast/processors/utils.py
@@ -1,25 +1 @@
 from __future__ import annotations
-
-from collections.abc import Sequence
-
-import torch
-
-
-def initialize_flow_matching_backbone(
-    processor,
-    n_steps_input: int | None,
-    channel_count: int | None,
-    spatial_shape: Sequence[int] | None,
-) -> None:
-    """Instantiate the flow-matching backbone before optimizers are created."""
-    builder = getattr(processor, "_maybe_build_backbone", None)
-    has_model = getattr(processor, "flow_matching_model", None) is not None
-    if builder is None or has_model:
-        return
-    if n_steps_input is None or channel_count is None:
-        return
-    spatial = tuple(spatial_shape) if spatial_shape is not None else ()
-    dummy = torch.zeros(
-        (1, n_steps_input, *spatial, channel_count), dtype=torch.float32
-    )
-    builder(dummy)
diff --git a/src/autocast/train/processor.py b/src/autocast/train/processor.py
@@ -17,7 +17,6 @@
 from autocast.models.ae import AE, AELoss
 from autocast.models.encoder_decoder import EncoderDecoder
 from autocast.models.encoder_processor_decoder import EncoderProcessorDecoder
-from autocast.processors.utils import initialize_flow_matching_backbone
 from autocast.train.configuration import (
     compose_training_config,
     configure_module_dimensions,
@@ -245,13 +244,6 @@ def main() -> None:  # noqa: PLR0915
         _freeze_module(encoder_decoder.decoder)
 
     processor = instantiate(model_cfg.processor)
-    spatial_shape = tuple(input_shape[2:-1])
-    initialize_flow_matching_backbone(
-        processor,
-        inferred_n_steps_input,
-        channel_count,
-        spatial_shape,
-    )
 
     epd_cfg = model_cfg
     learning_rate = epd_cfg.get("learning_rate", 1e-3)
diff --git a/tests/processors/test_flow_matching.py b/tests/processors/test_flow_matching.py
@@ -106,7 +106,7 @@ def test_flow_matching_processor(
     encoded_batch = next(iter(encoded_loader))
 
     processor = FlowMatchingProcessor(
-        flow_matching_model=TemporalUNetBackbone(
+        backbone=TemporalUNetBackbone(
             in_channels=n_steps_output * n_channels_out,
             out_channels=n_steps_output * n_channels_out,
             cond_channels=n_steps_input * n_channels_in,