Remove obsolete backbone_kwargs and lazy backbone build; instantiate backbone from config

sgreenbury · sgreenbury · commit 5e7f81d333e7 · 2025-12-11T18:02:47.000Z
diff --git a/configs/processor/flow_matching.yaml b/configs/processor/flow_matching.yaml
@@ -6,9 +6,19 @@ learning_rate: 0.0001
 flow_ode_steps: 4
 n_steps_output: null
 n_channels_out: null
-backbone_kwargs:
+backbone:
+  _target_: autocast.nn.unet.TemporalUNetBackbone
+  in_channels: null
+  out_channels: null
+  cond_channels: null
   mod_features: 256
-  hid_channels: [32, 64, 128]
-  hid_blocks: [2, 2, 2]
+  hid_channels:
+    _target_: builtins.tuple
+    _args_:
+      - [32, 64, 128]
+  hid_blocks:
+    _target_: builtins.tuple
+    _args_:
+      - [2, 2, 2]
   spatial: 2
   periodic: false
diff --git a/src/autocast/eval/processor.py b/src/autocast/eval/processor.py
@@ -30,7 +30,6 @@
 )
 from autocast.models.encoder_decoder import EncoderDecoder
 from autocast.models.encoder_processor_decoder import EncoderProcessorDecoder
-from autocast.processors.utils import initialize_flow_matching_backbone
 from autocast.train.configuration import (
     compose_training_config,
     configure_module_dimensions,
@@ -340,21 +339,12 @@ def _load_state_dict(checkpoint_path: Path) -> OrderedDict[str, torch.Tensor]:
 def _load_model(
     cfg: DictConfig,
     checkpoint_path: Path,
-    n_steps_input: int,
-    channel_count: int,
-    spatial_shape: Sequence[int],
 ) -> EncoderProcessorDecoder:
     model_cfg = cfg.get("model") or cfg
     encoder = instantiate(model_cfg.encoder)
     decoder = instantiate(model_cfg.decoder)
     encoder_decoder = EncoderDecoder(encoder=encoder, decoder=decoder)
     processor = instantiate(model_cfg.processor)
-    initialize_flow_matching_backbone(
-        processor,
-        n_steps_input,
-        channel_count,
-        spatial_shape,
-    )
     epd_cfg = model_cfg
     learning_rate = epd_cfg.get("learning_rate", 1e-3)
     training_cfg = cfg.get("training") or {}
@@ -486,7 +476,7 @@ def main() -> None:
         channel_count,
         inferred_n_steps_input,
         inferred_n_steps_output,
-        input_shape,
+        _,
         _,
     ) = prepare_datamodule(cfg)
 
@@ -500,13 +490,9 @@ def main() -> None:
 
     metrics = _build_metrics(args.metrics or ("mse", "rmse"))
 
-    spatial_shape = tuple(input_shape[2:-1])
     model = _load_model(
         cfg,
         args.checkpoint,
-        inferred_n_steps_input,
-        channel_count,
-        spatial_shape,
     )
     device = _resolve_device(args.device)
     model.to(device)
diff --git a/src/autocast/processors/flow_matching.py b/src/autocast/processors/flow_matching.py
@@ -3,10 +3,8 @@
 from typing import Any
 
 import torch
-from omegaconf import DictConfig, OmegaConf
 from torch import nn
 
-from autocast.nn.unet import TemporalUNetBackbone
 from autocast.processors.base import Processor
 from autocast.types import EncodedBatch, Tensor
 
@@ -29,7 +27,6 @@ def __init__(
         flow_ode_steps: int = 1,
         n_steps_output: int = 4,
         n_channels_out: int = 1,
-        backbone_kwargs: dict[str, Any] | DictConfig | None = None,
         **kwargs: Any,
     ) -> None:
         # Store core hyperparameters and optional prebuilt backbone.
@@ -47,37 +44,6 @@ def __init__(
         self.flow_ode_steps = max(flow_ode_steps, 1)
         self.n_steps_output = n_steps_output
         self.n_channels_out = n_channels_out
-        processed_kwargs: dict[str, Any] = {}
-        raw_kwargs: Any | None
-        if isinstance(backbone_kwargs, DictConfig):
-            raw_kwargs = OmegaConf.to_container(backbone_kwargs, resolve=True)
-        else:
-            raw_kwargs = backbone_kwargs
-        if isinstance(raw_kwargs, dict):
-            processed_kwargs = {str(k): v for k, v in raw_kwargs.items()}
-            for field in ("hid_channels", "hid_blocks"):
-                value = processed_kwargs.get(field)
-                if isinstance(value, list):
-                    processed_kwargs[field] = tuple(value)
-        self.backbone_kwargs = processed_kwargs
-
-    def _maybe_build_backbone(self, x: Tensor) -> None:
-        """Lazily build TemporalUNetBackbone when no model is provided."""
-        if self.flow_matching_model is not None:
-            return
-
-        # Infer in/out channels from configured temporal/channel counts.
-        t_in = x.shape[1]
-        c_in = x.shape[-1]
-        t_out = self.n_steps_output
-        c_out = self.n_channels_out
-
-        self.flow_matching_model = TemporalUNetBackbone(
-            in_channels=t_out * c_out,
-            out_channels=t_out * c_out,
-            cond_channels=t_in * c_in,
-            **self.backbone_kwargs,
-        )
 
     def flow_field(self, z: Tensor, t: Tensor, x: Tensor) -> Tensor:
         """Flow matching vector field.
@@ -94,7 +60,6 @@ def flow_field(self, z: Tensor, t: Tensor, x: Tensor) -> Tensor:
         -------
             Time derivative of output states with the same shape as `z`.
         """
-        self._maybe_build_backbone(x)
         assert self.flow_matching_model is not None  # for type checkers
         return self.flow_matching_model(z, t, x)
 
@@ -146,8 +111,6 @@ def loss(self, batch: EncodedBatch) -> Tensor:
             )
             raise ValueError(msg)
 
-        self._maybe_build_backbone(input_states)
-
         batch_size = target_states.shape[0]
 
         z0 = torch.randn_like(target_states, requires_grad=True)
diff --git a/src/autocast/processors/utils.py b/src/autocast/processors/utils.py
@@ -1,25 +1 @@
 from __future__ import annotations
-
-from collections.abc import Sequence
-
-import torch
-
-
-def initialize_flow_matching_backbone(
-    processor,
-    n_steps_input: int | None,
-    channel_count: int | None,
-    spatial_shape: Sequence[int] | None,
-) -> None:
-    """Instantiate the flow-matching backbone before optimizers are created."""
-    builder = getattr(processor, "_maybe_build_backbone", None)
-    has_model = getattr(processor, "flow_matching_model", None) is not None
-    if builder is None or has_model:
-        return
-    if n_steps_input is None or channel_count is None:
-        return
-    spatial = tuple(spatial_shape) if spatial_shape is not None else ()
-    dummy = torch.zeros(
-        (1, n_steps_input, *spatial, channel_count), dtype=torch.float32
-    )
-    builder(dummy)
diff --git a/src/autocast/train/processor.py b/src/autocast/train/processor.py
@@ -17,7 +17,6 @@
 from autocast.models.ae import AE, AELoss
 from autocast.models.encoder_decoder import EncoderDecoder
 from autocast.models.encoder_processor_decoder import EncoderProcessorDecoder
-from autocast.processors.utils import initialize_flow_matching_backbone
 from autocast.train.configuration import (
     compose_training_config,
     configure_module_dimensions,
@@ -245,13 +244,6 @@ def main() -> None:  # noqa: PLR0915
         _freeze_module(encoder_decoder.decoder)
 
     processor = instantiate(model_cfg.processor)
-    spatial_shape = tuple(input_shape[2:-1])
-    initialize_flow_matching_backbone(
-        processor,
-        inferred_n_steps_input,
-        channel_count,
-        spatial_shape,
-    )
 
     epd_cfg = model_cfg
     learning_rate = epd_cfg.get("learning_rate", 1e-3)