Add config options for separate train and eval strides

sgreenbury · sgreenbury · commit 8cf2e18c1d73 · 2025-12-12T14:06:00.000Z
diff --git a/configs/encoder_processor_decoder.yaml b/configs/encoder_processor_decoder.yaml
@@ -14,7 +14,9 @@ output:
 training:
   n_steps_input: 1
   n_steps_output: 4
-  stride: 4
+  train_stride: 1
+  eval_stride: 4
+  stride: 4  # Kept for backward compatibility with configs that still reference training.stride
   autoencoder_checkpoint: null
   freeze_autoencoder: false
 
diff --git a/configs/model/encoder_processor_decoder.yaml b/configs/model/encoder_processor_decoder.yaml
@@ -6,6 +6,8 @@ defaults:
 
 learning_rate: 0.0001
 train_processor_only: true
+stride: ${training.train_stride}
+eval_stride: ${training.eval_stride}
 teacher_forcing_ratio: 0.5
 max_rollout_steps: 10
 loss_func:
diff --git a/configs/processor/diffusion.yaml b/configs/processor/diffusion.yaml
@@ -13,7 +13,7 @@ schedule:
   _target_: azula.noise.VPSchedule
 denoiser_type: karras
 teacher_forcing_ratio: 0.0
-stride: ${training.stride}
+stride: ${training.train_stride}
 max_rollout_steps: ${training.n_steps_output}
 learning_rate: 0.0001
 n_steps_output: null
diff --git a/configs/processor/flow_matching.yaml b/configs/processor/flow_matching.yaml
@@ -1,5 +1,5 @@
 _target_: autocast.processors.flow_matching.FlowMatchingProcessor
-stride: ${training.stride}
+stride: ${training.train_stride}
 teacher_forcing_ratio: 0.0
 max_rollout_steps: ${training.n_steps_output}
 learning_rate: 0.0001
@@ -11,7 +11,7 @@ backbone:
   in_channels: null
   out_channels: null
   cond_channels: null
-  mod_features: 200
+  mod_features: 256
   hid_channels: [32, 64, 128]
   hid_blocks: [2, 2, 2]
   spatial: 2
diff --git a/configs/processor/fno.yaml b/configs/processor/fno.yaml
@@ -4,4 +4,7 @@ out_channels: null
 n_modes: [16, 16]
 hidden_channels: 64
 n_layers: 4
-learning_rate: 0.001
+learning_rate: 0.0001
+stride: ${training.train_stride}
+teacher_forcing_ratio: 0.0
+max_rollout_steps: ${training.n_steps_output}
diff --git a/notebooks/00_quickstart.ipynb b/notebooks/00_quickstart.ipynb
@@ -39,10 +39,11 @@
     "from autocast.data.datamodule import SpatioTemporalDataModule, TheWellDataModule\n",
     "from autocast.metrics.spatiotemporal import MAE, MSE, RMSE\n",
     "\n",
-    "THE_WELL = True\n",
+    "THE_WELL = False\n",
     "n_steps_input = 1\n",
     "n_steps_output = 4\n",
-    "stride = n_steps_output"
+    "train_stride = 1\n",
+    "eval_stride = 4"
    ]
   },
   {
@@ -62,9 +63,9 @@
    "source": [
     "\n",
     "if not THE_WELL:\n",
-    "    # simulation_name = \"reaction_diffusion\"\n",
+    "    simulation_name = \"reaction_diffusion\"\n",
     "    # simulation_name = \"advection_diffusion\"\n",
-    "    simulation_name = \"advection_diffusion_multichannel\"\n",
+    "    # simulation_name = \"advection_diffusion_multichannel\"\n",
     "\n",
     "    if simulation_name == \"advection_diffusion_multichannel\":\n",
     "        # Override to use multichannel version\n",
@@ -107,8 +108,8 @@
     "            pickle.dump(combined_data, f)\n",
     "\n",
     "    datamodule = SpatioTemporalDataModule(\n",
-    "        data=combined_data,\n",
-    "        data_path=None,\n",
+    "        # data=combined_data,\n",
+    "        data_path=\"../datasets/reaction_diffusion\",\n",
     "        n_steps_input=n_steps_input,\n",
     "        n_steps_output=n_steps_output,\n",
     "        stride=n_steps_output,\n",
@@ -221,7 +222,7 @@
     "        schedule=VPSchedule(),  # accepted for API parity, not used internally\n",
     "        n_steps_output=n_steps_output,\n",
     "        n_channels_out=n_channels,\n",
-    "        stride=n_steps_output,\n",
+    "        stride=train_stride,\n",
     "        flow_ode_steps=4,\n",
     "    )\n",
     "else:\n",
@@ -232,21 +233,18 @@
     "        schedule=VPSchedule(),\n",
     "        n_steps_output=n_steps_output,\n",
     "        n_channels_out=n_channels,\n",
-    "        stride=n_steps_output,\n",
     "    )\n",
     "\n",
     "encoder = IdentityEncoder()\n",
     "decoder = IdentityDecoder()\n",
     "model = EncoderProcessorDecoder(\n",
     "    encoder_decoder=EncoderDecoder(encoder=encoder, decoder=decoder),\n",
     "    processor=processor,\n",
-    "    stride=stride,\n",
     "    train_processor_only=True,\n",
-    "    # learning_rate=1e-5,\n",
     "    learning_rate=1e-4,\n",
-    "    #test_metrics = [MSE(), MAE(), RMSE()]\n",
+    "    test_metrics = [MSE(), MAE(), RMSE()]\n",
     ")\n",
-    "maybe_watch_model(logger, model, watch)"
+    "maybe_watch_model(logger, model, watch)\n"
    ]
   },
   {
@@ -341,21 +339,10 @@
    "id": "19",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "# Set max rollout steps based on batch output shape\n",
-    "# model.max_rollout_steps = batch.output_fields.shape[1] // (n_steps_output * 2)\n",
-    "model.max_rollout_steps = 20"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "20",
-   "metadata": {},
-   "outputs": [],
    "source": [
     "# Run rollout on one trajectory\n",
-    "preds, trues = model.rollout(batch, free_running_only=True)\n",
+    "model.max_rollout_steps = 20\n",
+    "preds, trues = model.rollout(batch, stride=eval_stride, free_running_only=True)\n",
     "\n",
     "print(preds.shape)\n",
     "assert trues is not None\n",
@@ -365,7 +352,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "21",
+   "id": "20",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -374,7 +361,7 @@
     "assert trues is not None\n",
     "assert preds.shape == trues.shape\n",
     "mse = MSE()\n",
-    "mse_error_spatial = mse.score(preds, trues)\n",
+    "mse_error_spatial = mse(preds, trues)\n",
     "mse_error = mse(preds, trues)\n",
     "print(\"MSE spatial has shape (B,T,C):\", mse_error_spatial.shape)\n",
     "print(\"MSE overall is a single scalar:\", mse_error.shape)"
@@ -383,7 +370,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "22",
+   "id": "21",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -415,7 +402,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "23",
+   "id": "22",
    "metadata": {},
    "outputs": [],
    "source": []