26 changes: 26 additions & 0 deletions local_hydra/local_experiment/ae/ae_dc_large_128.yaml
@@ -0,0 +1,26 @@
# @package _global_
defaults:
- /distributed: ddp_4gpu_slurm
- override /datamodule: advection_diffusion
- override /model: autoencoder_dc_large
- override /optimizer: psgd
- _self_

experiment_name: ae_dc_large

datamodule:
use_normalization: false
batch_size: 64


logging:
wandb:
enabled: false

optimizer:
learning_rate: 1e-5
weight_decay: 0.0
scheduler: cosine

trainer:
gradient_clip_val: 1.0
31 changes: 31 additions & 0 deletions local_hydra/local_experiment/ae/ae_dc_large_periodic_128.yaml
@@ -0,0 +1,31 @@
# @package _global_
defaults:
- /distributed: ddp_4gpu_slurm
- override /datamodule: advection_diffusion
- override /model: autoencoder_dc_large
- override /optimizer: psgd
- _self_

experiment_name: ae_dc_large

datamodule:
use_normalization: false
batch_size: 64

model:
encoder:
periodic: true
decoder:
periodic: true

logging:
wandb:
enabled: false

optimizer:
learning_rate: 1e-5
weight_decay: 0.0
scheduler: cosine

trainer:
gradient_clip_val: 1.0
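The only difference from ae_dc_large_128.yaml above is the periodic flag on the encoder and decoder. In CNN-based autoencoders, periodic boundaries are usually honored by switching convolutions to circular padding so activations wrap around the domain edge; whether `periodic: true` toggles exactly that in autocast's model code is an assumption, since the model itself is not part of this diff. A minimal PyTorch sketch of the idea:

import torch
import torch.nn as nn

# Circular padding makes the convolution wrap around the spatial
# boundary, matching periodic boundary conditions in the PDE data.
conv = nn.Conv2d(3, 64, kernel_size=3, padding=1, padding_mode="circular")
x = torch.randn(1, 3, 128, 128)
y = conv(x)  # columns 0 and 127 now see each other across the seam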
@@ -0,0 +1,33 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion
- override /model: autoencoder_dc_large
- override /optimizer: psgd
- _self_

experiment_name: ae_dc_large

datamodule:
# use_normalization: false
# batch_size: 64
use_normalization: true
batch_size: 256

model:
encoder:
periodic: true
decoder:
periodic: true

logging:
wandb:
enabled: false

optimizer:
learning_rate: 1e-5
weight_decay: 0.0
scheduler: cosine

trainer:
gradient_clip_val: 1.0
34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_4gpu.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: ddp_4gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.CRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS
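This config trains an encode-process-decode model that emits n_members: 10 ensemble members and scores them with autocast.losses.ensemble.CRPSLoss. That class is not part of this diff; below is a minimal sketch of the standard ensemble CRPS estimator such a loss typically implements (the function name and member-first tensor layout are assumptions):

import torch

def ensemble_crps(members: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # members: (M, B, ...) ensemble forecasts; target: (B, ...) ground truth.
    # CRPS estimate: E|X - y| - 0.5 * E|X - X'|, averaged over the batch.
    skill = (members - target.unsqueeze(0)).abs().mean(dim=0)
    spread = (members.unsqueeze(0) - members.unsqueeze(1)).abs().mean(dim=(0, 1))
    return (skill - 0.5 * spread).mean()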

34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_afcrps.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.AlphaFairCRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS
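This variant swaps in AlphaFairCRPSLoss. The plain M-member estimator sketched above is biased: its spread term averages over all M^2 member pairs, including each member with itself, which rewards underdispersed ensembles. The "fair" estimator renormalizes the spread by M/(M-1), and alpha-fair formulations typically interpolate between the two. The exact definition used by autocast is not shown in this diff, so the following is only a sketch of that common interpolation:

def almost_fair_spread(members: torch.Tensor, alpha: float = 1.0) -> torch.Tensor:
    # alpha = 0 recovers the standard spread term; alpha = 1 is fully fair,
    # scaling by M / (M - 1) to drop the self-pair bias.
    m = members.shape[0]
    spread = (members.unsqueeze(0) - members.unsqueeze(1)).abs().mean(dim=(0, 1))
    return (1.0 + alpha / (m - 1)) * spread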

34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_concat.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.CRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS

34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_mae.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.EnsembleMAELoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS

36 changes: 36 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_n_noise_1024.yaml
@@ -0,0 +1,36 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
processor:
n_noise_channels: 1024
loss_func:
_target_: autocast.losses.ensemble.CRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS

7 changes: 5 additions & 2 deletions pyproject.toml
@@ -116,13 +116,16 @@ explicit = true
[tool.uv.sources]
autoemulate = { git = "https://github.com/alan-turing-institute/autoemulate.git" }
torch = [
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]
torchvision = [
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]

[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["tests"]
norecursedirs = ["outputs", "slurm_scripts"]
filterwarnings = [
# Ignore Lightning warnings that are expected/benign in test environment
"ignore:You are trying to `self.log\\(\\)` but the `self.trainer` reference is not registered:UserWarning",
10 changes: 10 additions & 0 deletions src/autocast/configs/datamodule/conditioned_navier_stokes_128.yaml
@@ -0,0 +1,10 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/128x128/conditioned_navier_stokes_2d_b106e4d"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
stride: 1
verbose: false
use_normalization: false
normalization_path: ${.data_path}/stats.yml
num_workers: 0
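These datamodule configs are plain Hydra target configs: data_path uses the oc.env resolver to read AUTOCAST_DATASETS with ./datasets as a fallback, and normalization_path interpolates ${.data_path} relative to the same config node. A minimal sketch of instantiating one directly (the top-level config name "train" is a guess; the project's real entry point is not in this diff):

from hydra import compose, initialize
from hydra.utils import instantiate

with initialize(version_base=None, config_path="src/autocast/configs"):
    cfg = compose(
        config_name="train",
        overrides=["datamodule=conditioned_navier_stokes_128"],
    )
datamodule = instantiate(cfg.datamodule)  # -> SpatioTemporalDataModule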
2 changes: 1 addition & 1 deletion src/autocast/configs/datamodule/gpe_laser_only_wake.yaml
@@ -1,5 +1,5 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/gpe/laser_only_wake_9be0bfb"
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/gpe/laser_only_wake_5b51eac"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
10 changes: 10 additions & 0 deletions src/autocast/configs/datamodule/gpe_laser_only_wake_128.yaml
@@ -0,0 +1,10 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/128x128/gpe/laser_only_wake_f15bdbb"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
stride: 1
verbose: false
use_normalization: false
normalization_path: ${.data_path}/stats.yml
num_workers: 0
10 changes: 10 additions & 0 deletions src/autocast/configs/datamodule/shallow_water2d_128.yaml
@@ -0,0 +1,10 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/128x128/shallow_water2d_433068d"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
stride: 1
verbose: false
use_normalization: false
normalization_path: ${.data_path}/stats.yml
num_workers: 0
7 changes: 7 additions & 0 deletions src/autocast/configs/distributed/single_gpu_slurm.yaml
@@ -10,6 +10,13 @@ trainer:
strategy: auto
num_nodes: 1

hydra:
launcher:
gpus_per_node: 1
tasks_per_node: 1
additional_parameters:
ntasks: 1

eval:
accelerator: auto
devices: 1
11 changes: 11 additions & 0 deletions src/autocast/configs/optimizer/adamw.yaml
@@ -5,4 +5,15 @@ learning_rate: 1e-4
weight_decay: 0.0
warmup: 0
scheduler: "cosine"
# Optional scheduler controls:
# scheduler_interval: "epoch" # "epoch" or "step"
# cosine_epochs: null # Used when scheduler_interval="epoch"
# cosine_steps: null # Used when scheduler_interval="step"
# cosine_t_max: 1 # Fallback horizon if trainer values are unavailable
# Horizon precedence (highest to lowest):
# 1) cosine_epochs/cosine_steps (based on scheduler_interval)
# 2) trainer max value (max_epochs or estimated_stepping_batches)
# 3) cosine_t_max
# min_lr_ratio: 0.0 # Final LR ratio in [0, 1]
# scheduler: "cosine_with_restarts"
grad_clip: 1
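The commented block documents a three-level precedence for the cosine horizon. A sketch of how that resolution might look in code (resolve_cosine_horizon and its argument names are hypothetical; the repo's actual optimizer builder is not in this diff):

def resolve_cosine_horizon(cfg, trainer) -> int:
    # 1) explicit per-interval key, 2) trainer-derived value, 3) cosine_t_max.
    if cfg.scheduler_interval == "epoch":
        explicit, derived = cfg.cosine_epochs, trainer.max_epochs
    else:  # "step"
        explicit, derived = cfg.cosine_steps, trainer.estimated_stepping_batches
    if explicit is not None:
        return int(explicit)
    if derived is not None and derived > 0:
        return int(derived)
    return int(cfg.cosine_t_max)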
24 changes: 24 additions & 0 deletions src/autocast/configs/optimizer/adamw_half.yaml
@@ -0,0 +1,24 @@
# Compatibility with: https://github.com/francois-rozet/lola/blob/21a4354b327e6e5ee06da5075ba3bd1dd88c61f1/experiments/configs/optim/adamw.yaml
name: "${.optimizer}_${.learning_rate}_${.scheduler}_half"
optimizer: "adamw"
betas: [0.9, 0.99]
learning_rate: 1e-4
weight_decay: 0.0
scheduler: "cosine"

# lr_lambda(t) = (1 + cos(pi * t / epochs)) / 2
# cold_lr_lambda(t) = min(1, (t + 1) / (warmup + 1)) * lr_lambda(t)
scheduler_interval: "epoch"
cosine_epochs: ${trainer.max_epochs}

# Horizon precedence (highest to lowest):
# 1) Explicit interval key: cosine_epochs (epoch) or cosine_steps (step)
# 2) Trainer-derived value (max_epochs / estimated_stepping_batches)
# 3) cosine_t_max fallback
# In this file, cosine_epochs is set, so cosine_t_max is used only if that
# interpolation is unavailable.
cosine_t_max: 130

warmup: 0
min_lr_ratio: 0.0
grad_clip: 1
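The two lambdas quoted in the comments map directly onto a torch LambdaLR schedule. A self-contained sketch, assuming the epoch-interval setup this config selects (the builder function is illustrative, not the repo's actual code):

import math
from torch.optim.lr_scheduler import LambdaLR

def cold_cosine(optimizer, epochs: int, warmup: int = 0) -> LambdaLR:
    # lr_lambda(t)      = (1 + cos(pi * t / epochs)) / 2
    # cold_lr_lambda(t) = min(1, (t + 1) / (warmup + 1)) * lr_lambda(t)
    def cold_lr_lambda(t: int) -> float:
        cosine = (1 + math.cos(math.pi * t / epochs)) / 2
        return min(1.0, (t + 1) / (warmup + 1)) * cosine
    return LambdaLR(optimizer, lr_lambda=cold_lr_lambda)

With warmup: 0 as set here, cold_lr_lambda reduces to the plain cosine decay over cosine_epochs (interpolated from trainer.max_epochs).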