26 changes: 26 additions & 0 deletions local_hydra/local_experiment/ae/ae_dc_large_128.yaml
@@ -0,0 +1,26 @@
# @package _global_
defaults:
- /distributed: ddp_4gpu_slurm
- override /datamodule: advection_diffusion
- override /model: autoencoder_dc_large
- override /optimizer: psgd
- _self_

experiment_name: ae_dc_large

datamodule:
use_normalization: false
batch_size: 64


logging:
wandb:
enabled: false

optimizer:
learning_rate: 1e-5
weight_decay: 0.0
scheduler: cosine

trainer:
gradient_clip_val: 1.0
31 changes: 31 additions & 0 deletions local_hydra/local_experiment/ae/ae_dc_large_periodic_128.yaml
@@ -0,0 +1,31 @@
# @package _global_
defaults:
- /distributed: ddp_4gpu_slurm
- override /datamodule: advection_diffusion
- override /model: autoencoder_dc_large
- override /optimizer: psgd
- _self_

experiment_name: ae_dc_large

datamodule:
use_normalization: false
batch_size: 64

model:
encoder:
periodic: true
decoder:
periodic: true

logging:
wandb:
enabled: false

optimizer:
learning_rate: 1e-5
weight_decay: 0.0
scheduler: cosine

trainer:
gradient_clip_val: 1.0
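The only difference from ae_dc_large_128.yaml above is the periodic flag on the encoder and decoder. In CNN-based autoencoders, periodic boundaries are usually honored by switching convolutions to circular padding so activations wrap around the domain edge; whether `periodic: true` toggles exactly that in autocast's model code is an assumption, since the model itself is not part of this diff. A minimal PyTorch sketch of the idea:

import torch
import torch.nn as nn

# Circular padding makes the convolution wrap around the spatial
# boundary, matching periodic boundary conditions in the PDE data.
conv = nn.Conv2d(3, 64, kernel_size=3, padding=1, padding_mode="circular")
x = torch.randn(1, 3, 128, 128)
y = conv(x)  # columns 0 and 127 now see each other across the seam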
@@ -0,0 +1,33 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion
- override /model: autoencoder_dc_large
- override /optimizer: psgd
- _self_

experiment_name: ae_dc_large

datamodule:
# use_normalization: false
# batch_size: 64
use_normalization: true
batch_size: 256

model:
encoder:
periodic: true
decoder:
periodic: true

logging:
wandb:
enabled: false

optimizer:
learning_rate: 1e-5
weight_decay: 0.0
scheduler: cosine

trainer:
gradient_clip_val: 1.0
34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_4gpu.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: ddp_4gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.CRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS
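This config trains an encode-process-decode model that emits n_members: 10 ensemble members and scores them with autocast.losses.ensemble.CRPSLoss. That class is not part of this diff; below is a minimal sketch of the standard ensemble CRPS estimator such a loss typically implements (the function name and member-first tensor layout are assumptions):

import torch

def ensemble_crps(members: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # members: (M, B, ...) ensemble forecasts; target: (B, ...) ground truth.
    # CRPS estimate: E|X - y| - 0.5 * E|X - X'|, averaged over the batch.
    skill = (members - target.unsqueeze(0)).abs().mean(dim=0)
    spread = (members.unsqueeze(0) - members.unsqueeze(1)).abs().mean(dim=(0, 1))
    return (skill - 0.5 * spread).mean()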

34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_afcrps.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.AlphaFairCRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS
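This variant swaps in AlphaFairCRPSLoss. The plain M-member estimator sketched above is biased: its spread term averages over all M^2 member pairs, including each member with itself, which rewards underdispersed ensembles. The "fair" estimator renormalizes the spread by M/(M-1), and alpha-fair formulations typically interpolate between the two. The exact definition used by autocast is not shown in this diff, so the following is only a sketch of that common interpolation:

def almost_fair_spread(members: torch.Tensor, alpha: float = 1.0) -> torch.Tensor:
    # alpha = 0 recovers the standard spread term; alpha = 1 is fully fair,
    # scaling by M / (M - 1) to drop the self-pair bias.
    m = members.shape[0]
    spread = (members.unsqueeze(0) - members.unsqueeze(1)).abs().mean(dim=(0, 1))
    return (1.0 + alpha / (m - 1)) * spread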

34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_concat.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.CRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS

34 changes: 34 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_mae.yaml
@@ -0,0 +1,34 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
loss_func:
_target_: autocast.losses.ensemble.EnsembleMAELoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS

36 changes: 36 additions & 0 deletions local_hydra/local_experiment/epd_crps_vit_azula_n_noise_1024.yaml
@@ -0,0 +1,36 @@
# @package _global_
defaults:
- /distributed: single_gpu_slurm
- override /datamodule: advection_diffusion_multichannel_64_64
- override /encoder@model.encoder: permute_concat
- override /decoder@model.decoder: channels_last
# - override /processor@model.processor: vit_azula_large
#- override /input_noise_injector@model.input_noise_injector: concat
- _self_

experiment_name: epd_crps_vit_azula

datamodule:
use_normalization: true
batch_size: 32

logging:
wandb:
enabled: true

optimizer:
learning_rate: 0.0002

model:
train_in_latent_space: false
n_members: 10
encoder:
with_constants: true
processor:
n_noise_channels: 1024
loss_func:
_target_: autocast.losses.ensemble.CRPSLoss
train_metrics:
crps:
_target_: autocast.metrics.ensemble.CRPS

7 changes: 5 additions & 2 deletions pyproject.toml
@@ -116,13 +116,16 @@ explicit = true
[tool.uv.sources]
autoemulate = { git = "https://github.com/alan-turing-institute/autoemulate.git" }
torch = [
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]
torchvision = [
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ index = "pytorch-cu126", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]

[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["tests"]
norecursedirs = ["outputs", "slurm_scripts"]
filterwarnings = [
# Ignore Lightning warnings that are expected/benign in test environment
"ignore:You are trying to `self.log\\(\\)` but the `self.trainer` reference is not registered:UserWarning",
10 changes: 10 additions & 0 deletions src/autocast/configs/datamodule/conditioned_navier_stokes_128.yaml
@@ -0,0 +1,10 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/128x128/conditioned_navier_stokes_2d_b106e4d"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
stride: 1
verbose: false
use_normalization: false
normalization_path: ${.data_path}/stats.yml
num_workers: 0
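These datamodule configs are plain Hydra target configs: data_path uses the oc.env resolver to read AUTOCAST_DATASETS with ./datasets as a fallback, and normalization_path interpolates ${.data_path} relative to the same config node. A minimal sketch of instantiating one directly (the top-level config name "train" is a guess; the project's real entry point is not in this diff):

from hydra import compose, initialize
from hydra.utils import instantiate

with initialize(version_base=None, config_path="src/autocast/configs"):
    cfg = compose(
        config_name="train",
        overrides=["datamodule=conditioned_navier_stokes_128"],
    )
datamodule = instantiate(cfg.datamodule)  # -> SpatioTemporalDataModule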
2 changes: 1 addition & 1 deletion src/autocast/configs/datamodule/gpe_laser_only_wake.yaml
@@ -1,5 +1,5 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/gpe/laser_only_wake_9be0bfb"
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/gpe/laser_only_wake_5b51eac"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
10 changes: 10 additions & 0 deletions src/autocast/configs/datamodule/gpe_laser_only_wake_128.yaml
@@ -0,0 +1,10 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/128x128/gpe/laser_only_wake_f15bdbb"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
stride: 1
verbose: false
use_normalization: false
normalization_path: ${.data_path}/stats.yml
num_workers: 0
10 changes: 10 additions & 0 deletions src/autocast/configs/datamodule/shallow_water2d_128.yaml
@@ -0,0 +1,10 @@
_target_: autocast.data.datamodule.SpatioTemporalDataModule
data_path: "${oc.env:AUTOCAST_DATASETS,./datasets}/128x128/shallow_water2d_433068d"
batch_size: 16
n_steps_input: 1
n_steps_output: 4
stride: 1
verbose: false
use_normalization: false
normalization_path: ${.data_path}/stats.yml
num_workers: 0
7 changes: 7 additions & 0 deletions src/autocast/configs/distributed/single_gpu_slurm.yaml
@@ -10,6 +10,13 @@ trainer:
strategy: auto
num_nodes: 1

hydra:
launcher:
gpus_per_node: 1
tasks_per_node: 1
additional_parameters:
ntasks: 1

eval:
accelerator: auto
devices: 1
11 changes: 11 additions & 0 deletions src/autocast/configs/optimizer/adamw.yaml
@@ -5,4 +5,15 @@ learning_rate: 1e-4
weight_decay: 0.0
warmup: 0
scheduler: "cosine"
# Optional scheduler controls:
# scheduler_interval: "epoch" # "epoch" or "step"
# cosine_epochs: null # Used when scheduler_interval="epoch"
# cosine_steps: null # Used when scheduler_interval="step"
# cosine_t_max: 1 # Fallback horizon if trainer values are unavailable
# Horizon precedence (highest to lowest):
# 1) cosine_epochs/cosine_steps (based on scheduler_interval)
# 2) trainer max value (max_epochs or estimated_stepping_batches)
# 3) cosine_t_max
# min_lr_ratio: 0.0 # Final LR ratio in [0, 1]
# scheduler: "cosine_with_restarts"
grad_clip: 1
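The commented block documents a three-level precedence for the cosine horizon. A sketch of how that resolution might look in code (resolve_cosine_horizon and its argument names are hypothetical; the repo's actual optimizer builder is not in this diff):

def resolve_cosine_horizon(cfg, trainer) -> int:
    # 1) explicit per-interval key, 2) trainer-derived value, 3) cosine_t_max.
    if cfg.scheduler_interval == "epoch":
        explicit, derived = cfg.cosine_epochs, trainer.max_epochs
    else:  # "step"
        explicit, derived = cfg.cosine_steps, trainer.estimated_stepping_batches
    if explicit is not None:
        return int(explicit)
    if derived is not None and derived > 0:
        return int(derived)
    return int(cfg.cosine_t_max)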
24 changes: 24 additions & 0 deletions src/autocast/configs/optimizer/adamw_half.yaml
@@ -0,0 +1,24 @@
# Compatibility with: https://github.com/francois-rozet/lola/blob/21a4354b327e6e5ee06da5075ba3bd1dd88c61f1/experiments/configs/optim/adamw.yaml
name: "${.optimizer}_${.learning_rate}_${.scheduler}_half"
optimizer: "adamw"
betas: [0.9, 0.99]
learning_rate: 1e-4
weight_decay: 0.0
scheduler: "cosine"

# lr_lambda(t) = (1 + cos(pi * t / epochs)) / 2
# cold_lr_lambda(t) = min(1, (t + 1) / (warmup + 1)) * lr_lambda(t)
scheduler_interval: "epoch"
cosine_epochs: ${trainer.max_epochs}

# Horizon precedence (highest to lowest):
# 1) Explicit interval key: cosine_epochs (epoch) or cosine_steps (step)
# 2) Trainer-derived value (max_epochs / estimated_stepping_batches)
# 3) cosine_t_max fallback
# In this file, cosine_epochs is set, so cosine_t_max is used only if that
# interpolation is unavailable.
cosine_t_max: 130

warmup: 0
min_lr_ratio: 0.0
grad_clip: 1
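The two lambdas quoted in the comments map directly onto a torch LambdaLR schedule. A self-contained sketch, assuming the epoch-interval setup this config selects (the builder function is illustrative, not the repo's actual code):

import math
from torch.optim.lr_scheduler import LambdaLR

def cold_cosine(optimizer, epochs: int, warmup: int = 0) -> LambdaLR:
    # lr_lambda(t)      = (1 + cos(pi * t / epochs)) / 2
    # cold_lr_lambda(t) = min(1, (t + 1) / (warmup + 1)) * lr_lambda(t)
    def cold_lr_lambda(t: int) -> float:
        cosine = (1 + math.cos(math.pi * t / epochs)) / 2
        return min(1.0, (t + 1) / (warmup + 1)) * cosine
    return LambdaLR(optimizer, lr_lambda=cold_lr_lambda)

With warmup: 0 as set here, cold_lr_lambda reduces to the plain cosine decay over cosine_epochs (interpolated from trainer.max_epochs).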