diff --git a/local_hydra/local_experiment/ablations/arch_unet_fno_vit/conditioned_navier_stokes/crps_unet_azula_80m.yaml b/local_hydra/local_experiment/ablations/arch_unet_fno_vit/conditioned_navier_stokes/crps_unet_azula_80m.yaml new file mode 100644 index 00000000..611f80cf --- /dev/null +++ b/local_hydra/local_experiment/ablations/arch_unet_fno_vit/conditioned_navier_stokes/crps_unet_azula_80m.yaml @@ -0,0 +1,69 @@ +# @package _global_ +defaults: + - /distributed: ddp_4gpu_slurm + - override /datamodule: conditioned_navier_stokes + - override /encoder@model.encoder: permute_concat + - override /decoder@model.decoder: channels_last + - override /processor@model.processor: unet_azula_large + - override /optimizer: adamw_half + - _self_ + +experiment_name: ablation_arch_crps_unet_azula_80m_conditioned_navier_stokes + +datamodule: + use_normalization: true + batch_size: 32 + +float32_matmul_precision: high + +logging: + wandb: + enabled: true + +output: + skip_test: true + +optimizer: + learning_rate: 2e-4 + warmup: 0 + +model: + train_in_latent_space: false + n_members: 8 + encoder: + with_constants: true + processor: + # Both backbones at their Azula-canonical FFN ratios (UNet ffn_factor=1, + # ViT ffn_factor=4). hid_channels follows the canonical 1:2:4:8 doubling; + # base=62 lands at ~81.3M processor params, matching the 81.0M CRPS ViT + # baseline within 0.3%. periodic=false matches CNS Neumann BCs. 
+ hid_channels: [62, 124, 248, 496] + hid_blocks: [3, 3, 3, 3] + norm: layer + ffn_factor: 1 + dropout: 0.0 + periodic: false + gradient_checkpointing: false + n_noise_channels: 1024 + loss_func: + _target_: autocast.losses.ensemble.AlphaFairCRPSLoss + train_metrics: + afcrps: + _target_: autocast.metrics.ensemble.AlphaFairCRPS + afcrps_mae_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSMAETerm + afcrps_spread_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSSpreadTerm + val_metrics: + afcrps: + _target_: autocast.metrics.ensemble.AlphaFairCRPS + afcrps_mae_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSMAETerm + afcrps_spread_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSSpreadTerm + spread: + _target_: autocast.metrics.ensemble.EnsembleSpread + multicoverage: + _target_: autocast.metrics.MultiCoverage + multiwinkler: + _target_: autocast.metrics.ensemble.MultiWinkler diff --git a/local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_fair.yaml b/local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_fair.yaml new file mode 100644 index 00000000..14b22fef --- /dev/null +++ b/local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_fair.yaml @@ -0,0 +1,61 @@ +# @package _global_ +defaults: + - /distributed: ddp_4gpu_slurm + - override /datamodule: conditioned_navier_stokes + - override /encoder@model.encoder: permute_concat + - override /decoder@model.decoder: channels_last + - override /processor@model.processor: vit_azula_large + - override /optimizer: adamw_half + - _self_ + +experiment_name: ablation_crps_variant_fair_conditioned_navier_stokes + +datamodule: + use_normalization: true + batch_size: 32 + +float32_matmul_precision: high + +logging: + wandb: + enabled: true + +output: + skip_test: true + +optimizer: + learning_rate: 2e-4 + warmup: 0 + +model: + train_in_latent_space: false + n_members: 8 + encoder: + 
with_constants: true + processor: + hidden_dim: 568 + num_heads: 8 + n_layers: 12 + n_noise_channels: 1024 + loss_func: + _target_: autocast.losses.ensemble.FairCRPSLoss + train_metrics: + fcrps: + _target_: autocast.metrics.ensemble.FairCRPS + fcrps_mae_term: + _target_: autocast.metrics.ensemble.FairCRPSMAETerm + fcrps_spread_term: + _target_: autocast.metrics.ensemble.FairCRPSSpreadTerm + val_metrics: + fcrps: + _target_: autocast.metrics.ensemble.FairCRPS + fcrps_mae_term: + _target_: autocast.metrics.ensemble.FairCRPSMAETerm + fcrps_spread_term: + _target_: autocast.metrics.ensemble.FairCRPSSpreadTerm + spread: + _target_: autocast.metrics.ensemble.EnsembleSpread + multicoverage: + _target_: autocast.metrics.MultiCoverage + multiwinkler: + _target_: autocast.metrics.ensemble.MultiWinkler diff --git a/local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_plain.yaml b/local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_plain.yaml new file mode 100644 index 00000000..66c54633 --- /dev/null +++ b/local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_plain.yaml @@ -0,0 +1,61 @@ +# @package _global_ +defaults: + - /distributed: ddp_4gpu_slurm + - override /datamodule: conditioned_navier_stokes + - override /encoder@model.encoder: permute_concat + - override /decoder@model.decoder: channels_last + - override /processor@model.processor: vit_azula_large + - override /optimizer: adamw_half + - _self_ + +experiment_name: ablation_crps_variant_plain_conditioned_navier_stokes + +datamodule: + use_normalization: true + batch_size: 32 + +float32_matmul_precision: high + +logging: + wandb: + enabled: true + +output: + skip_test: true + +optimizer: + learning_rate: 2e-4 + warmup: 0 + +model: + train_in_latent_space: false + n_members: 8 + encoder: + with_constants: true + processor: + hidden_dim: 568 + num_heads: 8 + n_layers: 12 + n_noise_channels: 1024 + loss_func: + 
_target_: autocast.losses.ensemble.CRPSLoss + train_metrics: + crps: + _target_: autocast.metrics.ensemble.CRPS + crps_mae_term: + _target_: autocast.metrics.ensemble.CRPSMAETerm + crps_spread_term: + _target_: autocast.metrics.ensemble.CRPSSpreadTerm + val_metrics: + crps: + _target_: autocast.metrics.ensemble.CRPS + crps_mae_term: + _target_: autocast.metrics.ensemble.CRPSMAETerm + crps_spread_term: + _target_: autocast.metrics.ensemble.CRPSSpreadTerm + spread: + _target_: autocast.metrics.ensemble.EnsembleSpread + multicoverage: + _target_: autocast.metrics.MultiCoverage + multiwinkler: + _target_: autocast.metrics.ensemble.MultiWinkler diff --git a/local_hydra/local_experiment/ablations/fm_vs_diffusion/conditioned_navier_stokes/diffusion_vit_large.yaml b/local_hydra/local_experiment/ablations/fm_vs_diffusion/conditioned_navier_stokes/diffusion_vit_large.yaml new file mode 100644 index 00000000..f5aa0ae8 --- /dev/null +++ b/local_hydra/local_experiment/ablations/fm_vs_diffusion/conditioned_navier_stokes/diffusion_vit_large.yaml @@ -0,0 +1,44 @@ +# @package _global_ +defaults: + - /distributed: ddp_4gpu_slurm + - override /datamodule: conditioned_navier_stokes + - override /encoder@model.encoder: identity + - override /decoder@model.decoder: identity + - override /processor@model.processor: diffusion_vit + - override /backbone@model.processor.backbone: vit + - override /optimizer: adamw_half + - _self_ + +experiment_name: ablation_diffusion_vit_large_conditioned_navier_stokes + +# Match the CNS FM ambient baseline as closely as possible: same identity +# conditioning path, batch size, optimizer, and ViT backbone. 
+datamodule: + use_normalization: true + batch_size: 256 + +float32_matmul_precision: high + +logging: + wandb: + enabled: true + +output: + skip_test: true + +optimizer: + learning_rate: 1e-4 + warmup: 0 + +model: + train_in_latent_space: true + processor: + denoiser_type: karras + sampler_steps: 50 + sampler: euler + backbone: + hid_channels: 704 + hid_blocks: 12 + attention_heads: 8 + patch_size: 4 + val_metrics: [] diff --git a/local_hydra/local_experiment/ablations/noise_channels/conditioned_navier_stokes/crps_vit_noise256.yaml b/local_hydra/local_experiment/ablations/noise_channels/conditioned_navier_stokes/crps_vit_noise256.yaml new file mode 100644 index 00000000..e1c29a00 --- /dev/null +++ b/local_hydra/local_experiment/ablations/noise_channels/conditioned_navier_stokes/crps_vit_noise256.yaml @@ -0,0 +1,64 @@ +# @package _global_ +defaults: + - /distributed: ddp_4gpu_slurm + - override /datamodule: conditioned_navier_stokes + - override /encoder@model.encoder: permute_concat + - override /decoder@model.decoder: channels_last + - override /processor@model.processor: vit_azula_large + - override /optimizer: adamw_half + - _self_ + +experiment_name: ablation_noise_channels_crps_vit_256_conditioned_navier_stokes + +datamodule: + use_normalization: true + batch_size: 32 + +float32_matmul_precision: high + +logging: + wandb: + enabled: true + +output: + skip_test: true + +optimizer: + learning_rate: 2e-4 + warmup: 0 + +model: + train_in_latent_space: false + n_members: 8 + encoder: + with_constants: true + processor: + # With n_noise_channels=256, hidden_dim=568 drops the processor to ~53.4M + # params. Keep depth/heads fixed and use width as the single balancing knob; + # hidden_dim=704 gives ~79.9M processor params for CNS ambient shapes. 
+ hidden_dim: 704 + num_heads: 8 + n_layers: 12 + n_noise_channels: 256 + loss_func: + _target_: autocast.losses.ensemble.AlphaFairCRPSLoss + train_metrics: + afcrps: + _target_: autocast.metrics.ensemble.AlphaFairCRPS + afcrps_mae_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSMAETerm + afcrps_spread_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSSpreadTerm + val_metrics: + afcrps: + _target_: autocast.metrics.ensemble.AlphaFairCRPS + afcrps_mae_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSMAETerm + afcrps_spread_term: + _target_: autocast.metrics.ensemble.AlphaFairCRPSSpreadTerm + spread: + _target_: autocast.metrics.ensemble.EnsembleSpread + multicoverage: + _target_: autocast.metrics.MultiCoverage + multiwinkler: + _target_: autocast.metrics.ensemble.MultiWinkler diff --git a/slurm_scripts/ablations/README.md b/slurm_scripts/ablations/README.md index 1e783044..42fd1f9e 100644 --- a/slurm_scripts/ablations/README.md +++ b/slurm_scripts/ablations/README.md @@ -19,14 +19,15 @@ small edit. |---|---|---|---|---| | ensemble_size (m=16, fixed bs=32) | sweep | CNS | 1 | ready | | ensemble_size (m=16, fixed global eff. 
bs=1024) | sweep | GS / GPE / CNS / AD | 4 | timing ready | -| noise_channels | sweep | CNS | 1+ | stub | -| crps_variants (AlphaFair / Fair / CRPS) | comparison | CNS | 3 | stub | -| fm_vs_diffusion | comparison | CNS | 1 | stub | -| arch_unet_fno_vit | comparison | CNS | 2 | stub | +| planned_cns batch | mixed | CNS | 8 | timing scripted | +| noise_channels | sweep | CNS | 1 | config + planned | +| crps_variants (AlphaFair / Fair / CRPS) | comparison | CNS | 2 new (+baseline) | config + planned | +| fm_vs_diffusion | comparison | CNS | 1 | config + planned | +| arch_unet_fno_vit | comparison | CNS | 1 U-Net (+ViT baseline) | config + planned | | model_size | sweep | CNS | 2 active (+2 staged) | in progress | | vit_mae_pretrain | pretrain | CNS | 1 | staged | -| cached_latent_crps | comparison | CNS | 1 (done, 2026-04-20) | stub | -| cond_global_vs_permute | comparison | CNS | 1 (done for CRPS-ViT, 2026-04-18) | stub | +| cached_latent_crps | comparison | CNS | 1 (basis: 2026-04-20) | eval ready | +| cond_global_vs_permute | comparison | CNS | 1 planned rerun (+old 2026-04-18 point) | config ready | | eval_only/ode_steps | eval-only | FM runs | 0 | stub | | eval_only/ema | eval-only | EMA ckpts | 0 | stub | @@ -35,6 +36,27 @@ small edit. ablation — no new training required, but they should be eval'd through the same pipeline. +## Planned CNS batch + +The current planned CNS batch is centralized in +`submit_planned_cns_timing.sh` and `submit_planned_cns_large.sh` so the +cross-ablation run list can be submitted consistently after timing. 
It covers: + +| planned run | study folder | implementation | +|---|---|---| +| U-Net m=8 CRPS CNS | `arch_unet_fno_vit` | `crps_unet_azula_80m`, ~81.3M params | +| Diffusion CNS | `fm_vs_diffusion` | diffusion processor with the FM 704/12/8 ViT backbone | +| CNS m=8 fair CRPS | `crps_variants` | FairCRPS loss on the 80M CRPS ViT | +| CNS m=8 CRPS | `crps_variants` | plain CRPS loss on the 80M CRPS ViT | +| CNS ViT noise channels=256 | `noise_channels` | CRPS ViT with `n_noise_channels=256`, `hidden_dim=704` (~79.9M params) | +| CNS m=4 ViT | `ensemble_size` | canonical CRPS ViT plus `n_members=4`, `batch_size=64` | +| CNS m=8 latent CRPS | `cached_latent_crps` | 2026-04-20 cached-latent CRPS basis | +| CNS m=8 CRPS ViT global cond | `cond_global_vs_permute` | identity encoder + `include_global_cond=true` | + +Use the 2026-04-24 CRPS ambient runs for current CRPS comparison numbers and +the 2026-04-20 `diff_*` cached-latent runs as the FM/diff basis. The comparison +eval scripts have those dates wired in. + ## Design notes - **Flexible by construction.** Each ablation is a self-contained diff --git a/slurm_scripts/ablations/arch_unet_fno_vit/README.md b/slurm_scripts/ablations/arch_unet_fno_vit/README.md index 4b8b0485..70f42f0f 100644 --- a/slurm_scripts/ablations/arch_unet_fno_vit/README.md +++ b/slurm_scripts/ablations/arch_unet_fno_vit/README.md @@ -3,7 +3,7 @@ Compare U-Net and FNO backbones against the ViT (Azula) baseline on the CRPS ambient path. -**Status:** stub — no scripts yet. +**Status:** U-Net CNS config added; FNO remains unscheduled. ## Baseline @@ -19,19 +19,21 @@ Swap `model.processor` backbone while trying to match parameter count CRPS. - `local_hydra/local_experiment/epd_crps_fno.yaml` — FNO + CRPS. -Each will need per-CNS `local_experiment/ablations/arch/.yaml` -that matches the ambient baseline's encoder/decoder/loss so only the -backbone varies.
+The planned U-Net run uses +`local_hydra/local_experiment/ablations/arch_unet_fno_vit/conditioned_navier_stokes/crps_unet_azula_80m.yaml`. +It matches the ambient baseline's encoder/decoder/loss and uses an Azula U-Net +channel ladder `[62, 124, 248, 496]`, measured at ~81.3M processor params for +CNS ambient shapes. + +FNO still needs a matching per-CNS config before scheduling. ## Datasets -CNS only for now. Table says 2 datasets × 2 non-ViT archs = 4 runs -(CNS gives 2: U-Net and FNO). +CNS only for now. Current planned coverage is U-Net only; FNO is held back +until the parameter-matching decision is settled. ## Outstanding decisions -- How to match parameter count across architectures — the comparison - table for the main study (see `slurm_scripts/comparison/README.md`) - locked ~80M for ViT variants; we need equivalent targets for U-Net - and FNO. +- How to match FNO parameter count — the U-Net target is now fixed at ~81.3M + to match the 80M ViT variants. - Whether FNO needs a different patch-size / token structure. diff --git a/slurm_scripts/ablations/cached_latent_crps/README.md b/slurm_scripts/ablations/cached_latent_crps/README.md index 8b9dec00..31fc113b 100644 --- a/slurm_scripts/ablations/cached_latent_crps/README.md +++ b/slurm_scripts/ablations/cached_latent_crps/README.md @@ -3,7 +3,7 @@ CRPS loss trained in cached-latent space (processor-only training on pre-encoded latents, decoded only at eval time). -**Status:** CNS data point exists — +**Status:** CNS data point exists as the current latent CRPS basis — `outputs/2026-04-20/crps_cns64_vit_azula_large_09490da_8b7573d`.
No new training script needed for this pass; comparison eval is handled by `slurm_scripts/comparison/eval/submit_eval_crps_latent.sh` via the default diff --git a/slurm_scripts/ablations/cond_global_vs_permute/README.md b/slurm_scripts/ablations/cond_global_vs_permute/README.md index 88ea7515..c7089d90 100644 --- a/slurm_scripts/ablations/cond_global_vs_permute/README.md +++ b/slurm_scripts/ablations/cond_global_vs_permute/README.md @@ -5,9 +5,11 @@ channel concatenation) to `identity` encoder + `include_global_cond: true` (AdaLN modulation on the backbone). Makes conditioning flow match FM ambient, isolating the encoder effect. -**Status:** CNS data point exists for CRPS-ViT — -`outputs/2026-04-18/crps_cns64_vit_azula_large_0f89f06_cf53b48`. No new -CRPS-ViT training needed for this pass; U-Net equivalent is pending. +**Status:** CNS config exists and is included in the planned CNS batch. An +older data point exists at +`outputs/2026-04-18/crps_cns64_vit_azula_large_0f89f06_cf53b48`, but the +current comparison basis uses 2026-04-24 CRPS runs, so rerun this ablation if +date-aligned comparison is required. ## Baselines @@ -22,6 +24,5 @@ CRPS-ViT training needed for this pass; U-Net equivalent is pending. mirroring the ViT ablation. U-Net backbone `include_global_cond` path to be verified against `src/autocast/processors/` U-Net module. -- Eval for the existing CNS ViT ablation run is covered by - `slurm_scripts/comparison/eval/submit_eval_crps_ambient.sh` (included - in its RUN_DIRS). +- Eval for a rerun should live under this ablation until it is promoted into + the main comparison eval set. diff --git a/slurm_scripts/ablations/crps_variants/README.md b/slurm_scripts/ablations/crps_variants/README.md index f14b554d..b0e07d04 100644 --- a/slurm_scripts/ablations/crps_variants/README.md +++ b/slurm_scripts/ablations/crps_variants/README.md @@ -2,7 +2,8 @@ Compare `AlphaFairCRPS` (baseline) vs `FairCRPS` vs `CRPS`. -**Status:** stub — no scripts yet. 
+**Status:** FairCRPS and plain CRPS CNS configs added; AlphaFairCRPS is the +2026-04-24 CRPS baseline. ## Baseline @@ -20,16 +21,18 @@ target: | FairCRPS | `autocast.losses.ensemble.FairCRPSLoss` | `autocast.metrics.ensemble.FairCRPS` | | CRPS | `autocast.losses.ensemble.CRPSLoss` | `autocast.metrics.ensemble.CRPS` | -Exact class paths to be verified against -`src/autocast/losses/ensemble.py` and `metrics/ensemble.py` before -scripting. +Class paths were verified against `src/autocast/losses/ensemble.py` and +`src/autocast/metrics/ensemble.py`. ## Datasets -CNS only for now. Table spec'd 2 datasets × 3 losses = 6 runs — CNS -gives us 3 runs for this pass. +CNS only for now. The planned batch adds the two non-baseline loss variants: + +- `local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_fair.yaml` +- `local_hydra/local_experiment/ablations/crps_variants/conditioned_navier_stokes/crps_vit_plain.yaml` ## Implementation sketch -Single-file sweep via CLI overrides in `submit_crps_variants_*.sh` with -a `LOSSES` array of `(name, loss_target, metric_target)` triples. +The cross-cutting submitter is +`slurm_scripts/ablations/submit_planned_cns_{timing,large}.sh`; it keeps the +loss-variant runs alongside the other planned CNS ablations. diff --git a/slurm_scripts/ablations/ensemble_size/README.md b/slurm_scripts/ablations/ensemble_size/README.md index 8047877b..4c7f6011 100644 --- a/slurm_scripts/ablations/ensemble_size/README.md +++ b/slurm_scripts/ablations/ensemble_size/README.md @@ -1,7 +1,12 @@ # Ensemble size ablation First-pass defaults focus on `n_members=16` under two batch-size -regimes. For the current production submission pass, +regimes. The planned CNS batch also includes a compute-matched +`n_members=4` ViT point via the root-level planned submitter: +`model.n_members=4` and `datamodule.batch_size=64`, preserving the baseline +effective per-GPU batch of 256. 
+ +For the current production submission pass, `submit_ensemble_large.sh` is pared down to just three `eff_bs1024` runs on `gray_scott`, `gpe_laser_only_wake`, and `advection_diffusion`; the CNS production entries and `fixed_bs32` combo are left commented for @@ -32,6 +37,7 @@ Keep `bs_crps × n_members × 4 GPUs = 1024`. With `n_members=16`, | n_members | bs_per_gpu | effective per-GPU | effective global | |---:|---:|---:|---:| +| 4 | 64 | 256 | 1024 | | 16 | 16 | 256 | 1024 | ## Dataset coverage diff --git a/slurm_scripts/ablations/ensemble_size/eval_best_multiwinkler_from0p25/submit_eval_crps_ambient.sh b/slurm_scripts/ablations/ensemble_size/eval_best_multiwinkler_from0p25/submit_eval_crps_ambient.sh new file mode 100755 index 00000000..c2db8a48 --- /dev/null +++ b/slurm_scripts/ablations/ensemble_size/eval_best_multiwinkler_from0p25/submit_eval_crps_ambient.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +set -euo pipefail +# Ambient eval submitter for the 2026-04-24 CRPS ensemble-size runs. +# +# This variant selects the best multi-Winkler checkpoint after the 0.25 +# progress cutoff: best-multiwinkler-from0p25-*.ckpt. The standard ensemble +# eval submitter under ../eval/ is left unchanged. +# +# Force eval.mode=ambient. These stateless EPD checkpoints can look +# processor-only to eval.mode=auto because PermuteConcat / ChannelsLast add no +# encoder_decoder.* weights, but raw-space ambient rollout is the right route. +# +# Batch size: keep 4/GPU as the conservative first pass used by the standard +# ambient ensemble-size eval script. 
+ +EVAL_BATCH_SIZE=4 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=240 +EVAL_SUBDIR="eval_best_multiwinkler_from0p25" +ROLLOUT_SNAPSHOT_TIMESTEPS="[0,4,12,30,99]" +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-24/ensemble_size/crps_gs64_vit_azula_large_bed4611_4d04729" + "outputs/2026-04-24/ensemble_size/crps_gpe64_vit_azula_large_bed4611_6b78265" + "outputs/2026-04-24/ensemble_size/crps_cns64_vit_azula_large_bed4611_5758ebc" + "outputs/2026-04-24/ensemble_size/crps_ad64_vit_azula_large_bed4611_69c99bf" +) + +resolve_multiwinkler_checkpoint() { + local run_dir="$1" + local -a ckpts=() + + mapfile -t ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/best-multiwinkler-from0p25-*.ckpt' | sort + ) + + if (( ${#ckpts[@]} >= 1 )); then + printf '%s\n' "${ckpts[$(( ${#ckpts[@]} - 1 ))]}" + return 0 + fi + + return 1 +} + +for run_dir in "${RUN_DIRS[@]}"; do + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + + if ! 
eval_ckpt="$(resolve_multiwinkler_checkpoint "${run_dir_abs}")"; then + echo "Skipping ${run_dir}: best-multiwinkler-from0p25-*.ckpt missing" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting ensemble-size CRPS ambient eval (best multi-Winkler from 0.25)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " eval.mode: ambient" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.rollout_snapshot_timesteps: ${ROLLOUT_SNAPSHOT_TIMESTEPS}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: ${EVAL_METRICS}" + + uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=ambient \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.save_rollout_snapshots=true \ + eval.rollout_snapshot_dir="${eval_output_dir}/videos/snapshots" \ + eval.rollout_snapshot_timesteps="${ROLLOUT_SNAPSHOT_TIMESTEPS}" \ + eval.rollout_snapshot_format=png \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/slurm_scripts/ablations/fm_vs_diffusion/README.md b/slurm_scripts/ablations/fm_vs_diffusion/README.md index 6e4fe9c8..ad72696b 100644 --- a/slurm_scripts/ablations/fm_vs_diffusion/README.md +++ b/slurm_scripts/ablations/fm_vs_diffusion/README.md @@ -3,7 +3,7 @@ Compare flow matching (baseline) against DDPM/EDM-style diffusion on the same ambient ViT backbone. -**Status:** stub — no scripts yet. 
+**Status:** CNS diffusion config added; timing is part of the planned CNS batch. ## Baseline @@ -12,15 +12,19 @@ same ambient ViT backbone. ## Knob -Swap `model.processor` from `flow_matching_vit` to the diffusion -equivalent. Existing configs to crib from: +Swap `model.processor` from `flow_matching_vit` to `diffusion_vit` while +keeping the FM ambient backbone and conditioning path: -- `local_hydra/local_experiment/epd_diffusion_dm_256_dc_large.yaml` - (DDPM-style with DC large AE — note: ambient baseline in the ablation - table uses identity encoder, not DC AE, so we need a matching - `epd_diffusion_dm_256_identity.yaml`-style config.) -- `local_hydra/local_experiment/epd_diffusion_fm_256_identity.yaml` — - FM ambient equivalent. +- identity encoder/decoder +- ViT backbone `hid_channels=704`, `hid_blocks=12`, `attention_heads=8`, + `patch_size=4` +- `datamodule.batch_size=256` +- AdamW-half LR `1e-4` +- 50 Euler sampler steps, matching FM's `flow_ode_steps=50` as the closest + equal-NFE comparison + +Config: +`local_hydra/local_experiment/ablations/fm_vs_diffusion/conditioned_navier_stokes/diffusion_vit_large.yaml`. ## Datasets @@ -29,9 +33,5 @@ gives 1). ## Outstanding decisions -- DDPM vs EDM vs something else. Table just says "diffusion" — need to - pick a specific processor. -- Sampler / step count for the diffusion side (FM uses 50 ODE steps; - diffusion would typically use more). -- Whether the comparison should hold n_function_evaluations fixed at - inference for fairness. +- Whether a second diffusion eval should also report a higher-step sampler for + quality, separate from the equal-50-step fairness run. 
diff --git a/slurm_scripts/ablations/noise_channels/README.md b/slurm_scripts/ablations/noise_channels/README.md index 0edcf92a..8472da3c 100644 --- a/slurm_scripts/ablations/noise_channels/README.md +++ b/slurm_scripts/ablations/noise_channels/README.md @@ -3,7 +3,8 @@ Sweep `model.processor.n_noise_channels` (the AdaLN-modulation noise dimensionality) for the CRPS ambient baseline. -**Status:** stub — no scripts yet. +**Status:** CNS `n_noise_channels=256` config added; timing is part of the +planned CNS batch. ## Baseline @@ -12,8 +13,13 @@ dimensionality) for the CRPS ambient baseline. ## Knob -- `model.processor.n_noise_channels` — e.g. `{256, 1024, 4096}` or a - finer grid. Values TBD. +- `model.processor.n_noise_channels=256`, compared against the 1024-channel + 2026-04-24 CRPS baseline. To keep the processor near 80M params, this run + holds depth/heads fixed and uses `hidden_dim=704` as the single balancing + knob (~79.9M params for CNS ambient shapes). + +Config: +`local_hydra/local_experiment/ablations/noise_channels/conditioned_navier_stokes/crps_vit_noise256.yaml`. ## Datasets @@ -21,9 +27,7 @@ CNS only for now. ## Outstanding decisions -- Exact sweep values. Existing config - `epd_crps_vit_azula_n_noise_1024.yaml` and - `epd_crps_vit_azula_noise4096_mod1024.yaml` hint at historical values. +- Whether to add a wider value such as 4096 after the 256-vs-1024 contrast. - Whether to include the Concat-noise variant (see table notes "Concat vs with...") — currently red/skipped in the provisional table, so leave out. 
diff --git a/slurm_scripts/ablations/submit_planned_cns_large.sh b/slurm_scripts/ablations/submit_planned_cns_large.sh new file mode 100755 index 00000000..b5a693ff --- /dev/null +++ b/slurm_scripts/ablations/submit_planned_cns_large.sh @@ -0,0 +1,171 @@ +#!/bin/bash + +set -euo pipefail + +source "$(dirname "${BASH_SOURCE[0]}")/../comparison/cached_latents/validate_cached_latents_against_ae.sh" + +# 24h production jobs for the planned CNS ablation batch. +# +# Run submit_planned_cns_timing.sh first. This script resolves each +# trainer.max_epochs from either COSINE_EPOCHS_BY_RUN or the latest matching +# timing.ckpt under outputs/*/timing_planned_cns/<run_id>/timing.ckpt. + +declare -A COSINE_EPOCHS_BY_RUN=( + # Pinned from outputs/2026-04-25/timing_planned_cns/*/timing.ckpt at + # 24h budget, 2% margin (uv run autocast time-epochs -b 24 -m 0.02). + ["unet_m8_crps_cns"]=611 + ["diffusion_cns"]=2248 + ["fair_crps_m8_cns"]=439 + ["plain_crps_m8_cns"]=439 + ["vit_noise256_m8_cns"]=427 + ["vit_m4_cns"]=803 + ["latent_crps_m8_cns"]=327 + ["vit_global_cond_m8_cns"]=432 +) + +BUDGET_MAX_TIME="00:23:59:00" +TIMEOUT_MIN=1439 +SOURCE_DATASET="conditioned_navier_stokes" +RUN_GROUP="$(date +%Y-%m-%d)/planned_cns" +RUN_DRY_STATES=("true" "false") +AE_RUN_DIR="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8" +CACHE_DIR="${AE_RUN_DIR}/cached_latents" + +# run_id|kind|local_experiment +RUNS=( + "unet_m8_crps_cns|epd|ablations/arch_unet_fno_vit/conditioned_navier_stokes/crps_unet_azula_80m" + "diffusion_cns|epd|ablations/fm_vs_diffusion/conditioned_navier_stokes/diffusion_vit_large" + "fair_crps_m8_cns|epd|ablations/crps_variants/conditioned_navier_stokes/crps_vit_fair" + "plain_crps_m8_cns|epd|ablations/crps_variants/conditioned_navier_stokes/crps_vit_plain" + "vit_noise256_m8_cns|epd|ablations/noise_channels/conditioned_navier_stokes/crps_vit_noise256" + "vit_m4_cns|epd|epd/conditioned_navier_stokes/crps_vit_azula_large" +
"latent_crps_m8_cns|processor|processor/conditioned_navier_stokes/crps_vit_azula_large" + "vit_global_cond_m8_cns|epd|epd/conditioned_navier_stokes/crps_vit_azula_large_identity_global_cond" +) + +run_overrides() { + local run_id="$1" + + case "${run_id}" in + latent_crps_m8_cns) + # Keep the local_experiment's cached_latents datamodule. Passing + # datamodule=conditioned_navier_stokes here would switch training + # back to raw fields. Pair datamodule=cached_latents with the cache + # path so the workflow infers the source cns64 token from + # autoencoder_config.yaml. + printf '%s\n' "datamodule=cached_latents" "datamodule.data_path=${CACHE_DIR}" + ;; + vit_m4_cns) + printf '%s\n' "datamodule=${SOURCE_DATASET}" "model.n_members=4" "datamodule.batch_size=64" + ;; + *) + printf '%s\n' "datamodule=${SOURCE_DATASET}" + ;; + esac +} + +validate_run_inputs() { + local run_id="$1" + + if [[ "${run_id}" != "latent_crps_m8_cns" ]]; then + return 0 + fi + + if [[ ! -d "${CACHE_DIR}/train" ]] || [[ ! -d "${CACHE_DIR}/valid" ]] || [[ ! -d "${CACHE_DIR}/test" ]]; then + echo "Skipping ${run_id}: cache missing train/valid/test under ${CACHE_DIR}" >&2 + return 1 + fi + if ! validate_cached_latents_against_ae "${AE_RUN_DIR}"; then + echo "Skipping ${run_id}: cached-latents config mismatch vs AE training config" >&2 + return 1 + fi +} + +find_timing_checkpoint() { + local run_id="$1" + + if [[ ! 
-d outputs ]]; then + return 0 + fi + + find outputs -path "*/timing_planned_cns/${run_id}/timing.ckpt" | sort | tail -n 1 +} + +derive_cosine_epochs_from_timing() { + local timing_ckpt="$1" + local result + + result="$( + uv run autocast time-epochs --from-checkpoint "${timing_ckpt}" -b 24 -m 0.02 + )" + + sed -n 's/.*trainer.max_epochs=\([0-9][0-9]*\).*/\1/p' <<< "${result}" | tail -n 1 +} + +resolve_cosine_epochs() { + local run_id="$1" + local cached="${COSINE_EPOCHS_BY_RUN[$run_id]:-}" + + if [[ -n "${cached}" ]]; then + printf '%s\n' "${cached}" + return 0 + fi + + local timing_ckpt + timing_ckpt="$(find_timing_checkpoint "${run_id}")" + if [[ -z "${timing_ckpt}" ]]; then + return 1 + fi + + derive_cosine_epochs_from_timing "${timing_ckpt}" +} + +for run_spec in "${RUNS[@]}"; do + IFS="|" read -r run_id kind experiment <<< "${run_spec}" + if ! validate_run_inputs "${run_id}"; then + continue + fi + + if ! cosine_epochs="$(resolve_cosine_epochs "${run_id}")"; then + echo "Skipping ${run_id}: no timing-derived cosine_epochs available" >&2 + continue + fi + if [[ -z "${cosine_epochs}" ]]; then + echo "Skipping ${run_id}: could not parse trainer.max_epochs from timing output" >&2 + continue + fi + + mapfile -t overrides < <(run_overrides "${run_id}") + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting planned CNS production run" + echo " mode: ${run_label}" + echo " run_id: ${run_id}" + echo " kind: ${kind}" + echo " source_dataset: ${SOURCE_DATASET}" + echo " local_experiment: ${experiment}" + echo " cosine_epochs: ${cosine_epochs}" + + uv run autocast "${kind}" --mode slurm "${dry_run_arg[@]}" \ + --run-group "${RUN_GROUP}" \ + local_experiment="${experiment}" \ + "${overrides[@]}" \ + logging.wandb.enabled=true \ + logging.wandb.name="${run_id}" \ + optimizer.cosine_epochs="${cosine_epochs}" \ + 
hydra.launcher.timeout_min="${TIMEOUT_MIN}" \ + trainer.max_time="${BUDGET_MAX_TIME}" \ + +trainer.max_epochs="${cosine_epochs}" \ + trainer.callbacks.0.every_n_train_steps_fraction=0.05 \ + +trainer.callbacks.0.every_n_epochs=0 \ + trainer.callbacks.0.save_top_k=-1 \ + trainer.callbacks.0.filename=\"snapshot-{progress_token}-{epoch:04d}-{step:08d}\" + done +done diff --git a/slurm_scripts/ablations/submit_planned_cns_timing.sh b/slurm_scripts/ablations/submit_planned_cns_timing.sh new file mode 100755 index 00000000..d7557c5f --- /dev/null +++ b/slurm_scripts/ablations/submit_planned_cns_timing.sh @@ -0,0 +1,104 @@ +#!/bin/bash + +set -euo pipefail + +source "$(dirname "${BASH_SOURCE[0]}")/../comparison/cached_latents/validate_cached_latents_against_ae.sh" + +# Timing jobs for the planned CNS ablation batch. +# +# This is intentionally CNS-only and cross-cuts the study-specific ablation +# folders. Architecture-changing variants use local_experiment configs under +# local_hydra/local_experiment/ablations/*; one-knob variants use CLI +# overrides against the canonical comparison configs. 
+ +BUDGET_HOURS=24 +NUM_TIMING_EPOCHS=5 +RUN_GROUP="$(date +%Y-%m-%d)/timing_planned_cns" +SOURCE_DATASET="conditioned_navier_stokes" +AE_RUN_DIR="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8" +CACHE_DIR="${AE_RUN_DIR}/cached_latents" + +# run_id|kind|local_experiment +RUNS=( + "unet_m8_crps_cns|epd|ablations/arch_unet_fno_vit/conditioned_navier_stokes/crps_unet_azula_80m" + "diffusion_cns|epd|ablations/fm_vs_diffusion/conditioned_navier_stokes/diffusion_vit_large" + "fair_crps_m8_cns|epd|ablations/crps_variants/conditioned_navier_stokes/crps_vit_fair" + "plain_crps_m8_cns|epd|ablations/crps_variants/conditioned_navier_stokes/crps_vit_plain" + "vit_noise256_m8_cns|epd|ablations/noise_channels/conditioned_navier_stokes/crps_vit_noise256" + "vit_m4_cns|epd|epd/conditioned_navier_stokes/crps_vit_azula_large" + "latent_crps_m8_cns|processor|processor/conditioned_navier_stokes/crps_vit_azula_large" + "vit_global_cond_m8_cns|epd|epd/conditioned_navier_stokes/crps_vit_azula_large_identity_global_cond" +) + +run_overrides() { + local run_id="$1" + + case "${run_id}" in + latent_crps_m8_cns) + # Keep the local_experiment's cached_latents datamodule. Passing + # datamodule=conditioned_navier_stokes here would switch training + # back to raw fields. Pair datamodule=cached_latents with the cache + # path so the workflow infers the source cns64 token from + # autoencoder_config.yaml. + printf '%s\n' "datamodule=cached_latents" "datamodule.data_path=${CACHE_DIR}" + ;; + vit_m4_cns) + printf '%s\n' "datamodule=${SOURCE_DATASET}" "model.n_members=4" "datamodule.batch_size=64" + ;; + *) + printf '%s\n' "datamodule=${SOURCE_DATASET}" + ;; + esac +} + +validate_run_inputs() { + local run_id="$1" + + if [[ "${run_id}" != "latent_crps_m8_cns" ]]; then + return 0 + fi + + if [[ ! -d "${CACHE_DIR}/train" ]] || [[ ! -d "${CACHE_DIR}/valid" ]] || [[ ! 
-d "${CACHE_DIR}/test" ]]; then + echo "Skipping ${run_id}: cache missing train/valid/test under ${CACHE_DIR}" >&2 + return 1 + fi + if ! validate_cached_latents_against_ae "${AE_RUN_DIR}"; then + echo "Skipping ${run_id}: cached-latents config mismatch vs AE training config" >&2 + return 1 + fi +} + +for run_spec in "${RUNS[@]}"; do + IFS="|" read -r run_id kind experiment <<< "${run_spec}" + if ! validate_run_inputs "${run_id}"; then + continue + fi + + mapfile -t overrides < <(run_overrides "${run_id}") + + echo "Submitting planned CNS timing run" + echo " run_id: ${run_id}" + echo " kind: ${kind}" + echo " source_dataset: ${SOURCE_DATASET}" + echo " local_experiment: ${experiment}" + echo " timing epochs: ${NUM_TIMING_EPOCHS}" + echo " budget: ${BUDGET_HOURS}h" + echo " run_group: ${RUN_GROUP}" + + uv run autocast time-epochs --kind "${kind}" --mode slurm \ + --run-group "${RUN_GROUP}" \ + --run-id "${run_id}" \ + -n "${NUM_TIMING_EPOCHS}" \ + -b "${BUDGET_HOURS}" \ + local_experiment="${experiment}" \ + "${overrides[@]}" + + echo "" + echo "---" + echo "" +done + +echo "All planned CNS timing jobs submitted." +echo "" +echo "Once SLURM jobs complete, collect all results with:" +echo " for f in outputs/${RUN_GROUP}/*/retrieve.sh; do bash \"\$f\"; done" diff --git a/slurm_scripts/ablations/vit_mae_pretrain/README.md b/slurm_scripts/ablations/vit_mae_pretrain/README.md index 658da52c..90d14a98 100644 --- a/slurm_scripts/ablations/vit_mae_pretrain/README.md +++ b/slurm_scripts/ablations/vit_mae_pretrain/README.md @@ -36,7 +36,7 @@ but trains without the ensemble path: 2. After the timing job finishes, collect the schedule: ```bash - uv run autocast time-epochs --from-checkpoint /timing.ckpt -b 24 + bash outputs//timing_vit_mae_pretrain/vit_mae_pretrain_conditioned_navier_stokes/retrieve.sh ``` 3. Paste the emitted `trainer.max_epochs` value into @@ -55,9 +55,9 @@ job, following the other ablation submitters. 
## Checkpoints and CRPS fine-tuning The 24h script saves local progress checkpoints every ~5% of optimizer-step -progress with `save_top_k=-1`, keeps `last.ckpt`, and sets -`logging.wandb.log_model=all` so W&B logs every checkpoint artifact emitted by -the checkpoint callbacks. +progress with `save_top_k=-1` and keeps `last.ckpt`. W&B logs metrics, but +checkpoint artifact uploads stay disabled with `logging.wandb.log_model=false` +so a transient W&B artifact/auth failure cannot kill the Slurm job. For the follow-up shortened CRPS fine-tune, use the `vit_mae_to_crps` scripts and point `MAE_CHECKPOINT` at one of the MAE checkpoints: diff --git a/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_large.sh b/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_large.sh index e856843c..c7971915 100755 --- a/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_large.sh +++ b/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_large.sh @@ -4,8 +4,7 @@ set -euo pipefail # 24h deterministic ViT MAE pretraining on CNS. # # Populate COSINE_EPOCHS_BY_DATASET after running -# submit_vit_mae_pretrain_timing.sh and extracting timing.ckpt with: -# uv run autocast time-epochs --from-checkpoint /timing.ckpt -b 24 +# submit_vit_mae_pretrain_timing.sh and collecting its generated retrieve.sh. # If left blank, the script falls back to the newest matching timing.ckpt # under outputs/*/timing_vit_mae_pretrain/. @@ -14,7 +13,7 @@ declare -A EXPERIMENTS=( ) declare -A COSINE_EPOCHS_BY_DATASET=( - # ["conditioned_navier_stokes"]=... 
+ ["conditioned_navier_stokes"]=2533 # 33.4 s/ep, 2026-04-24 timing run ) BUDGET_MAX_TIME="00:23:59:00" @@ -97,13 +96,13 @@ for datamodule in "${!EXPERIMENTS[@]}"; do local_experiment="${experiment}" \ logging.wandb.enabled=true \ logging.wandb.name="${wandb_name}" \ - logging.wandb.log_model=all \ + logging.wandb.log_model=false \ optimizer.cosine_epochs="${cosine_epochs}" \ hydra.launcher.timeout_min="${TIMEOUT_MIN}" \ trainer.max_time="${BUDGET_MAX_TIME}" \ +trainer.max_epochs="${cosine_epochs}" \ trainer.callbacks.0.every_n_train_steps_fraction=0.05 \ - trainer.callbacks.0.every_n_epochs=0 \ + +trainer.callbacks.0.every_n_epochs=0 \ trainer.callbacks.0.save_top_k=-1 \ trainer.callbacks.0.filename=\"snapshot-{progress_token}-{epoch:04d}-{step:08d}\" done diff --git a/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_timing.sh b/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_timing.sh index a75fdedb..4cc67f4f 100755 --- a/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_timing.sh +++ b/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_pretrain_timing.sh @@ -5,8 +5,7 @@ set -euo pipefail # # This starts from the CRPS ViT ambient architecture but disables the ensemble # path (n_members=1) and trains with torch.nn.L1Loss. Run this first, then -# derive the 24h cosine schedule from timing.ckpt: -# uv run autocast time-epochs --from-checkpoint /timing.ckpt -b 24 +# derive the 24h cosine schedule via the generated retrieve.sh script. 
declare -A EXPERIMENTS=( ["conditioned_navier_stokes"]="ablations/vit_mae_pretrain/conditioned_navier_stokes/vit_azula_large_mae_no_ensemble" diff --git a/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_to_crps_large.sh b/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_to_crps_large.sh index 4a0c95ee..8fec0c2b 100755 --- a/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_to_crps_large.sh +++ b/slurm_scripts/ablations/vit_mae_pretrain/submit_vit_mae_to_crps_large.sh @@ -128,13 +128,13 @@ for datamodule in "${!EXPERIMENTS[@]}"; do +resume_weights_only=true \ logging.wandb.enabled=true \ logging.wandb.name="${wandb_name}" \ - logging.wandb.log_model=all \ + logging.wandb.log_model=false \ optimizer.cosine_epochs="${cosine_epochs}" \ hydra.launcher.timeout_min="${TIMEOUT_MIN}" \ trainer.max_time="${BUDGET_MAX_TIME}" \ +trainer.max_epochs="${cosine_epochs}" \ trainer.callbacks.0.every_n_train_steps_fraction=0.05 \ - trainer.callbacks.0.every_n_epochs=0 \ + +trainer.callbacks.0.every_n_epochs=0 \ trainer.callbacks.0.save_top_k=-1 \ trainer.callbacks.0.filename=\"snapshot-{progress_token}-{epoch:04d}-{step:08d}\" done diff --git a/slurm_scripts/comparison/README.md b/slurm_scripts/comparison/README.md index 75107a14..d53c4d9b 100644 --- a/slurm_scripts/comparison/README.md +++ b/slurm_scripts/comparison/README.md @@ -38,6 +38,11 @@ All latent submit scripts now fail fast if `epd/submit_fm_ambient_*.sh`, and `cached_latents/submit_fm_*.sh`. 5. `cached_latents/submit_crps_latent_*.sh` is kept as an ablation. +For eval comparisons, use the most recent CRPS ambient basis under +`outputs/2026-04-24/crps_*` and the 2026-04-20 FM/diff cached-latent basis +under `outputs/2026-04-20/diff_*`. The eval submitters in +`comparison/eval/` encode that split explicitly. 
+ ## Model-size matrix (~80M params, DiT-aligned) All 4 processor variants target ~80M trainable parameters (AE params excluded) @@ -94,9 +99,28 @@ is hard-coded false in `vit_azula_large.yaml`, so the auto-detected value is ignored — conditioning flows only through `permute_concat`'s spatial concatenation. -Planned ablation (not in this submission round): CRPS ambient with -`identity` encoder + `include_global_cond: true` to match FM ambient's -conditioning path exactly and isolate the encoder effect. +Planned CNS ablation: CRPS ambient with `identity` encoder + +`include_global_cond: true` to match FM ambient's conditioning path exactly +and isolate the encoder effect. The config already lives at +`epd/conditioned_navier_stokes/crps_vit_azula_large_identity_global_cond.yaml`. + +## Planned CNS ablation batch + +The cross-cutting CNS batch is scripted in +`slurm_scripts/ablations/submit_planned_cns_{timing,large}.sh`. It keeps +the main CRPS comparison anchored to the 2026-04-24 CRPS runs and the FM/diff +reference anchored to the 2026-04-20 diff basis. 
+ +| run | local experiment / basis | fairness note | +|---|---|---| +| U-Net m=8 CRPS CNS | `ablations/arch_unet_fno_vit/.../crps_unet_azula_80m` | Azula U-Net channel ladder `[62,124,248,496]` gives ~81.3M params | +| Diffusion CNS | `ablations/fm_vs_diffusion/.../diffusion_vit_large` | same 704/12/8 ViT backbone, identity conditioning, batch 256, 50 sampler steps as FM | +| CNS m=8 fair CRPS | `ablations/crps_variants/.../crps_vit_fair` | same ViT/backbone/batch as CRPS, only FairCRPS loss changes | +| CNS m=8 CRPS | `ablations/crps_variants/.../crps_vit_plain` | same ViT/backbone/batch as CRPS, only plain CRPS loss changes | +| CNS ViT noise channels=256 | `ablations/noise_channels/.../crps_vit_noise256` | `n_noise_channels` 1024 -> 256, `hidden_dim=704` for ~79.9M params | +| CNS m=4 ViT | canonical `epd/.../crps_vit_azula_large` + CLI overrides | `n_members=4`, `batch_size=64` keeps 256 effective per GPU | +| CNS m=8 latent CRPS | `processor/.../crps_vit_azula_large` / 2026-04-20 run | cached-latent CRPS basis, eval via `auto -> encode_once` | +| CNS m=8 CRPS ViT global cond | `epd/.../crps_vit_azula_large_identity_global_cond` | matches FM's AdaLN/global-cond path | ## Cosine schedule (per-dataset, 24h budget) diff --git a/slurm_scripts/comparison/eval/README.md b/slurm_scripts/comparison/eval/README.md index 825d5294..9f873565 100644 --- a/slurm_scripts/comparison/eval/README.md +++ b/slurm_scripts/comparison/eval/README.md @@ -5,19 +5,33 @@ submitter only targets a study-specific ablation run set, keep it under `slurm_scripts/ablations//eval/` until that run set is promoted into the main comparison. -Four submission scripts cover ambient and cached-latent checkpoints produced -under `outputs/2026-04-18/` and `outputs/2026-04-20/`. Each script iterates -`--dry-run` first, then submits for real. 
+The canonical basis is now: + +- CRPS ambient: latest 2026-04-24 EPD runs, with the optional + best-multi-Winkler-from-0.25 submitter for the current preferred checkpoint + selection. +- FM/diff: 2026-04-20 cached-latent `diff_*` runs. The default latent eval uses + `auto -> encode_once`; the ambient eval explicitly supplies the matching AE + checkpoint and writes to `eval_ambient/`. + +Each script iterates `--dry-run` first, then submits for real. All comparison eval submitters explicitly pass `eval.n_members=10` for now so comparison numbers do not silently drift if the global eval default changes. | script | runs covered | eval.mode | eval.batch_size | |---|---|---|---| -| `submit_eval_crps_ambient.sh` | `outputs/2026-04-18/crps_*` (4 primary + 2 CNS ablations) | default (auto → ambient) | 8 | -| `submit_eval_fm_ambient.sh` | `outputs/2026-04-18/diff_*` ambient (4 datasets) | default (auto → ambient) | 4 | +| `submit_eval_crps_ambient.sh` | `outputs/2026-04-24/crps_*` primary final checkpoints | explicit `ambient` | 8 | +| `submit_eval_crps_ambient_best_multiwinkler_from0p25.sh` | same 2026-04-24 CRPS runs, best multi-Winkler after 25% progress | explicit `ambient` | 8 | +| `submit_eval_fm_ambient.sh` | `outputs/2026-04-20/diff_*` cached-latent FM basis, final checkpoints | explicit `ambient` | 4 | | `submit_eval_crps_latent.sh` | `outputs/2026-04-20/crps_*` cached-latent (CNS so far) | default (`auto -> encode_once`) | 8 | | `submit_eval_fm_latent.sh` | `outputs/2026-04-20/diff_*` cached-latent (4 datasets) | default (`auto -> encode_once`) | 4 | +| `submit_eval_fm_latent_0p25.sh` | same 2026-04-20 FM cached-latent runs at 25% progress | explicit `auto -> encode_once` | 4 | +| `submit_eval_fm_latent_0p50.sh` | same 2026-04-20 FM cached-latent runs at 50% progress | explicit `auto -> encode_once` | 4 | +| `submit_eval_fm_latent_0p75.sh` | same 2026-04-20 FM cached-latent runs at 75% progress | explicit `auto -> encode_once` | 4 | +| 
`submit_eval_fm_ambient_0p25.sh` | same 2026-04-20 FM cached-latent runs at 25% progress | explicit `ambient` | 4 | +| `submit_eval_fm_ambient_0p50.sh` | same 2026-04-20 FM cached-latent runs at 50% progress | explicit `ambient` | 4 | +| `submit_eval_fm_ambient_0p75.sh` | same 2026-04-20 FM cached-latent runs at 75% progress | explicit `ambient` | 4 | ## Batch-size rationale @@ -52,3 +66,16 @@ checkpoint. There are no branch prerequisites for the cached-latent scripts. Dry-run everything first, review the printed sbatch commands, then re-run without `RUN_DRY_STATES` edits to submit. Outputs land under each run's `eval/` subdirectory (`evaluation_metrics.csv`, rollout videos, etc.). + +The CRPS best-checkpoint submitter prefers +`best-multiwinkler-from0p25-*.ckpt` and writes to +`eval_best_multiwinkler_from0p25/`. + +The FM progress-checkpoint submitters prefer `snapshot-0p25-*.ckpt`, +`snapshot-0p50-*.ckpt`, or `snapshot-0p75-*.ckpt` when present, and fall +back to the legacy first, second, or third sorted `quarter-*.ckpt` checkpoint +saved by the 2026-04-20 runs. The default cached-latent progress evals write +to `eval_0p25/`, `eval_0p50/`, and `eval_0p75/`. The explicit ambient +variants write to `eval_0p25_ambient/`, `eval_0p50_ambient/`, and +`eval_0p75_ambient/` so they do not overwrite the `auto -> encode_once` +outputs. diff --git a/slurm_scripts/comparison/eval/submit_eval_crps_ambient.sh b/slurm_scripts/comparison/eval/submit_eval_crps_ambient.sh index c7e785a5..9d9329d9 100755 --- a/slurm_scripts/comparison/eval/submit_eval_crps_ambient.sh +++ b/slurm_scripts/comparison/eval/submit_eval_crps_ambient.sh @@ -1,13 +1,17 @@ #!/bin/bash set -euo pipefail -# Evaluate CRPS-in-ambient EPD runs trained on 2026-04-18. -# Covers 4 primary runs (permute_concat across all 4 datasets) plus two CNS -# ablations: AE-ambient (DC encoder/decoder, frozen) and identity+global_cond. +# Evaluate CRPS-in-ambient EPD runs trained on 2026-04-24. 
+# Covers the 4 primary permute_concat runs. CNS ablation-only reruns stay under +# slurm_scripts/ablations/ until they are promoted into the comparison suite. # All are EPD checkpoints (encoder_processor_decoder.ckpt); eval uses the # resolved_config.yaml written alongside each run, so the trained architecture # is reproduced exactly for eval. # +# Force eval.mode=ambient. These stateless EPD checkpoints can look +# processor-only to eval.mode=auto because PermuteConcat / ChannelsLast add no +# encoder_decoder.* weights, but raw-space ambient rollout is the right route. +# # Batch size: CRPS eval fits 8/GPU comfortably (ambient 64x64, n_members=10, # single forward pass per rollout step — no ODE). # @@ -22,14 +26,10 @@ EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,ps # Run dirs (absolute paths work; relative paths resolved from repo root). RUN_DIRS=( - # CRPS ambient (permute_concat) — 4 datasets - "outputs/2026-04-18/crps_gs64_vit_azula_large_0f89f06_779325a" - "outputs/2026-04-18/crps_gpe64_vit_azula_large_0f89f06_d337bd8" - "outputs/2026-04-18/crps_cns64_vit_azula_large_0f89f06_5b7332b" - "outputs/2026-04-18/crps_ad64_vit_azula_large_0f89f06_4667606" - # CNS ablations - "outputs/2026-04-18/crps_cns64_vit_azula_large_0f89f06_cf53b48" # identity+global_cond - "outputs/2026-04-18/crps_cns64_vit_azula_large_0f89f06_e7e60d9" # AE-ambient (DC encoder/decoder) + "outputs/2026-04-24/crps_gs64_vit_azula_large_bed4611_828a161" + "outputs/2026-04-24/crps_gpe64_vit_azula_large_bed4611_e0a6df5" + "outputs/2026-04-24/crps_cns64_vit_azula_large_bed4611_c99f534" + "outputs/2026-04-24/crps_ad64_vit_azula_large_bed4611_da01a04" ) for run_dir in "${RUN_DIRS[@]}"; do @@ -49,12 +49,14 @@ for run_dir in "${RUN_DIRS[@]}"; do echo "Submitting CRPS-ambient eval" echo " mode: ${run_label}" echo " run_dir: ${run_dir}" + echo " eval.mode: ambient" echo " eval.batch_size: ${EVAL_BATCH_SIZE}" echo " eval.n_members: ${EVAL_N_MEMBERS}" echo " eval.metrics: 
${EVAL_METRICS}" uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ --workdir "${run_dir}" \ + eval.mode=ambient \ eval.metrics="${EVAL_METRICS}" \ eval.batch_size="${EVAL_BATCH_SIZE}" \ eval.n_members="${EVAL_N_MEMBERS}" \ diff --git a/slurm_scripts/comparison/eval/submit_eval_crps_ambient_best_multiwinkler_from0p25.sh b/slurm_scripts/comparison/eval/submit_eval_crps_ambient_best_multiwinkler_from0p25.sh new file mode 100755 index 00000000..339dbdf0 --- /dev/null +++ b/slurm_scripts/comparison/eval/submit_eval_crps_ambient_best_multiwinkler_from0p25.sh @@ -0,0 +1,94 @@ +#!/bin/bash + +set -euo pipefail +# Evaluate CRPS-in-ambient EPD runs trained on 2026-04-24. +# +# This variant selects the best multi-Winkler checkpoint after the 0.25 +# progress cutoff: best-multiwinkler-from0p25-*.ckpt. The default CRPS ambient +# eval submitter is left unchanged. +# +# Force eval.mode=ambient. These stateless EPD checkpoints can look +# processor-only to eval.mode=auto because PermuteConcat / ChannelsLast add no +# encoder_decoder.* weights, but raw-space ambient rollout is the right route. 
+ +EVAL_BATCH_SIZE=8 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=240 +EVAL_SUBDIR="eval_best_multiwinkler_from0p25" +ROLLOUT_SNAPSHOT_TIMESTEPS="[0,4,12,30,99]" +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-24/crps_gs64_vit_azula_large_bed4611_828a161" + "outputs/2026-04-24/crps_gpe64_vit_azula_large_bed4611_e0a6df5" + "outputs/2026-04-24/crps_cns64_vit_azula_large_bed4611_c99f534" + "outputs/2026-04-24/crps_ad64_vit_azula_large_bed4611_da01a04" +) + +resolve_multiwinkler_checkpoint() { + local run_dir="$1" + local -a ckpts=() + + mapfile -t ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/best-multiwinkler-from0p25-*.ckpt' | sort + ) + + if (( ${#ckpts[@]} >= 1 )); then + printf '%s\n' "${ckpts[$(( ${#ckpts[@]} - 1 ))]}" + return 0 + fi + + return 1 +} + +for run_dir in "${RUN_DIRS[@]}"; do + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + + if ! 
eval_ckpt="$(resolve_multiwinkler_checkpoint "${run_dir_abs}")"; then + echo "Skipping ${run_dir}: best-multiwinkler-from0p25-*.ckpt missing" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting CRPS-ambient eval (best multi-Winkler from 0.25)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " eval.mode: ambient" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.rollout_snapshot_timesteps: ${ROLLOUT_SNAPSHOT_TIMESTEPS}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: ${EVAL_METRICS}" + + uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=ambient \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.save_rollout_snapshots=true \ + eval.rollout_snapshot_dir="${eval_output_dir}/videos/snapshots" \ + eval.rollout_snapshot_timesteps="${ROLLOUT_SNAPSHOT_TIMESTEPS}" \ + eval.rollout_snapshot_format=png \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_ambient.sh b/slurm_scripts/comparison/eval/submit_eval_fm_ambient.sh index eb56a251..2f86c5e1 100755 --- a/slurm_scripts/comparison/eval/submit_eval_fm_ambient.sh +++ b/slurm_scripts/comparison/eval/submit_eval_fm_ambient.sh @@ -1,9 +1,10 @@ #!/bin/bash set -euo pipefail -# Evaluate FM-in-ambient (flow matching, identity encoder) EPD runs from -# 2026-04-18 across 
all 4 datasets. Eval reuses resolved_config.yaml so -# flow_ode_steps (=50), hid_channels, and backbone match training. +# Evaluate the 2026-04-20 FM/diff cached-latent processor basis with explicit +# eval.mode=ambient across all 4 datasets. Eval reuses resolved_config.yaml so +# flow_ode_steps (=50), hid_channels, and backbone match training, and the +# autoencoder checkpoint enables ambient-space rollout through decode->encode. # # Batch size: diffusion rollout is ODE-integrated (flow_ode_steps=50) per # rollout step, so ambient 64x64 × n_members=10 × 50 ODE substeps is the @@ -15,21 +16,46 @@ set -euo pipefail EVAL_BATCH_SIZE=4 EVAL_N_MEMBERS=10 TIMEOUT_MIN=360 +EVAL_SUBDIR="eval_ambient" RUN_DRY_STATES=("true" "false") EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" RUN_DIRS=( - "outputs/2026-04-18/diff_gs64_flow_matching_vit_0f89f06_6e3a299" - "outputs/2026-04-18/diff_gpe64_flow_matching_vit_0f89f06_3b3604d" - "outputs/2026-04-18/diff_cns64_flow_matching_vit_0f89f06_483bb70" - "outputs/2026-04-18/diff_ad64_flow_matching_vit_0f89f06_725d44a" + "outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331" + "outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a" + "outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3" + "outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382" +) +declare -A AE_CKPT=( + ["outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" + ["outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" + ["outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" + 
["outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" ) for run_dir in "${RUN_DIRS[@]}"; do - if [[ ! -f "${run_dir}/resolved_config.yaml" ]]; then + ae_ckpt="${AE_CKPT[$run_dir]:-}" + if [[ -z "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 + continue + fi + + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 continue fi + eval_ckpt="${run_dir_abs}/processor.ckpt" + if [[ ! -f "${eval_ckpt}" ]]; then + echo "Skipping ${run_dir}: processor.ckpt missing" >&2 + continue + fi + if [[ ! -f "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 + continue + fi + ae_ckpt_abs="$(realpath "${ae_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" for run_dry in "${RUN_DRY_STATES[@]}"; do dry_run_arg=() @@ -41,13 +67,23 @@ for run_dir in "${RUN_DIRS[@]}"; do echo "Submitting FM-ambient eval" echo " mode: ${run_label}" - echo " run_dir: ${run_dir}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt}" + echo " autoencoder_checkpoint: ${ae_ckpt_abs}" + echo " eval.mode: ambient" + echo " output_subdir: ${EVAL_SUBDIR}" echo " eval.batch_size: ${EVAL_BATCH_SIZE}" echo " eval.n_members: ${EVAL_N_MEMBERS}" echo " eval.metrics: ${EVAL_METRICS}" uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ - --workdir "${run_dir}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt}" \ + eval.mode=ambient \ + +autoencoder_checkpoint="${ae_ckpt_abs}" \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ eval.metrics="${EVAL_METRICS}" \ eval.batch_size="${EVAL_BATCH_SIZE}" \ eval.n_members="${EVAL_N_MEMBERS}" \ diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p25.sh 
b/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p25.sh new file mode 100755 index 00000000..812b7da3 --- /dev/null +++ b/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p25.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +set -euo pipefail +# Evaluate FM cached-latent processor runs from 2026-04-20 at the +# 25%-progress checkpoint with explicit eval.mode=ambient. +# +# This charges the autoencoder decode->encode drift at every rollout step. +# Outputs are kept under eval_0p25_ambient/ so they do not overwrite the +# default auto->encode_once 25%-checkpoint evals. +# +# Current 2026-04-20 FM runs saved legacy quarter-*.ckpt files, so this uses +# the first sorted quarter checkpoint as the 25% fallback. Future runs with +# snapshot-0p25-*.ckpt files are preferred automatically. + +EVAL_BATCH_SIZE=4 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=360 +EVAL_SUBDIR="eval_0p25_ambient" +PROGRESS_TOKEN="0p25" +PROGRESS_LABEL="0.25" +LEGACY_QUARTER_INDEX=0 +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331" + "outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a" + "outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3" + "outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382" +) +declare -A AE_CKPT=( + ["outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" + ["outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" + ["outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" + 
["outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" +) + +resolve_progress_checkpoint() { + local run_dir="$1" + local progress_token="$2" + local legacy_quarter_index="$3" + local -a snapshot_ckpts=() + local -a quarter_ckpts=() + + mapfile -t snapshot_ckpts < <( + find "${run_dir}" -type f -path "*/checkpoints/snapshot-${progress_token}-*.ckpt" | sort + ) + + if (( ${#snapshot_ckpts[@]} >= 1 )); then + printf '%s\n' "${snapshot_ckpts[$(( ${#snapshot_ckpts[@]} - 1 ))]}" + return 0 + fi + + mapfile -t quarter_ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/quarter-*.ckpt' | sort + ) + + if (( ${#quarter_ckpts[@]} > legacy_quarter_index )); then + printf '%s\n' "${quarter_ckpts[$legacy_quarter_index]}" + return 0 + fi + + return 1 +} + +for run_dir in "${RUN_DIRS[@]}"; do + ae_ckpt="${AE_CKPT[$run_dir]:-}" + if [[ -z "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 + continue + fi + + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + if [[ ! -f "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 + continue + fi + ae_ckpt_abs="$(realpath "${ae_ckpt}")" + + if ! 
eval_ckpt="$(resolve_progress_checkpoint "${run_dir_abs}" "${PROGRESS_TOKEN}" "${LEGACY_QUARTER_INDEX}")"; then + echo "Skipping ${run_dir}: neither snapshot-${PROGRESS_TOKEN}-*.ckpt nor legacy quarter checkpoint index ${LEGACY_QUARTER_INDEX} found" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting FM ambient eval (${PROGRESS_LABEL} checkpoint)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " autoencoder_checkpoint: ${ae_ckpt_abs}" + echo " eval.mode: ambient" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: ${EVAL_METRICS}" + + uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=ambient \ + +autoencoder_checkpoint="${ae_ckpt_abs}" \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p50.sh b/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p50.sh new file mode 100755 index 00000000..84e0a57e --- /dev/null +++ b/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p50.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +set -euo pipefail +# Evaluate FM cached-latent processor runs from 2026-04-20 at the +# 50%-progress checkpoint with explicit eval.mode=ambient. 
+# +# This charges the autoencoder decode->encode drift at every rollout step. +# Outputs are kept under eval_0p50_ambient/ so they do not overwrite the +# default auto->encode_once 50%-checkpoint evals. +# +# Current 2026-04-20 FM runs saved legacy quarter-*.ckpt files, so this uses +# the second sorted quarter checkpoint as the 50% fallback. Future runs with +# snapshot-0p50-*.ckpt files are preferred automatically. + +EVAL_BATCH_SIZE=4 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=360 +EVAL_SUBDIR="eval_0p50_ambient" +PROGRESS_TOKEN="0p50" +PROGRESS_LABEL="0.50" +LEGACY_QUARTER_INDEX=1 +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331" + "outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a" + "outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3" + "outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382" +) +declare -A AE_CKPT=( + ["outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" + ["outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" + ["outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" + ["outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" +) + +resolve_progress_checkpoint() { + local run_dir="$1" + local progress_token="$2" + local legacy_quarter_index="$3" + local -a snapshot_ckpts=() + local -a quarter_ckpts=() + + mapfile -t snapshot_ckpts < <( + find "${run_dir}" -type f -path 
"*/checkpoints/snapshot-${progress_token}-*.ckpt" | sort + ) + + if (( ${#snapshot_ckpts[@]} >= 1 )); then + printf '%s\n' "${snapshot_ckpts[$(( ${#snapshot_ckpts[@]} - 1 ))]}" + return 0 + fi + + mapfile -t quarter_ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/quarter-*.ckpt' | sort + ) + + if (( ${#quarter_ckpts[@]} > legacy_quarter_index )); then + printf '%s\n' "${quarter_ckpts[$legacy_quarter_index]}" + return 0 + fi + + return 1 +} + +for run_dir in "${RUN_DIRS[@]}"; do + ae_ckpt="${AE_CKPT[$run_dir]:-}" + if [[ -z "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 + continue + fi + + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + if [[ ! -f "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 + continue + fi + ae_ckpt_abs="$(realpath "${ae_ckpt}")" + + if ! eval_ckpt="$(resolve_progress_checkpoint "${run_dir_abs}" "${PROGRESS_TOKEN}" "${LEGACY_QUARTER_INDEX}")"; then + echo "Skipping ${run_dir}: neither snapshot-${PROGRESS_TOKEN}-*.ckpt nor legacy quarter checkpoint index ${LEGACY_QUARTER_INDEX} found" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting FM ambient eval (${PROGRESS_LABEL} checkpoint)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " autoencoder_checkpoint: ${ae_ckpt_abs}" + echo " eval.mode: ambient" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: ${EVAL_METRICS}" + + uv run autocast eval 
--mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=ambient \ + +autoencoder_checkpoint="${ae_ckpt_abs}" \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p75.sh b/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p75.sh new file mode 100755 index 00000000..e0f87292 --- /dev/null +++ b/slurm_scripts/comparison/eval/submit_eval_fm_ambient_0p75.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +set -euo pipefail +# Evaluate FM cached-latent processor runs from 2026-04-20 at the +# 75%-progress checkpoint with explicit eval.mode=ambient. +# +# This charges the autoencoder decode->encode drift at every rollout step. +# Outputs are kept under eval_0p75_ambient/ so they do not overwrite the +# default auto->encode_once 75%-checkpoint evals. +# +# Current 2026-04-20 FM runs saved legacy quarter-*.ckpt files, so this uses +# the third sorted quarter checkpoint as the 75% fallback. Future runs with +# snapshot-0p75-*.ckpt files are preferred automatically. 
+ +EVAL_BATCH_SIZE=4 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=360 +EVAL_SUBDIR="eval_0p75_ambient" +PROGRESS_TOKEN="0p75" +PROGRESS_LABEL="0.75" +LEGACY_QUARTER_INDEX=2 +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331" + "outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a" + "outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3" + "outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382" +) +declare -A AE_CKPT=( + ["outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" + ["outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" + ["outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" + ["outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" +) + +resolve_progress_checkpoint() { + local run_dir="$1" + local progress_token="$2" + local legacy_quarter_index="$3" + local -a snapshot_ckpts=() + local -a quarter_ckpts=() + + mapfile -t snapshot_ckpts < <( + find "${run_dir}" -type f -path "*/checkpoints/snapshot-${progress_token}-*.ckpt" | sort + ) + + if (( ${#snapshot_ckpts[@]} >= 1 )); then + printf '%s\n' "${snapshot_ckpts[$(( ${#snapshot_ckpts[@]} - 1 ))]}" + return 0 + fi + + mapfile -t quarter_ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/quarter-*.ckpt' | sort + ) + + if (( ${#quarter_ckpts[@]} > legacy_quarter_index )); then + printf '%s\n' "${quarter_ckpts[$legacy_quarter_index]}" + return 0 + fi + + return 1 +} + +for 
run_dir in "${RUN_DIRS[@]}"; do + ae_ckpt="${AE_CKPT[$run_dir]:-}" + if [[ -z "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 + continue + fi + + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + if [[ ! -f "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 + continue + fi + ae_ckpt_abs="$(realpath "${ae_ckpt}")" + + if ! eval_ckpt="$(resolve_progress_checkpoint "${run_dir_abs}" "${PROGRESS_TOKEN}" "${LEGACY_QUARTER_INDEX}")"; then + echo "Skipping ${run_dir}: neither snapshot-${PROGRESS_TOKEN}-*.ckpt nor legacy quarter checkpoint index ${LEGACY_QUARTER_INDEX} found" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting FM ambient eval (${PROGRESS_LABEL} checkpoint)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " autoencoder_checkpoint: ${ae_ckpt_abs}" + echo " eval.mode: ambient" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: ${EVAL_METRICS}" + + uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=ambient \ + +autoencoder_checkpoint="${ae_ckpt_abs}" \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + 
hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_latent.sh b/slurm_scripts/comparison/eval/submit_eval_fm_latent.sh index d50efc44..54f18542 100755 --- a/slurm_scripts/comparison/eval/submit_eval_fm_latent.sh +++ b/slurm_scripts/comparison/eval/submit_eval_fm_latent.sh @@ -69,6 +69,7 @@ for run_dir in "${RUN_DIRS[@]}"; do uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ --workdir "${run_dir}" \ eval.checkpoint=processor.ckpt \ + eval.mode=auto \ +autoencoder_checkpoint="${ae_ckpt}" \ eval.metrics="${EVAL_METRICS}" \ eval.batch_size="${EVAL_BATCH_SIZE}" \ diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p25.sh b/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p25.sh new file mode 100755 index 00000000..aaf880d5 --- /dev/null +++ b/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p25.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +set -euo pipefail +# Evaluate FM cached-latent processor runs from 2026-04-20 at the +# 25%-progress checkpoint using the default eval.mode=auto path. +# +# eval.mode=auto resolves to encode_once for processor-only cached-latent runs +# when autoencoder_checkpoint is supplied. That keeps raw-space metrics while +# avoiding the extra per-step decode->encode drift charged by the explicit +# ambient variant. +# +# Current 2026-04-20 FM runs saved legacy quarter-*.ckpt files, so this uses +# the first sorted quarter checkpoint as the 25% fallback. Future runs with +# snapshot-0p25-*.ckpt files are preferred automatically. 
+ +EVAL_BATCH_SIZE=4 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=360 +EVAL_SUBDIR="eval_0p25" +PROGRESS_TOKEN="0p25" +PROGRESS_LABEL="0.25" +LEGACY_QUARTER_INDEX=0 +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331" + "outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a" + "outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3" + "outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382" +) +declare -A AE_CKPT=( + ["outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" + ["outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" + ["outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" + ["outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" +) + +resolve_progress_checkpoint() { + local run_dir="$1" + local progress_token="$2" + local legacy_quarter_index="$3" + local -a snapshot_ckpts=() + local -a quarter_ckpts=() + + mapfile -t snapshot_ckpts < <( + find "${run_dir}" -type f -path "*/checkpoints/snapshot-${progress_token}-*.ckpt" | sort + ) + + if (( ${#snapshot_ckpts[@]} >= 1 )); then + printf '%s\n' "${snapshot_ckpts[$(( ${#snapshot_ckpts[@]} - 1 ))]}" + return 0 + fi + + mapfile -t quarter_ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/quarter-*.ckpt' | sort + ) + + if (( ${#quarter_ckpts[@]} > legacy_quarter_index )); then + printf '%s\n' "${quarter_ckpts[$legacy_quarter_index]}" + return 0 + fi + + return 1 +} + +for run_dir in 
"${RUN_DIRS[@]}"; do + ae_ckpt="${AE_CKPT[$run_dir]:-}" + if [[ -z "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 + continue + fi + + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + if [[ ! -f "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 + continue + fi + ae_ckpt_abs="$(realpath "${ae_ckpt}")" + + if ! eval_ckpt="$(resolve_progress_checkpoint "${run_dir_abs}" "${PROGRESS_TOKEN}" "${LEGACY_QUARTER_INDEX}")"; then + echo "Skipping ${run_dir}: neither snapshot-${PROGRESS_TOKEN}-*.ckpt nor legacy quarter checkpoint index ${LEGACY_QUARTER_INDEX} found" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting FM cached-latent eval (${PROGRESS_LABEL} checkpoint, mode=auto -> encode_once)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " autoencoder_checkpoint: ${ae_ckpt_abs}" + echo " eval.mode: auto" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: ${EVAL_METRICS}" + + uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=auto \ + +autoencoder_checkpoint="${ae_ckpt_abs}" \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + 
hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p50.sh b/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p50.sh new file mode 100755 index 00000000..02028de1 --- /dev/null +++ b/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p50.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +set -euo pipefail +# Evaluate FM cached-latent processor runs from 2026-04-20 at the +# 50%-progress checkpoint using the default eval.mode=auto path. +# +# eval.mode=auto resolves to encode_once for processor-only cached-latent runs +# when autoencoder_checkpoint is supplied. That keeps raw-space metrics while +# avoiding the extra per-step decode->encode drift charged by the explicit +# ambient variant. +# +# Current 2026-04-20 FM runs saved legacy quarter-*.ckpt files, so this uses +# the second sorted quarter checkpoint as the 50% fallback. Future runs with +# snapshot-0p50-*.ckpt files are preferred automatically. + +EVAL_BATCH_SIZE=4 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=360 +EVAL_SUBDIR="eval_0p50" +PROGRESS_TOKEN="0p50" +PROGRESS_LABEL="0.50" +LEGACY_QUARTER_INDEX=1 +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331" + "outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a" + "outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3" + "outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382" +) +declare -A AE_CKPT=( + ["outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" + ["outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" + 
["outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" + ["outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" +) + +resolve_progress_checkpoint() { + local run_dir="$1" + local progress_token="$2" + local legacy_quarter_index="$3" + local -a snapshot_ckpts=() + local -a quarter_ckpts=() + + mapfile -t snapshot_ckpts < <( + find "${run_dir}" -type f -path "*/checkpoints/snapshot-${progress_token}-*.ckpt" | sort + ) + + if (( ${#snapshot_ckpts[@]} >= 1 )); then + printf '%s\n' "${snapshot_ckpts[$(( ${#snapshot_ckpts[@]} - 1 ))]}" + return 0 + fi + + mapfile -t quarter_ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/quarter-*.ckpt' | sort + ) + + if (( ${#quarter_ckpts[@]} > legacy_quarter_index )); then + printf '%s\n' "${quarter_ckpts[$legacy_quarter_index]}" + return 0 + fi + + return 1 +} + +for run_dir in "${RUN_DIRS[@]}"; do + ae_ckpt="${AE_CKPT[$run_dir]:-}" + if [[ -z "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 + continue + fi + + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + if [[ ! -f "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 + continue + fi + ae_ckpt_abs="$(realpath "${ae_ckpt}")" + + if ! 
eval_ckpt="$(resolve_progress_checkpoint "${run_dir_abs}" "${PROGRESS_TOKEN}" "${LEGACY_QUARTER_INDEX}")"; then + echo "Skipping ${run_dir}: neither snapshot-${PROGRESS_TOKEN}-*.ckpt nor legacy quarter checkpoint index ${LEGACY_QUARTER_INDEX} found" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting FM cached-latent eval (${PROGRESS_LABEL} checkpoint, mode=auto -> encode_once)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " autoencoder_checkpoint: ${ae_ckpt_abs}" + echo " eval.mode: auto" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: ${EVAL_METRICS}" + + uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=auto \ + +autoencoder_checkpoint="${ae_ckpt_abs}" \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p75.sh b/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p75.sh new file mode 100755 index 00000000..626093d4 --- /dev/null +++ b/slurm_scripts/comparison/eval/submit_eval_fm_latent_0p75.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +set -euo pipefail +# Evaluate FM cached-latent processor runs from 2026-04-20 at the +# 75%-progress checkpoint using the default eval.mode=auto path. 
+# +# eval.mode=auto resolves to encode_once for processor-only cached-latent runs +# when autoencoder_checkpoint is supplied. That keeps raw-space metrics while +# avoiding the extra per-step decode->encode drift charged by the explicit +# ambient variant. +# +# Current 2026-04-20 FM runs saved legacy quarter-*.ckpt files, so this uses +# the third sorted quarter checkpoint as the 75% fallback. Future runs with +# snapshot-0p75-*.ckpt files are preferred automatically. + +EVAL_BATCH_SIZE=4 +EVAL_N_MEMBERS=10 +TIMEOUT_MIN=360 +EVAL_SUBDIR="eval_0p75" +PROGRESS_TOKEN="0p75" +PROGRESS_LABEL="0.75" +LEGACY_QUARTER_INDEX=2 +RUN_DRY_STATES=("true" "false") +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" + +RUN_DIRS=( + "outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331" + "outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a" + "outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3" + "outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382" +) +declare -A AE_CKPT=( + ["outputs/2026-04-20/diff_gs64_flow_matching_vit_09490da_7e9e331"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" + ["outputs/2026-04-20/diff_gpe64_flow_matching_vit_09490da_47bf39a"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" + ["outputs/2026-04-20/diff_cns64_flow_matching_vit_09490da_636fcc3"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" + ["outputs/2026-04-20/diff_ad64_flow_matching_vit_09490da_dae1382"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" +) + +resolve_progress_checkpoint() { + local run_dir="$1" + local progress_token="$2" + local legacy_quarter_index="$3" + local -a snapshot_ckpts=() + local -a quarter_ckpts=() + + mapfile -t snapshot_ckpts < <( + find "${run_dir}" -type f -path 
"*/checkpoints/snapshot-${progress_token}-*.ckpt" | sort + ) + + if (( ${#snapshot_ckpts[@]} >= 1 )); then + printf '%s\n' "${snapshot_ckpts[$(( ${#snapshot_ckpts[@]} - 1 ))]}" + return 0 + fi + + mapfile -t quarter_ckpts < <( + find "${run_dir}" -type f -path '*/checkpoints/quarter-*.ckpt' | sort + ) + + if (( ${#quarter_ckpts[@]} > legacy_quarter_index )); then + printf '%s\n' "${quarter_ckpts[$legacy_quarter_index]}" + return 0 + fi + + return 1 +} + +for run_dir in "${RUN_DIRS[@]}"; do + ae_ckpt="${AE_CKPT[$run_dir]:-}" + if [[ -z "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 + continue + fi + + run_dir_abs="$(realpath "${run_dir}")" + if [[ ! -f "${run_dir_abs}/resolved_config.yaml" ]]; then + echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 + continue + fi + if [[ ! -f "${ae_ckpt}" ]]; then + echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 + continue + fi + ae_ckpt_abs="$(realpath "${ae_ckpt}")" + + if ! eval_ckpt="$(resolve_progress_checkpoint "${run_dir_abs}" "${PROGRESS_TOKEN}" "${LEGACY_QUARTER_INDEX}")"; then + echo "Skipping ${run_dir}: neither snapshot-${PROGRESS_TOKEN}-*.ckpt nor legacy quarter checkpoint index ${LEGACY_QUARTER_INDEX} found" >&2 + continue + fi + eval_ckpt_abs="$(realpath "${eval_ckpt}")" + eval_output_dir="${run_dir_abs}/${EVAL_SUBDIR}" + + for run_dry in "${RUN_DRY_STATES[@]}"; do + dry_run_arg=() + run_label="slurm" + if [[ "${run_dry}" == "true" ]]; then + dry_run_arg=(--dry-run) + run_label="slurm --dry-run" + fi + + echo "Submitting FM cached-latent eval (${PROGRESS_LABEL} checkpoint, mode=auto -> encode_once)" + echo " mode: ${run_label}" + echo " run_dir: ${run_dir_abs}" + echo " eval.checkpoint: ${eval_ckpt_abs}" + echo " autoencoder_checkpoint: ${ae_ckpt_abs}" + echo " eval.mode: auto" + echo " output_subdir: ${EVAL_SUBDIR}" + echo " eval.batch_size: ${EVAL_BATCH_SIZE}" + echo " eval.n_members: ${EVAL_N_MEMBERS}" + echo " eval.metrics: 
${EVAL_METRICS}" + + uv run autocast eval --mode slurm "${dry_run_arg[@]}" \ + --workdir "${run_dir_abs}" \ + --output-subdir "${EVAL_SUBDIR}" \ + eval.checkpoint="${eval_ckpt_abs}" \ + eval.mode=auto \ + +autoencoder_checkpoint="${ae_ckpt_abs}" \ + eval.csv_path="${eval_output_dir}/evaluation_metrics.csv" \ + eval.video_dir="${eval_output_dir}/videos" \ + eval.metrics="${EVAL_METRICS}" \ + eval.batch_size="${EVAL_BATCH_SIZE}" \ + eval.n_members="${EVAL_N_MEMBERS}" \ + hydra.launcher.timeout_min="${TIMEOUT_MIN}" + done +done diff --git a/src/autocast/scripts/workflow/commands.py b/src/autocast/scripts/workflow/commands.py index 74ec9b30..9f5568d4 100644 --- a/src/autocast/scripts/workflow/commands.py +++ b/src/autocast/scripts/workflow/commands.py @@ -129,13 +129,21 @@ def _struct_safe_overrides( def _resolved_eval_default_overrides() -> list[str]: """Return eval.* overrides from the live eval config for stale resolved configs.""" - cfg_path = ( - Path(get_default_config_path()) / "eval" / "encoder_processor_decoder.yaml" - ) - if not cfg_path.exists(): + eval_config_dir = Path(get_default_config_path()) / "eval" + default_cfg_path = eval_config_dir / "default.yaml" + epd_cfg_path = eval_config_dir / "encoder_processor_decoder.yaml" + if not epd_cfg_path.exists(): return [] - loaded = OmegaConf.to_container(OmegaConf.load(cfg_path), resolve=True) + if default_cfg_path.exists(): + cfg = OmegaConf.merge( + OmegaConf.load(default_cfg_path), + OmegaConf.load(epd_cfg_path), + ) + else: + cfg = OmegaConf.load(epd_cfg_path) + + loaded = OmegaConf.to_container(cfg, resolve=True) if not isinstance(loaded, dict): return [] diff --git a/tests/scripts/test_workflow.py b/tests/scripts/test_workflow.py index 7fcbb9b2..bdfb0da0 100644 --- a/tests/scripts/test_workflow.py +++ b/tests/scripts/test_workflow.py @@ -560,6 +560,40 @@ def _fake_run_module(_module, overrides, dry_run=False, mode="local", **_kwargs) assert any(o.startswith("eval.checkpoint=") for o in overrides) +def 
test_eval_command_adds_snapshot_defaults_for_stale_resolved_config( + monkeypatch, tmp_path +): + (tmp_path / "resolved_config.yaml").write_text( + "eval:\n checkpoint: encoder_processor_decoder.ckpt\n", + encoding="utf-8", + ) + (tmp_path / "encoder_processor_decoder.ckpt").touch() + captured: dict[str, object] = {} + + def _fake_run_module(_module, overrides, dry_run=False, mode="local", **_kwargs): + captured["overrides"] = overrides + del dry_run, mode, _kwargs # accept run_module's keyword args + + monkeypatch.setattr( + "autocast.scripts.workflow.commands.run_module", _fake_run_module + ) + + user_override = "eval.rollout_snapshot_dir=/tmp/snapshots" + eval_command( + mode="local", + dataset=None, + work_dir=str(tmp_path), + overrides=[user_override], + dry_run=True, + ) + + overrides = captured["overrides"] + assert isinstance(overrides, list) + default_override = "+eval.rollout_snapshot_dir=null" + assert default_override in overrides + assert overrides.index(default_override) < overrides.index(user_override) + + def test_eval_command_includes_defaults_without_resolved_config(monkeypatch, tmp_path): # No resolved_config.yaml in workdir (tmp_path / "encoder_processor_decoder.ckpt").touch()