|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +set -euo pipefail |
| 4 | +# Evaluate FM cached-latent processor runs (2026-04-18) in AMBIENT mode. |
| 5 | +# |
| 6 | +# eval.mode=ambient forces encoder->processor->decoder rollout at every |
| 7 | +# step, so decode/encode drift is included in the metrics — the |
| 8 | +# apples-to-apples regime for comparison with the ambient FM baseline. |
| 9 | +# |
| 10 | +# The eval.mode selector landed via PR #327 and is now in-tree. When ambient |
| 11 | +# is requested on a cached-latents datamodule, eval auto-substitutes the raw |
| 12 | +# datamodule from <cache_dir>/autoencoder_config.yaml; the trained AE weights |
| 13 | +# are supplied via autoencoder_checkpoint. |
| 14 | +# |
| 15 | +# Batch size: ambient rollout pays encode/decode every step plus 50 ODE |
| 16 | +# substeps through the processor. The cached-latent processor forward is |
| 17 | +# lighter (64 tokens vs 256 for ambient FM), so 4/GPU is a safe start; the |
| 18 | +# bottleneck is the same ODE + AE stack, so it mirrors FM-ambient. |
| 19 | + |
| 20 | +EVAL_BATCH_SIZE=4  # forwarded to the eval command as eval.batch_size |
| 21 | +TIMEOUT_MIN=360  # forwarded as hydra.launcher.timeout_min |
| 22 | +RUN_DRY_STATES=("true" "false")  # per run: "true" adds --dry-run, "false" submits without it, in this order |
| 23 | +EVAL_METRICS="[mse,mae,nmse,nmae,rmse,nrmse,vmse,vrmse,linf,psrmse,psrmse_low,psrmse_mid,psrmse_high,psrmse_tail,pscc,pscc_low,pscc_mid,pscc_high,pscc_tail,crps,fcrps,afcrps,energy,ssr,winkler]" |
| 24 | + |
| 25 | +RUN_DIRS=(  # each entry is passed as --workdir and must contain resolved_config.yaml; paths are relative to the current directory |
| 26 | + "outputs/2026-04-18/diff_gs64_flow_matching_vit_0f89f06_f6e8f51" |
| 27 | + "outputs/2026-04-18/diff_gpe64_flow_matching_vit_0f89f06_b954f94" |
| 28 | + "outputs/2026-04-18/diff_cns64_flow_matching_vit_0f89f06_0e1c64b" |
| 29 | + "outputs/2026-04-18/diff_ad64_flow_matching_vit_0f89f06_df2137c" |
| 30 | +) |
| 31 | +declare -A AE_CKPT=(  # keyed by the exact RUN_DIRS string; value is passed as +autoencoder_checkpoint, and a run with no entry is skipped |
| 32 | + ["outputs/2026-04-18/diff_gs64_flow_matching_vit_0f89f06_f6e8f51"]="$HOME/autocast/outputs/2026-04-17/ae_gs64_3a7999b_ed36b8e/autoencoder.ckpt" |
| 33 | + ["outputs/2026-04-18/diff_gpe64_flow_matching_vit_0f89f06_b954f94"]="$HOME/autocast/outputs/2026-04-17/ae_gpe64_3a7999b_31e1c9f/autoencoder.ckpt" |
| 34 | + ["outputs/2026-04-18/diff_cns64_flow_matching_vit_0f89f06_0e1c64b"]="$HOME/autocast/outputs/2026-04-17/ae_cns64_3a7999b_b9c29f8/autoencoder.ckpt" |
| 35 | + ["outputs/2026-04-18/diff_ad64_flow_matching_vit_0f89f06_df2137c"]="$HOME/autocast/outputs/2026-04-17/ae_ad64_3a7999b_1a1e300/autoencoder.ckpt" |
| 36 | +) |
| 37 | + |
| 38 | +# Submit one ambient-mode eval per entry in RUN_DIRS. A run is reported on |
| 39 | +# stderr and skipped (without aborting the batch) when it has no AE_CKPT |
| 40 | +# entry, no resolved_config.yaml, or its AE checkpoint file is missing. |
| 41 | +for run_dir in "${RUN_DIRS[@]}"; do |
| 42 | +  # The ":-" default keeps a missing key from tripping `set -u`. |
| 43 | +  ae_ckpt="${AE_CKPT[$run_dir]:-}" |
| 44 | +  if [[ -z "${ae_ckpt}" ]]; then |
| 45 | +    echo "Skipping ${run_dir}: no autoencoder_checkpoint mapping" >&2 |
| 46 | +    continue |
| 47 | +  fi |
| 48 | +  if [[ ! -f "${run_dir}/resolved_config.yaml" ]]; then |
| 49 | +    echo "Skipping ${run_dir}: resolved_config.yaml missing" >&2 |
| 50 | +    continue |
| 51 | +  fi |
| 52 | +  if [[ ! -f "${ae_ckpt}" ]]; then |
| 53 | +    echo "Skipping ${run_dir}: AE checkpoint missing at ${ae_ckpt}" >&2 |
| 54 | +    continue |
| 55 | +  fi |
| 56 | + |
| 57 | +  # One invocation per RUN_DRY_STATES entry; "true" adds --dry-run. |
| 58 | +  for run_dry in "${RUN_DRY_STATES[@]}"; do |
| 59 | +    dry_run_arg=() |
| 60 | +    run_label="slurm" |
| 61 | +    if [[ "${run_dry}" == "true" ]]; then |
| 62 | +      dry_run_arg=(--dry-run) |
| 63 | +      run_label="slurm --dry-run" |
| 64 | +    fi |
| 65 | + |
| 66 | +    echo "Submitting FM cached-latent eval (mode=ambient)" |
| 67 | +    echo " mode: ${run_label}" |
| 68 | +    echo " run_dir: ${run_dir}" |
| 69 | +    echo " autoencoder_checkpoint: ${ae_ckpt}" |
| 70 | +    echo " eval.batch_size: ${EVAL_BATCH_SIZE}" |
| 71 | +    echo " eval.metrics: ${EVAL_METRICS}" |
| 72 | + |
| 73 | +    # Expand the flag array through ${arr[@]+...}: under `set -u`, bash older |
| 74 | +    # than 4.4 treats "${arr[@]}" on an empty array as an unbound variable, |
| 75 | +    # which would abort the script on the real (non-dry-run) submission. |
| 76 | +    uv run autocast eval --mode slurm \ |
| 77 | +      ${dry_run_arg[@]+"${dry_run_arg[@]}"} \ |
| 78 | +      --workdir "${run_dir}" \ |
| 79 | +      eval.checkpoint=processor.ckpt \ |
| 80 | +      ++eval.mode=ambient \ |
| 81 | +      +autoencoder_checkpoint="${ae_ckpt}" \ |
| 82 | +      eval.metrics="${EVAL_METRICS}" \ |
| 83 | +      eval.batch_size="${EVAL_BATCH_SIZE}" \ |
| 84 | +      hydra.launcher.timeout_min="${TIMEOUT_MIN}" |
| 85 | +  done |
| 86 | +done |
0 commit comments