MedARC-AI
diff --git a/‎README.md‎
Lines changed: 28 additions & 6 deletions b/‎README.md‎
Lines changed: 28 additions & 6 deletions
diff --git a/‎medarc_rl/launchers/rl_local.py‎
Lines changed: 2 additions & 2 deletions b/‎medarc_rl/launchers/rl_local.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎medarc_rl/medarc_slurm.py‎
Lines changed: 197 additions & 60 deletions b/‎medarc_rl/medarc_slurm.py‎
Lines changed: 197 additions & 60 deletions
diff --git a/‎medarc_rl/medarc_train.py‎
Lines changed: 44 additions & 14 deletions b/‎medarc_rl/medarc_train.py‎
Lines changed: 44 additions & 14 deletions
diff --git a/‎medarc_rl/slurm_templates/one_node_rl.j2‎
Lines changed: 14 additions & 2 deletions b/‎medarc_rl/slurm_templates/one_node_rl.j2‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎medarc_rl/slurm_templates/one_node_sft.j2‎
Lines changed: 13 additions & 1 deletion b/‎medarc_rl/slurm_templates/one_node_sft.j2‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎medarc_rl/utils.py‎
Lines changed: 51 additions & 13 deletions b/‎medarc_rl/utils.py‎
Lines changed: 51 additions & 13 deletions
diff --git a/‎prime-rl‎ b/‎prime-rl‎
@@ -31,8 +31,9 @@ uv sync
 For flash attention support:
 
 ```bash
-uv sync --extra flash-attn      # flash-attn v2
-uv sync --extra flash-attn-3    # flash-attn v2 + v3 (use for H100s)
+uv sync --extra flash-attn-2    # flash-attn 2
+uv sync --extra flash-attn-3    # flash-attn 2 + 3 (use for H100s)
+uv sync --extra flash-attn-4    # flash-attn 2, 3, & 4 (use for B200s)
 ```
 
 ## medarc_slurm
@@ -41,20 +42,41 @@ uv sync --extra flash-attn-3    # flash-attn v2 + v3 (use for H100s)
 
 ```bash
 # SFT: single torchrun job
-medarc_slurm sft config.toml --output-dir runs/my-sft --gpus 2
+medarc_slurm sft --config config.toml --output-dir runs/my-sft --gpus 2
 
 # RL: splits GPUs between vLLM inference and training
-medarc_slurm rl config.toml --output-dir runs/my-rl --train-gpus 1 --infer-gpus 2
+medarc_slurm rl --config config.toml --output-dir runs/my-rl --train-gpus 1 --infer-gpus 2
 
 # RL: share a single GPU between inference and training
-medarc_slurm rl config.toml --output-dir runs/my-rl --single-gpu
+medarc_slurm rl --config config.toml --output-dir runs/my-rl --single-gpu
+
+# SFT: low-priority queue + email notifications + resume from latest checkpoint
+medarc_slurm sft --config config.toml \
+  --output-dir runs/my-sft \
+  --gpus 2 \
+  --priority low \
+  --mail all \
+  --mail-user email@domain.com \
+  --slurm-resume
+
+# Validate an RL submission (including dependency syntax) without creating a job
+medarc_slurm rl --config config.toml \
+  --output-dir runs/my-rl \
+  --train-gpus 1 \
+  --infer-gpus 2 \
+  --dependency afterok:123456 \
+  --test-only
 ```
 
 Generated artifacts are written to `--output-dir`:
 - `sft.sh` or `rl.sh` — the SLURM batch script
 - `configs/` — resolved TOML subconfigs passed to each component
 
-You can pass PRIME-RL config overrides directly as extra flags (for example `--wandb.project my-proj --wandb.name my-run`). You may also insert `--` before passthrough overrides for readability, but it is optional.
+You can pass PRIME-RL config overrides directly as extra flags (for example `--wandb.project my-proj --wandb.name my-run`). You may also insert `--` before passthrough overrides for readability, but it is optional. To layer multiple PRIME-RL configs, repeat `--config` with later files overriding earlier ones.
+
+`medarc_slurm` now defaults `--account` to `training`. You can override it with `--account <name>`.
+Enable email notifications with `--mail all` or `--mail begin_end`, and set the recipient with `--mail-user <address>`.
+Use `--dependency "<expr>"` to pass SLURM dependencies and `--test-only` to run `sbatch` validation without submitting.
 
 Run `medarc_slurm sft --help` or `medarc_slurm rl --help` for more details on available options.
 
 
@@ -16,12 +16,12 @@
 from subprocess import Popen
 from threading import Event, Thread
 
+from pydantic_config import cli
 from prime_rl.configs.rl import RLConfig
 from prime_rl.entrypoints.rl import write_subconfigs
 from prime_rl.utils.logger import setup_logger
 from prime_rl.utils.pathing import get_log_dir
 from prime_rl.utils.process import cleanup_processes, cleanup_threads, monitor_process
-from prime_rl.utils.pydantic_config import parse_argv
 from prime_rl.utils.utils import get_free_port
 
 
@@ -315,7 +315,7 @@ def rl_local(config: RLConfig) -> None:
 
 
 def main() -> None:
+    """Entry point: build an ``RLConfig`` via ``cli`` and pass it to ``rl_local``."""
-    config = parse_argv(RLConfig)
+    config = cli(RLConfig)
     rl_local(config)
 
 
 
@@ -7,7 +7,7 @@
 
 import typer
 from pydantic import ValidationError
-from typer import Argument, Option
+from typer import Option
 
 from medarc_rl.utils import TYPER_PASSTHROUGH_CONTEXT, _load_settings_from_toml, _write_toml, extra_config_args
 
@@ -25,6 +25,26 @@ def _gpu_ids(n: int) -> str:
     return ",".join(str(i) for i in range(n))
 
 
+def _enable_sft_resume(config, *, enabled: bool) -> None:
+    if not enabled:
+        return
+    if config.ckpt is None:
+        from prime_rl.configs.trainer import CheckpointConfig as TrainerCheckpointConfig
+
+        config.ckpt = TrainerCheckpointConfig()
+    config.ckpt.resume_step = -1
+
+
+def _enable_rl_resume(config, *, enabled: bool) -> None:
+    if not enabled:
+        return
+    if config.ckpt is None:
+        from prime_rl.configs.rl import SharedCheckpointConfig
+
+        config.ckpt = SharedCheckpointConfig()
+    config.ckpt.resume_step = -1
+
+
 @app.command(
     context_settings=TYPER_PASSTHROUGH_CONTEXT,
     help=(
@@ -33,19 +53,24 @@ def _gpu_ids(n: int) -> str:
 )
 def sft(
     ctx: typer.Context,
-    config_toml: Annotated[Path, Argument(metavar="CONFIG_TOML", help="Path to the PRIME-RL SFT trainer TOML.")],
     output_dir: Annotated[Path, Option("--output-dir", file_okay=False, dir_okay=True, help="Directory to write resolved configs and checkpoints.")],
+    config: Annotated[list[Path] | None, Option("--config", "--config-toml", help="One or more PRIME-RL SFT trainer TOMLs. Repeat `--config` to layer files with later files overriding earlier ones.")] = None,
     gpus: Annotated[int, Option("--gpus", min=1, max=8, help="Number of GPUs for SFT.")] = 1,
+    resume: Annotated[bool, Option("--resume/--no-resume", help="Resume from the latest checkpoint (sets ckpt.resume_step=-1).")] = False,
 ) -> None:  # fmt: skip
     from prime_rl.configs.sft import SFTConfig
 
+    config_tomls = list(config or [])
+    if not config_tomls:
+        raise typer.BadParameter("Missing config path. Pass one or more --config values.", param_hint="--config")
     output_dir = output_dir.expanduser().resolve()
     config = _load_settings_from_toml(
         SFTConfig,
-        config_toml.expanduser().resolve(),
+        [config_toml.expanduser().resolve() for config_toml in config_tomls],
         output_dir=output_dir,
-        extra_cli_args=extra_config_args(ctx),
+        extra_cli_args=extra_config_args(ctx, positional_count=0),
     )
+    _enable_sft_resume(config, enabled=resume)
 
     config_dir = output_dir / "configs"
     config_dir.mkdir(parents=True, exist_ok=True)
@@ -82,37 +107,41 @@ def sft(
 )
 def rl(
     ctx: typer.Context,
-    config_toml: Annotated[Path, Argument(metavar="CONFIG_TOML", help="Path to the PRIME-RL RL TOML.")],
     output_dir: Annotated[Path, Option("--output-dir", file_okay=False, dir_okay=True, help="Directory to write resolved configs and checkpoints.")],
+    config: Annotated[list[Path] | None, Option("--config", "--config-toml", help="One or more PRIME-RL RL TOMLs. Repeat `--config` to layer files with later files overriding earlier ones.")] = None,
     train_gpus: Annotated[int, Option("--train-gpus", min=1, max=4, help="Number of GPUs for training.")] = 1,
     infer_gpus: Annotated[int, Option("--infer-gpus", min=1, max=7, help="Number of GPUs for inference.")] = 1,
     single_gpu: Annotated[bool, Option("--single-gpu", help="Share a single GPU between trainer and inference.")] = False,
+    resume: Annotated[bool, Option("--resume/--no-resume", help="Resume from the latest checkpoint (sets ckpt.resume_step=-1).")] = False,
 ) -> None:  # fmt: skip
     from prime_rl.configs.rl import RLConfig
 
     from medarc_rl.launchers.rl_local import rl_local
 
+    config_tomls = list(config or [])
+    if not config_tomls:
+        raise typer.BadParameter("Missing config path. Pass one or more --config values.", param_hint="--config")
     output_dir = output_dir.expanduser().resolve()
     train_gpus = 1 if single_gpu else train_gpus
     infer_gpus = 1 if single_gpu else infer_gpus
-    total_gpus = 1 if single_gpu else (train_gpus + infer_gpus)
+    gpus = 1 if single_gpu else (train_gpus + infer_gpus)
 
-    if not single_gpu and total_gpus < 2:
+    if not single_gpu and gpus < 2:
         raise typer.BadParameter(
-            f"Total GPUs must be at least 2, got train_gpus ({train_gpus}) + infer_gpus ({infer_gpus}) = {total_gpus}.",
+            f"Total GPUs must be at least 2, got train_gpus ({train_gpus}) + infer_gpus ({infer_gpus}) = {gpus}.",
             param_hint="--train-gpus/--infer-gpus",
         )
-    if total_gpus > 8:
+    if gpus > 8:
         raise typer.BadParameter(
-            f"Total GPUs must be at most 8, got train_gpus ({train_gpus}) + infer_gpus ({infer_gpus}) = {total_gpus}.",
+            f"Total GPUs must be at most 8, got train_gpus ({train_gpus}) + infer_gpus ({infer_gpus}) = {gpus}.",
             param_hint="--train-gpus/--infer-gpus",
         )
 
     try:
         config = _load_settings_from_toml(
             RLConfig,
-            config_toml.expanduser().resolve(),
-            extra_cli_args=extra_config_args(ctx),
+            [config_toml.expanduser().resolve() for config_toml in config_tomls],
+            extra_cli_args=extra_config_args(ctx, positional_count=0),
             output_dir=output_dir,
             deployment={"type": "single_node", "num_train_gpus": train_gpus, "num_infer_gpus": infer_gpus},
         )
@@ -121,6 +150,7 @@ def rl(
             f"RL config validation failed:\n{e}",
             param_hint="CONFIG_TOML/--train-gpus/--infer-gpus",
         ) from e
+    _enable_rl_resume(config, enabled=resume)
 
     if single_gpu and getattr(config.trainer.weight_broadcast, "type", None) == "nccl":
         raise typer.BadParameter(
@@ -135,10 +165,10 @@ def rl(
         )
 
     # Set env vars for rl_local
-    os.environ["CUDA_VISIBLE_DEVICES"] = _gpu_ids(total_gpus)
+    os.environ["CUDA_VISIBLE_DEVICES"] = _gpu_ids(gpus)
     os.environ["MEDARC_SINGLE_GPU"] = "1" if single_gpu else "0"
 
-    typer.echo(f"Starting RL on {total_gpus} GPU(s) (single_gpu={single_gpu})")
+    typer.echo(f"Starting RL on {gpus} GPU(s) (single_gpu={single_gpu})")
     rl_local(config)
 
 
 
@@ -3,12 +3,24 @@
 #SBATCH --job-name={{ job_name }}
 #SBATCH --nodes=1
 #SBATCH --ntasks=1
-#SBATCH --gres=gpu:{{ total_gpus }}
-{% if total_gpus == 8 %}
+#SBATCH --gpus-per-task={{ gpus }}
+{% if gpus == 8 %}
 #SBATCH --exclusive
 {% else %}
 #SBATCH --cpus-per-gpu={{ cpus_per_gpu }}
 {% endif %}
+{% if slurm_resume %}
+#SBATCH --requeue
+{% endif %}
+{% if qos %}
+#SBATCH --qos={{ qos }}
+{% endif %}
+{% if mail_type %}
+#SBATCH --mail-type={{ mail_type }}
+{% endif %}
+{% if mail_user %}
+#SBATCH --mail-user={{ mail_user }}
+{% endif %}
 #SBATCH --export=ALL
 #SBATCH --output="{{ output_dir }}/slurm/job_%j.log"
 #SBATCH --error="{{ output_dir }}/slurm/job_%j.log"
 
@@ -3,12 +3,24 @@
 #SBATCH --job-name={{ job_name }}
 #SBATCH --nodes=1
 #SBATCH --ntasks=1
-#SBATCH --gres=gpu:{{ gpus }}
+#SBATCH --gpus-per-task={{ gpus }}
 {% if gpus == 8 %}
 #SBATCH --exclusive
 {% else %}
 #SBATCH --cpus-per-gpu={{ cpus_per_gpu }}
 {% endif %}
+{% if slurm_resume %}
+#SBATCH --requeue
+{% endif %}
+{% if qos %}
+#SBATCH --qos={{ qos }}
+{% endif %}
+{% if mail_type %}
+#SBATCH --mail-type={{ mail_type }}
+{% endif %}
+{% if mail_user %}
+#SBATCH --mail-user={{ mail_user }}
+{% endif %}
 #SBATCH --export=ALL
 #SBATCH --output="{{ output_dir }}/slurm/job_%j.log"
 #SBATCH --error="{{ output_dir }}/slurm/job_%j.log"
 
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
+import json
 from pathlib import Path
 from typing import Any, TypeVar
 
 import tomli_w
 import typer
-from prime_rl.utils.pydantic_config import extract_toml_paths, to_kebab_case
+from pydantic import ValidationError
+from pydantic_config import ConfigFileError
+from prime_rl.utils.config import cli
 
 TYPER_PASSTHROUGH_CONTEXT = {"allow_extra_args": True, "ignore_unknown_options": True}
 T = TypeVar("T")
@@ -46,26 +49,30 @@ def _write_toml(path: Path, data: dict[str, Any]) -> None:
 
 def _load_settings_from_toml(
     config_cls: type[T],
-    config_path: Path,
+    config_paths: list[Path],
     *,
     extra_cli_args: list[str] | None = None,
     **overrides: Any,
 ) -> T:
-    if not config_path.exists():
-        raise typer.BadParameter(f"Config file does not exist: {config_path}", param_hint="CONFIG_TOML")
+    if not config_paths:
+        raise typer.BadParameter("At least one config file is required.", param_hint="CONFIG_TOML")
+    for config_path in config_paths:
+        if not config_path.exists():
+            raise typer.BadParameter(f"Config file does not exist: {config_path}", param_hint="CONFIG_TOML")
 
     reserved_roots = set(overrides)
     filtered_extra_args = filter_wrapper_owned_cli_args(extra_cli_args or [], override_roots=reserved_roots)
-    args = ["@", str(config_path), *filtered_extra_args]
-    toml_paths, cli_args = extract_toml_paths(args)
-    if not toml_paths:
-        raise typer.BadParameter(f"Failed to resolve TOML paths from {config_path}", param_hint="CONFIG_TOML")
-
-    config_cls.set_toml_files([str(path) for path in toml_paths])
     try:
-        return config_cls(_cli_parse_args=to_kebab_case(cli_args), **overrides)
-    finally:
-        config_cls.clear_toml_files()
+        return cli(
+            config_cls,
+            args=[
+                *[item for config_path in config_paths for item in ("@", str(config_path))],
+                *filtered_extra_args,
+                *_overrides_to_cli_args(overrides),
+            ],
+        )
+    except (ConfigFileError, ValidationError, SystemExit) as e:
+        raise typer.BadParameter(str(e), param_hint="CONFIG_TOML") from e
 
 
 def extra_config_args(ctx: typer.Context, *, positional_count: int = 1) -> list[str]:
@@ -129,3 +136,34 @@ def filter_wrapper_owned_cli_args(cli_args: list[str], *, override_roots: set[st
             i += 1
 
     return filtered
+
+
+def _overrides_to_cli_args(overrides: dict[str, Any]) -> list[str]:
+    args: list[str] = []
+    for key, value in overrides.items():
+        args.extend(_flatten_override(key, value))
+    return args
+
+
+def _flatten_override(key: str, value: Any) -> list[str]:
+    option = f"--{key.replace('_', '-')}"
+
+    if value is None:
+        return []
+
+    if isinstance(value, dict):
+        args: list[str] = []
+        for subkey, subvalue in value.items():
+            args.extend(_flatten_override(f"{key}.{subkey}", subvalue))
+        return args
+
+    if isinstance(value, bool):
+        return [option] if value else [f"--no-{key.replace('_', '-')}"]
+
+    if isinstance(value, Path):
+        return [option, str(value)]
+
+    if isinstance(value, (list, tuple)):
+        return [option, json.dumps(value)]
+
+    return [option, str(value)]