diff --git a/docs/index.md b/docs/index.md
index 72a8b446..710d5140 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -67,13 +67,14 @@ End-to-end applications: RAG agents, ML agents, and multi-agent systems.
 
 ## Training Pipeline
 
-The Nemotron training pipeline follows a three-stage approach with full artifact lineage tracking:
+The Nemotron training pipeline follows a four-stage approach with full artifact lineage tracking:
 
 | Stage | Name | Description |
 |-------|------|-------------|
 | 0 | [Pretraining](train/nano3/pretrain.md) | Base model training on large text corpus |
 | 1 | [SFT](train/nano3/sft.md) | Supervised fine-tuning for instruction following |
 | 2 | [RL](train/nano3/rl.md) | Reinforcement learning for alignment |
+| 3 | [Evaluation](train/nano3/eval.md) | Benchmark testing with NeMo Evaluator |
 
 ## Why Nemotron?
 
@@ -133,6 +134,7 @@ train/artifacts.md
 train/nano3/pretrain.md
 train/nano3/sft.md
 train/nano3/rl.md
+train/nano3/eval.md
 train/nano3/import.md
 ```
@@ -147,4 +149,5 @@ train/omegaconf.md
 train/wandb.md
 train/cli.md
 train/data-prep.md
+train/evaluator.md
 ```
diff --git a/docs/train/cli.md b/docs/train/cli.md
index b7d40ec4..292ad25a 100644
--- a/docs/train/cli.md
+++ b/docs/train/cli.md
@@ -487,4 +487,5 @@ uv run nemotron myrecipe train -c tiny --run MY-CLUSTER
 - [Data Preparation](./data-prep.md) — Data preparation module
 - [Artifact Lineage](./artifacts.md) — W&B artifact system and lineage tracking
 - [W&B Integration](./wandb.md) — Credentials and configuration
+- [Evaluation Framework](./evaluator.md) — Model evaluation with NeMo Evaluator
 - [Nano3 Recipe](./nano3/README.md) — Complete training recipe example
diff --git a/docs/train/evaluator.md b/docs/train/evaluator.md
new file mode 100644
index 00000000..34c2f420
--- /dev/null
+++ b/docs/train/evaluator.md
@@ -0,0 +1,393 @@
# Evaluation Framework

The Nemotron evaluation framework provides model evaluation capabilities using [NeMo
Evaluator](https://github.com/NVIDIA/nemo-evaluator-launcher), enabling benchmark testing of trained models on standard NLP tasks. + +
+ +```console +$ uv run nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER +Compiled Configuration +╭──────────────────────────────────── run ─────────────────────────────────────╮ +│ wandb: │ +│ project: nemotron │ +│ entity: my-team │ +╰──────────────────────────────────────────────────────────────────────────────╯ + +[info] Detected W&B login, setting WANDB_API_KEY + +Starting evaluation... +✓ Evaluation submitted: 480d3c89bfe4a55c +Check status: nemo-evaluator-launcher status 480d3c89bfe4a55c +``` + +
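The invocation ID printed on submission can also be used from scripts. As a minimal sketch (not part of the shipped CLI), the `status` subcommand shown above can be wrapped with `subprocess`, assuming `nemo-evaluator-launcher` is on `PATH`:

```python
import subprocess


def status_argv(invocation_id: str) -> list[str]:
    """Argument vector for querying one invocation's status."""
    return ["nemo-evaluator-launcher", "status", invocation_id]


def launcher_status(invocation_id: str) -> str:
    """Run the launcher's status command and return its stdout (sketch)."""
    result = subprocess.run(
        status_argv(invocation_id),
        capture_output=True,
        text=True,
        check=True,  # raise if the launcher reports a failure
    )
    return result.stdout
```

Parsing the returned text is left to the caller; the launcher's human-readable output is not a stable API.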
+ +## Overview + +The evaluation framework enables: + +- **Benchmark Testing** — Run standard benchmarks (MMLU, ARC, HellaSwag, etc.) on your models +- **W&B Integration** — Auto-export results to Weights & Biases for tracking +- **Slurm Execution** — Submit evaluation jobs to HPC clusters +- **Auto-Squash** — Automatically converts Docker images to squashfs for Slurm clusters +- **Credential Auto-Propagation** — Automatically passes W&B tokens to remote jobs + +The evaluator uses the same `env.toml` execution profiles as training recipes, providing a unified experience across all stages. + +## Quick Start + +```bash +# Run evaluation on a cluster +uv run nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER + +# Preview config without executing +uv run nemotron evaluate -c nemotron-3-nano-nemo-ray --dry-run + +# Filter to specific tasks +uv run nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER -t adlr_mmlu + +# Override checkpoint path +uv run nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER \ + deployment.checkpoint_path=/path/to/your/checkpoint +``` + +## CLI Options + +| Option | Short | Description | +|--------|-------|-------------| +| `--config` | `-c` | Config name or path | +| `--run` | `-r` | Submit to cluster (attached, streams logs) | +| `--batch` | `-b` | Submit to cluster (detached, exits immediately) | +| `--dry-run` | `-d` | Preview config without executing | +| `--task` | `-t` | Filter to specific task(s), can be repeated | +| `--force-squash` | | Force re-squash even if cached | + +### Task Filtering + +Run specific benchmarks using the `-t` flag: + +```bash +# Single task +uv run nemotron evaluate -c config --run MY-CLUSTER -t adlr_mmlu + +# Multiple tasks +uv run nemotron evaluate -c config --run MY-CLUSTER -t adlr_mmlu -t hellaswag +``` + +### Available Tasks + +Common evaluation tasks include: + +| Task | Description | +|------|-------------| +| `adlr_mmlu` | Massive Multitask Language Understanding | +| 
`adlr_arc_challenge_llama_25_shot` | AI2 Reasoning Challenge | +| `adlr_winogrande_5_shot` | Winograd Schema Challenge | +| `hellaswag` | Commonsense reasoning | +| `openbookqa` | Open-domain question answering | + +## Execution Profiles + +The evaluator uses the same `env.toml` profiles as training recipes. See [Execution through NeMo-Run](./nemo-run.md) for full documentation. + +### Basic Profile + +```toml +# env.toml + +[wandb] +project = "nemotron" +entity = "my-team" + +[MY-CLUSTER] +executor = "slurm" +account = "my-account" +partition = "batch" +tunnel = "ssh" +host = "cluster.example.com" +user = "myuser" +remote_job_dir = "/lustre/fsw/users/myuser/.nemotron" +``` + +### Profile with Auto-Squash + +Slurm clusters use [Pyxis](https://github.com/NVIDIA/pyxis) with enroot for container execution. While you can use Docker references directly, pre-squashed `.sqsh` files significantly speed up job startup by avoiding container pulls on each run. + +With SSH tunnel settings, the CLI can automatically create squash files from Docker references: + +```toml +[MY-CLUSTER] +executor = "slurm" +account = "my-account" +partition = "batch" + +# SSH settings (enables auto-squash) +tunnel = "ssh" +host = "cluster.example.com" +user = "myuser" +remote_job_dir = "/lustre/fsw/users/myuser/.nemotron" + +# Container settings - use Docker ref, auto-squashed on first run +container_image = "nvcr.io/nvidia/nemo:25.01" +``` + +When you run with `--run MY-CLUSTER`, the CLI will: +1. Detect that `deployment.image` is a Docker reference (not a `.sqsh` path) +2. SSH to the cluster and run `enroot import` on a compute node +3. Cache the `.sqsh` file in `${remote_job_dir}/containers/` for reuse +4. Update the config to use the squashed path + +Subsequent runs reuse the cached squash file, eliminating container pull overhead. + +## Configuration + +Evaluation configs define how to deploy your model and which benchmarks to run. 
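Values in these configs can be overridden from the command line as dotted `key=value` arguments (for example, `deployment.checkpoint_path=/path/to/your/checkpoint` in Quick Start). The real merge goes through OmegaConf, which also handles type coercion and `${...}` interpolation, but conceptually it behaves like this simplified sketch:

```python
def apply_dotlist(cfg: dict, overrides: list[str]) -> dict:
    """Merge 'a.b.c=value' style overrides into a nested config dict.

    Simplified illustration only; the actual CLI resolves overrides
    through OmegaConf rather than plain dicts.
    """
    for item in overrides:
        path, _, value = item.partition("=")      # split on the first '='
        *parents, leaf = path.split(".")          # walk the dotted path
        node = cfg
        for key in parents:
            node = node.setdefault(key, {})       # create missing levels
        node[leaf] = value
    return cfg


cfg = {"deployment": {"checkpoint_path": "/default/ckpt", "type": "generic"}}
apply_dotlist(cfg, ["deployment.checkpoint_path=/my/ckpt"])
# cfg['deployment']['checkpoint_path'] is now '/my/ckpt'
```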
+ +### Example Config + +```yaml +# Execution (Slurm settings) +execution: + type: slurm + hostname: ${run.env.host} + account: ${run.env.account} + partition: ${run.env.partition} + num_nodes: 1 + gres: gpu:8 + + # Auto-export to W&B after evaluation + auto_export: + enabled: true + destinations: + - wandb + +# Deployment (Model serving) +deployment: + type: generic + image: ${run.env.container} # Docker image or .sqsh path + checkpoint_path: /path/to/checkpoint + command: >- + python deploy_ray_inframework.py + --megatron_checkpoint /checkpoint/ + --num_gpus 8 + +# Evaluation (Tasks to run) +evaluation: + tasks: + - name: adlr_mmlu + - name: hellaswag + - name: openbookqa + +# Export (W&B settings) +export: + wandb: + entity: ${run.wandb.entity} + project: ${run.wandb.project} +``` + +### Key Sections + +| Section | Purpose | +|---------|---------| +| `run.env` | Environment settings from env.toml (cluster, container) | +| `run.wandb` | W&B settings from env.toml `[wandb]` section | +| `execution` | Slurm executor configuration (nodes, GPUs, account) | +| `deployment` | Model deployment (container, checkpoint, command) | +| `evaluation` | Tasks and evaluation parameters | +| `export` | Result export destinations (W&B) | + +## Auto-Squash + +For Slurm clusters that require squashfs containers, the evaluator automatically converts Docker images to `.sqsh` files—the same behavior as training recipes. + +### How It Works + +1. **Detection** — CLI checks if `deployment.image` is a Docker reference (not already `.sqsh`) +2. **SSH Connection** — Connects to cluster via SSH tunnel (using `host` and `user` from env.toml) +3. **Squash** — Runs `enroot import` on a compute node to create the `.sqsh` file +4. **Cache** — Stores the squash file in `${remote_job_dir}/containers/` for reuse +5. 
**Config Update** — Rewrites `deployment.image` to use the squashed path + +### Usage + +```bash +# Auto-squash happens automatically for Docker refs +uv run nemotron evaluate -c config --run MY-CLUSTER + +# Force re-squash (ignores cache) +uv run nemotron evaluate -c config --run MY-CLUSTER --force-squash + +# Already-squashed paths skip the step +# (if deployment.image ends in .sqsh, no squashing needed) +``` + +### Requirements + +Auto-squash requires these settings in your `env.toml` profile: + +| Field | Required | Description | +|-------|----------|-------------| +| `executor` | Yes | Must be `"slurm"` | +| `tunnel` | Yes | Must be `"ssh"` | +| `host` | Yes | SSH hostname (e.g., `cluster.example.com`) | +| `user` | No | SSH username (defaults to current user) | +| `remote_job_dir` | Yes | Remote directory for job files and squash cache | + +## W&B Integration + +The evaluator automatically propagates W&B credentials when you're logged in locally—the same behavior as training recipes. + +### Setup + +1. **Login to W&B locally:** + ```bash + wandb login + ``` + +2. **Configure env.toml** (same `[wandb]` section used by all recipes): + ```toml + [wandb] + project = "nemotron" + entity = "my-team" + ``` + +3. 
**Run evaluation** — credentials are automatically passed: + ```bash + uv run nemotron evaluate -c config --run MY-CLUSTER + # [info] Detected W&B login, setting WANDB_API_KEY + ``` + +### What Gets Propagated + +| Variable | Source | Description | +|----------|--------|-------------| +| `WANDB_API_KEY` | Local wandb login | Auto-detected via `wandb.api.api_key` | +| `WANDB_PROJECT` | `env.toml [wandb]` | Project name for result tracking | +| `WANDB_ENTITY` | `env.toml [wandb]` | Team/user entity | + +## Monitoring Jobs + +### Check Status + +```bash +# Using nemo-evaluator-launcher directly +nemo-evaluator-launcher status INVOCATION_ID + +# Check Slurm queue +ssh cluster squeue -u $USER +``` + +### Stream Logs + +```bash +nemo-evaluator-launcher logs INVOCATION_ID +``` + +### Cancel Jobs + +```bash +# Cancel via Slurm +ssh cluster scancel JOB_ID + +# Or multiple jobs +ssh cluster "scancel JOB_ID1 JOB_ID2 JOB_ID3" +``` + +## Creating Custom Configs + +### Step 1: Create Config File + +```yaml +# src/nemotron/recipes/evaluator/config/my-model.yaml + +defaults: + - execution: slurm/default + - deployment: generic + - _self_ + +run: + env: + container: nvcr.io/nvidia/nemo:25.01 # Docker ref (auto-squashed) + # OR: container: /path/to/container.sqsh # Pre-squashed + wandb: + entity: null # Populated from env.toml + project: null + +execution: + type: slurm + hostname: ${run.env.host} + account: ${run.env.account} + num_nodes: 1 + gres: gpu:8 + + auto_export: + enabled: true + destinations: + - wandb + +deployment: + type: generic + image: ${run.env.container} + checkpoint_path: /path/to/your/model/checkpoint + command: >- + python deploy_script.py --checkpoint /checkpoint/ + +evaluation: + tasks: + - name: adlr_mmlu + - name: hellaswag + +export: + wandb: + entity: ${run.wandb.entity} + project: ${run.wandb.project} +``` + +### Step 2: Run Evaluation + +```bash +uv run nemotron evaluate -c my-model --run MY-CLUSTER +``` + +## Troubleshooting + +### "Missing key type" 
Error + +Ensure your config has all required Slurm fields: + +```yaml +execution: + type: slurm # Required + ntasks_per_node: 1 # Required + gres: gpu:8 # Required +``` + +### W&B Credentials Not Detected + +1. Verify you're logged in: `wandb login` +2. Check env.toml has `[wandb]` section +3. Look for `[info] Detected W&B login` message + +### Auto-Squash Not Working + +1. Verify `tunnel = "ssh"` in your env.toml profile +2. Check `host` and `remote_job_dir` are set +3. Ensure `nemo-run` is installed: `pip install nemo-run` + +### Jobs Stuck in PENDING + +Check queue status: +```bash +ssh cluster "squeue -p batch | head" +``` + +Common reasons: +- `(Priority)` — Waiting for resources +- `(Resources)` — Insufficient available nodes +- `(QOSMaxJobsPerUserLimit)` — User job limit reached + +## Further Reading + +- [Execution through NeMo-Run](./nemo-run.md) — Execution profiles and env.toml +- [W&B Integration](./wandb.md) — Credentials and artifact tracking +- [NeMo Evaluator Documentation](https://github.com/NVIDIA/nemo-evaluator-launcher) — Launcher reference diff --git a/docs/train/nano3/README.md b/docs/train/nano3/README.md index 85632169..fefc59cb 100644 --- a/docs/train/nano3/README.md +++ b/docs/train/nano3/README.md @@ -55,6 +55,9 @@ $ uv run nemotron nano3 sft --run YOUR-CLUSTER // Stage 2: Reinforcement Learning $ uv run nemotron nano3 data prep rl --run YOUR-CLUSTER $ uv run nemotron nano3 rl --run YOUR-CLUSTER + +// Stage 3: Evaluation +$ uv run nemotron nano3 eval --run YOUR-CLUSTER ``` @@ -78,6 +81,7 @@ $ uv run nemotron nano3 rl --run YOUR-CLUSTER | 0 | [Pretraining](./pretrain.md) | Base model on 25T tokens with curriculum learning | [pretrain.md](./pretrain.md) | | 1 | [SFT](./sft.md) | Multi-domain instruction tuning with 12+ data sources | [sft.md](./sft.md) | | 2 | [RL](./rl.md) | GRPO alignment with multi-environment rewards | [rl.md](./rl.md) | +| 3 | [Evaluation](./eval.md) | Benchmark testing with NeMo Evaluator | [eval.md](./eval.md) | ## 
Model Specifications @@ -111,6 +115,12 @@ Multi-environment RLVR training across 7 reward environments using GRPO, plus Ge → [RL Guide](./rl.md) +### Stage 3: Evaluation + +Benchmark testing on standard NLP tasks (MMLU, HellaSwag, ARC) using NeMo Evaluator, with automatic result export to W&B. + +→ [Evaluation Guide](./eval.md) + ## Execution Options All commands support [NeMo-Run](../nemo-run.md) execution modes: @@ -148,9 +158,15 @@ flowchart TB cmd2 --> model2["ModelArtifact-rl
(Final Model)"] end + subgraph eval["Stage 3: Evaluation"] + model2 --> cmd3["uv run nemotron nano3 eval"] + cmd3 --> results["Benchmark Results
(W&B)"] + end + style pretrain fill:#e1f5fe,stroke:#2196f3 style sft fill:#f3e5f5,stroke:#9c27b0 style rl fill:#e8f5e9,stroke:#4caf50 + style eval fill:#fff3e0,stroke:#ff9800 ``` → [Artifact Lineage & W&B Integration](../artifacts.md) @@ -168,9 +184,8 @@ Native integrations with NVIDIA's NeMo ecosystem: | [NeMo Curator](https://github.com/NVIDIA-NeMo/Curator) | Scalable data curation—deduplication, quality filtering, PII removal | Planned | | [NeMo Data Designer](https://github.com/NVIDIA-NeMo/DataDesigner) | Synthetic data generation for instruction tuning and alignment | Planned | | [NeMo Export-Deploy](https://github.com/NVIDIA-NeMo/Export-Deploy) | Model export to TensorRT-LLM and deployment | Planned | -| [NeMo Evaluator](https://github.com/NVIDIA-NeMo/Evaluator) | Comprehensive model evaluation and benchmarking | Planned | -These integrations will enable end-to-end pipelines from data curation to model evaluation. +These integrations will enable end-to-end pipelines from data curation to deployment. ## CLI Reference @@ -191,6 +206,7 @@ Usage: nemotron nano3 [OPTIONS] COMMAND [ARGS]... │ pretrain Run pretraining with Megatron-Bridge (stage0). │ │ sft Run supervised fine-tuning with Megatron-Bridge (stage1). │ │ rl Run reinforcement learning with NeMo-RL GRPO (stage2). │ +│ eval Run evaluation with NeMo-Evaluator (stage3). 
│ ╰──────────────────────────────────────────────────────────────────────────╯ // View training command help (SFT example with artifact overrides) @@ -255,6 +271,7 @@ wandb login - [Stage 0: Pretraining](./pretrain.md) - [Stage 1: SFT](./sft.md) - [Stage 2: RL](./rl.md) +- [Stage 3: Evaluation](./eval.md) - [Importing Models & Data](./import.md) - [Artifact Lineage](../artifacts.md) - [Execution through NeMo-Run](../nemo-run.md) diff --git a/docs/train/nano3/eval.md b/docs/train/nano3/eval.md new file mode 100644 index 00000000..f92c5d7d --- /dev/null +++ b/docs/train/nano3/eval.md @@ -0,0 +1,267 @@ +# Stage 3: Evaluation + +This stage evaluates trained models using [NeMo Evaluator](https://github.com/NVIDIA/nemo-evaluator-launcher), running standard NLP benchmarks to measure model capabilities. + +--- + +## Quick Start + +
+ +```console +// Run evaluation on the RL model (default) +$ uv run nemotron nano3 eval --run YOUR-CLUSTER + +// Evaluate a specific model stage +$ uv run nemotron nano3 eval --run YOUR-CLUSTER run.model=sft:latest + +// Run specific benchmarks only +$ uv run nemotron nano3 eval --run YOUR-CLUSTER -t adlr_mmlu -t hellaswag + +// Preview config without executing +$ uv run nemotron nano3 eval --dry-run +``` + +
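References like `run.model=sft:latest` above follow a simple `stage:version` scheme pointing at W&B model artifacts. A tiny illustrative parser for that scheme (a hypothetical helper, not part of the recipe code):

```python
def parse_model_ref(ref: str) -> tuple[str, str]:
    """Split a 'stage:version' artifact reference; version defaults to 'latest'."""
    stage, _, version = ref.partition(":")
    return stage, version or "latest"


print(parse_model_ref("rl:latest"))  # ('rl', 'latest')
print(parse_model_ref("sft:v2"))     # ('sft', 'v2')
```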
> **Note**: The `--run YOUR-CLUSTER` flag submits jobs via [NeMo-Run](../nemo-run.md). See [Execution through NeMo-Run](../nemo-run.md) for setup.

---

## CLI Command

```bash
uv run nemotron nano3 eval [options] [overrides...]
```

| Option | Short | Description |
|--------|-------|-------------|
| `--run PROFILE` | `-r` | Submit to cluster (attached—waits, streams logs) |
| `--batch PROFILE` | `-b` | Submit to cluster (detached—submits and exits) |
| `--dry-run` | `-d` | Preview config without executing |
| `--task TASK` | `-t` | Filter to specific task(s), can be repeated |
| `--force-squash` | | Force re-squash of container image |
| `key=value` | | Override config values |

### Task Filtering

Run specific benchmarks using the `-t` flag:

```bash
# Single task
uv run nemotron nano3 eval --run YOUR-CLUSTER -t adlr_mmlu

# Multiple tasks
uv run nemotron nano3 eval --run YOUR-CLUSTER -t adlr_mmlu -t hellaswag -t arc_challenge
```

### Model Selection

By default, evaluation runs on the RL stage output (`run.model=rl:latest`).
Override to evaluate other stages: + +```bash +# Evaluate SFT model +uv run nemotron nano3 eval --run YOUR-CLUSTER run.model=sft:latest + +# Evaluate pretrained model +uv run nemotron nano3 eval --run YOUR-CLUSTER run.model=pretrain:latest + +# Evaluate specific version +uv run nemotron nano3 eval --run YOUR-CLUSTER run.model=rl:v2 +``` + +--- + +## Available Benchmarks + +The default configuration includes these tasks: + +| Task | Description | +|------|-------------| +| `adlr_mmlu` | Massive Multitask Language Understanding | +| `hellaswag` | Commonsense reasoning | +| `arc_challenge` | AI2 Reasoning Challenge | + +Additional tasks available in NeMo Evaluator: + +| Task | Description | +|------|-------------| +| `adlr_arc_challenge_llama_25_shot` | ARC Challenge (25-shot) | +| `adlr_winogrande_5_shot` | Winograd Schema Challenge | +| `openbookqa` | Open-domain question answering | +| `truthfulqa` | Truthfulness evaluation | +| `gsm8k` | Grade school math | + +See [NeMo Evaluator](https://github.com/NVIDIA/nemo-evaluator-launcher) for the full list of available tasks. + +--- + +## Configuration + +Evaluation configs define how to deploy your model and which benchmarks to run. 
+ +| File | Purpose | +|------|---------| +| `config/default.yaml` | Production configuration with vLLM deployment | + +### Key Configuration Sections + +```yaml +# Model to evaluate (W&B artifact reference) +run: + model: rl:latest # Options: pretrain, sft, rl + +# Deployment (model serving) +deployment: + type: vllm + tensor_parallel_size: 4 + data_parallel_size: 1 + extra_args: "--max-model-len 32768" + +# Tasks to run +evaluation: + tasks: + - name: adlr_mmlu + - name: hellaswag + - name: arc_challenge + +# W&B export for results +export: + wandb: + entity: ${run.wandb.entity} + project: ${run.wandb.project} +``` + +### Override Examples + +```bash +# Different tensor parallelism +uv run nemotron nano3 eval --run YOUR-CLUSTER deployment.tensor_parallel_size=8 + +# Limit samples for quick testing +uv run nemotron nano3 eval --run YOUR-CLUSTER \ + evaluation.nemo_evaluator_config.config.params.limit_samples=10 +``` + +--- + +## Running with NeMo-Run + +The evaluator uses the same `env.toml` profiles as training stages, providing a unified experience across the pipeline. + +```toml +[wandb] +project = "nemotron" +entity = "YOUR-TEAM" + +[YOUR-CLUSTER] +executor = "slurm" +account = "YOUR-ACCOUNT" +partition = "batch" +tunnel = "ssh" +host = "cluster.example.com" +user = "myuser" +remote_job_dir = "/lustre/fsw/users/myuser/.nemotron" +``` + +See [Execution through NeMo-Run](../nemo-run.md) for complete configuration options. + +### W&B Integration + +Results are automatically exported to Weights & Biases when: +1. You're logged in locally (`wandb login`) +2. 
`[wandb]` section is configured in `env.toml` + +```bash +# Verify W&B login +wandb login + +# Run evaluation—results auto-export to W&B +uv run nemotron nano3 eval --run YOUR-CLUSTER +# [info] Detected W&B login, setting WANDB_API_KEY +``` + +--- + +## Artifact Lineage + +Evaluation connects to the training pipeline through [W&B Artifacts](../artifacts.md): + +```mermaid +%%{init: {'theme': 'base', 'themeVariables': { 'primaryBorderColor': '#333333', 'lineColor': '#333333', 'primaryTextColor': '#333333'}}}%% +flowchart LR + model0["ModelArtifact-pretrain"] --> eval0["eval"] + model1["ModelArtifact-sft"] --> eval1["eval"] + model2["ModelArtifact-rl"] --> eval2["eval"] + + eval0 --> results0["Benchmark Results"] + eval1 --> results1["Benchmark Results"] + eval2 --> results2["Benchmark Results"] + + results0 --> wandb["W&B Dashboard"] + results1 --> wandb + results2 --> wandb + + style model0 fill:#e1f5fe,stroke:#2196f3 + style model1 fill:#f3e5f5,stroke:#9c27b0 + style model2 fill:#e8f5e9,stroke:#4caf50 + style wandb fill:#fff3e0,stroke:#ff9800 +``` + +--- + +## Monitoring Jobs + +### Check Status + +```bash +# Using nemo-evaluator-launcher +nemo-evaluator-launcher status INVOCATION_ID + +# Check Slurm queue +ssh cluster squeue -u $USER +``` + +### Stream Logs + +```bash +nemo-evaluator-launcher logs INVOCATION_ID +``` + +--- + +## Troubleshooting + +### W&B Credentials Not Detected + +1. Verify you're logged in: `wandb login` +2. Check env.toml has `[wandb]` section +3. 
Look for `[info] Detected W&B login` message + +### Model Artifact Not Found + +Verify the artifact exists in W&B: +```bash +# Check available artifacts +wandb artifact ls YOUR-ENTITY/YOUR-PROJECT +``` + +### Evaluation Times Out + +Increase the timeout in your config: +```bash +uv run nemotron nano3 eval --run YOUR-CLUSTER \ + evaluation.nemo_evaluator_config.config.params.request_timeout=7200 +``` + +--- + +## Reference + +- [Evaluation Framework](../evaluator.md) — Full evaluator documentation +- [NeMo Evaluator Documentation](https://github.com/NVIDIA/nemo-evaluator-launcher) — Launcher reference +- [Artifact Lineage](../artifacts.md) — W&B artifact system +- [Execution through NeMo-Run](../nemo-run.md) — Execution profiles +- [W&B Integration](../wandb.md) — Credentials and configuration +- **Recipe Source**: `src/nemotron/recipes/nano3/stage3_eval/` — Implementation details +- [Back to Overview](./README.md) diff --git a/pyproject.toml b/pyproject.toml index 2d16415c..7cc2c625 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ wandb = ["wandb>=0.15.0"] s3 = ["s3fs>=2024.0.0"] gcs = ["gcsfs>=2024.0.0"] sentencepiece = ["sentencepiece>=0.2.0"] +evaluator = ["nemo-evaluator-launcher>=0.1.0"] dev = [ "pytest>=7.0.0", "pytest-cov>=4.0.0", @@ -60,6 +61,7 @@ all = [ "s3fs>=2024.0.0", "gcsfs>=2024.0.0", "sentencepiece>=0.2.0", + "nemo-evaluator-launcher>=0.1.0", ] # Note: megatron-bridge is required for training but not listed as a dependency diff --git a/src/nemotron/cli/bin/nemotron.py b/src/nemotron/cli/bin/nemotron.py index a595e5cc..580d7c3b 100644 --- a/src/nemotron/cli/bin/nemotron.py +++ b/src/nemotron/cli/bin/nemotron.py @@ -87,8 +87,23 @@ def _register_groups() -> None: app.add_typer(kit_app, name="kit") -# Register groups on import +def _register_commands() -> None: + """Register top-level commands with the main app.""" + from nemotron.cli.evaluate import evaluate + + # Register evaluate command with same context settings as recipe commands 
+ app.command( + name="evaluate", + context_settings={ + "allow_extra_args": True, + "ignore_unknown_options": True, + }, + )(evaluate) + + +# Register groups and commands on import _register_groups() +_register_commands() def main() -> None: diff --git a/src/nemotron/cli/evaluate.py b/src/nemotron/cli/evaluate.py new file mode 100644 index 00000000..d55725d7 --- /dev/null +++ b/src/nemotron/cli/evaluate.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Top-level evaluate command. + +Provides a generic `nemotron evaluate` command with pre-built configs for +common evaluation scenarios. Unlike recipe-specific commands (nano3/eval), +this command has no default config and requires explicit config selection. +""" + +from __future__ import annotations + +import typer + +from nemotron.kit.cli.evaluator import evaluator + +# Config directory for generic evaluator configs +CONFIG_DIR = "src/nemotron/recipes/evaluator/config" + + +@evaluator( + name="evaluate", + config_dir=CONFIG_DIR, + default_config="default", + require_explicit_config=True, +) +def evaluate(ctx: typer.Context) -> None: + """Run model evaluation with nemo-evaluator. + + Generic evaluation command with pre-built configs for common models. + For recipe-specific evaluation with artifact resolution, use `nemotron nano3 eval`. 
+ + Available configs: + nemotron-3-nano-nemo-ray NeMo Framework Ray deployment for Nemotron-3-Nano + + Examples: + # Evaluate Nemotron-3-Nano with NeMo Ray deployment + nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER + + # Override checkpoint path + nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER \\ + deployment.checkpoint_path=/path/to/checkpoint + + # Filter specific tasks + nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER -t adlr_mmlu + + # Dry run (preview config) + nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER --dry-run + + # Use custom config file + nemotron evaluate -c /path/to/custom.yaml --run MY-CLUSTER + """ + ... diff --git a/src/nemotron/cli/nano3/app.py b/src/nemotron/cli/nano3/app.py index 9e13b79f..052c73ed 100644 --- a/src/nemotron/cli/nano3/app.py +++ b/src/nemotron/cli/nano3/app.py @@ -22,13 +22,13 @@ import typer from nemotron.cli.nano3.data import data_app -from nemotron.cli.nano3.help import RecipeCommand, make_recipe_command +from nemotron.cli.nano3.eval import eval as eval_cmd +from nemotron.cli.nano3.help import make_recipe_command from nemotron.cli.nano3.model import model_app from nemotron.cli.nano3.pretrain import pretrain from nemotron.cli.nano3.rl import rl from nemotron.cli.nano3.sft import sft - # Create nano3 app nano3_app = typer.Typer( name="nano3", @@ -91,3 +91,21 @@ config_dir="src/nemotron/recipes/nano3/stage2_rl/config", ), )(rl) + +# Eval has model artifact override (evaluates trained model) +# Note: supports_stage=False because evaluator doesn't use nemo-run staging +nano3_app.command( + name="eval", + context_settings={ + "allow_extra_args": True, + "ignore_unknown_options": True, + }, + rich_help_panel="Training Stages", + cls=make_recipe_command( + artifact_overrides={ + "model": "Model checkpoint artifact to evaluate", + }, + config_dir="src/nemotron/recipes/nano3/stage3_eval/config", + supports_stage=False, + ), +)(eval_cmd) diff --git 
a/src/nemotron/cli/nano3/eval.py b/src/nemotron/cli/nano3/eval.py new file mode 100644 index 00000000..4151ac33 --- /dev/null +++ b/src/nemotron/cli/nano3/eval.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Eval command implementation for nano3 recipe (stage3).""" + +from __future__ import annotations + +import typer + +from nemotron.kit.cli.evaluator import evaluator + +CONFIG_DIR = "src/nemotron/recipes/nano3/stage3_eval/config" + + +@evaluator( + name="nano3/eval", + config_dir=CONFIG_DIR, + default_config="default", +) +def eval(ctx: typer.Context) -> None: + """Run evaluation with NeMo-Evaluator (stage3). + + Evaluates the trained model using nemo-evaluator-launcher. + By default, evaluates the RL stage output (run.model=rl:latest). + + Examples: + # Eval on cluster (loads env.toml profile) + nemotron nano3 eval --run MY-CLUSTER + + # Override model artifact + nemotron nano3 eval --run MY-CLUSTER run.model=sft:v2 + + # Filter specific tasks + nemotron nano3 eval --run MY-CLUSTER -t adlr_mmlu -t hellaswag + + # Dry run (show resolved config without executing) + nemotron nano3 eval --run MY-CLUSTER --dry-run + + # Local execution + nemotron nano3 eval execution.type=local + """ + ... 
diff --git a/src/nemotron/cli/nano3/help.py b/src/nemotron/cli/nano3/help.py index 2e9c5693..9e005bfa 100644 --- a/src/nemotron/cli/nano3/help.py +++ b/src/nemotron/cli/nano3/help.py @@ -84,10 +84,12 @@ class RecipeCommand(TyperCommand): artifact_overrides: Dict mapping artifact names to descriptions. Example: {"data": "Data artifact", "model": "Model checkpoint"} config_dir: Path to config directory (relative to repo root). + supports_stage: Whether this command supports --stage option. """ artifact_overrides: ClassVar[dict[str, str]] = {} config_dir: ClassVar[str | None] = None + supports_stage: ClassVar[bool] = True def format_help(self, ctx, formatter): """Format help with custom recipe options section.""" @@ -115,7 +117,8 @@ def format_help(self, ctx, formatter): options_table.add_row("-r, --run PROFILE", "Submit to cluster (attached)") options_table.add_row("-b, --batch PROFILE", "Submit to cluster (detached)") options_table.add_row("-d, --dry-run", "Preview config without execution") - options_table.add_row("--stage", "Stage files for interactive debugging") + if self.supports_stage: + options_table.add_row("--stage", "Stage files for interactive debugging") console.print( Panel( @@ -227,6 +230,7 @@ def format_help(self, ctx, formatter): def make_recipe_command( artifact_overrides: dict[str, str] | None = None, config_dir: str | None = None, + supports_stage: bool = True, ): """Factory function to create a RecipeCommand subclass with custom options. @@ -234,6 +238,7 @@ def make_recipe_command( artifact_overrides: Dict mapping artifact names to descriptions. Example: {"data": "Data artifact", "model": "Model checkpoint"} config_dir: Path to config directory (relative to repo root). + supports_stage: Whether this command supports --stage option. Returns: A RecipeCommand subclass with the specified options. 
@@ -244,4 +249,5 @@ class CustomRecipeCommand(RecipeCommand): CustomRecipeCommand.artifact_overrides = artifact_overrides or {} CustomRecipeCommand.config_dir = config_dir + CustomRecipeCommand.supports_stage = supports_stage return CustomRecipeCommand diff --git a/src/nemotron/kit/cli/__init__.py b/src/nemotron/kit/cli/__init__.py index e9b48156..0d63756e 100644 --- a/src/nemotron/kit/cli/__init__.py +++ b/src/nemotron/kit/cli/__init__.py @@ -17,11 +17,13 @@ This module provides shared CLI infrastructure built on Typer + OmegaConf. """ +from nemotron.kit.cli.evaluator import evaluator from nemotron.kit.cli.globals import GlobalContext, global_callback from nemotron.kit.cli.recipe import recipe __all__ = [ "GlobalContext", + "evaluator", "global_callback", "recipe", ] diff --git a/src/nemotron/kit/cli/evaluator.py b/src/nemotron/kit/cli/evaluator.py new file mode 100644 index 00000000..d43d1247 --- /dev/null +++ b/src/nemotron/kit/cli/evaluator.py @@ -0,0 +1,613 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""@evaluator decorator for evaluation commands. + +Reuses ConfigBuilder from recipe infrastructure for consistent config handling, +but executes via nemo-evaluator-launcher instead of nemo-run. 
+""" + +from __future__ import annotations + +import os +import sys +from collections.abc import Callable +from dataclasses import dataclass +from functools import wraps +from pathlib import Path +from typing import Any + +import typer +from rich.console import Console + +from nemotron.kit.cli.config import ConfigBuilder, generate_job_dir +from nemotron.kit.cli.display import display_job_config, display_job_submission +from nemotron.kit.cli.env import get_wandb_config +from nemotron.kit.cli.globals import GlobalContext, split_unknown_args +from nemotron.kit.cli.utils import resolve_run_interpolations + +console = Console() + + +@dataclass +class EvaluatorMetadata: + """Metadata attached to an evaluator command function. + + Attributes: + name: Evaluator identifier (e.g., "nano3/eval") + config_dir: Path to config directory relative to repo root + default_config: Default config name (default: "default") + require_explicit_config: If True, requires -c/--config to be provided + """ + + name: str + config_dir: str + default_config: str = "default" + require_explicit_config: bool = False + + +def evaluator( + name: str, + config_dir: str, + default_config: str = "default", + *, + require_explicit_config: bool = False, +) -> Callable: + """Decorator marking a function as an evaluator command. + + Similar to @recipe but executes via nemo-evaluator-launcher. + Supports --run/--batch for cluster execution, local execution when no profile. + + Args: + name: Evaluator identifier (e.g., "nano3/eval") + config_dir: Path to config directory + (e.g., "src/nemotron/recipes/nano3/stage3_eval/config") + default_config: Default config name (stem) or path used when -c/--config + is not provided (default: "default"). + require_explicit_config: If True, requires -c/--config to be provided. + Used for top-level `nemotron evaluate` command. 
+ + Example: + @evaluator( + name="nano3/eval", + config_dir="src/nemotron/recipes/nano3/stage3_eval/config", + ) + def eval(ctx: typer.Context): + '''Run evaluation with NeMo-Evaluator (stage3).''' + ... + """ + + def decorator(func: Callable) -> Callable: + @wraps(func) + def wrapper(ctx: typer.Context) -> None: + # Get global context + global_ctx: GlobalContext = ctx.obj + if global_ctx is None: + global_ctx = GlobalContext() + + # Split unknown args into dotlist and passthrough + # Also extract any global options that appear after the subcommand + dotlist, passthrough, global_ctx = split_unknown_args(ctx.args or [], global_ctx) + global_ctx.dotlist = dotlist + global_ctx.passthrough = passthrough + + # Validate options after split_unknown_args has extracted all global options + if global_ctx.run and global_ctx.batch: + typer.echo("Error: --run and --batch cannot both be set", err=True) + raise typer.Exit(1) + + # --stage is not supported for evaluator + if global_ctx.stage: + typer.echo("Error: --stage is not supported for evaluator commands", err=True) + raise typer.Exit(1) + + # Check if explicit config is required + if require_explicit_config and not global_ctx.config: + typer.echo( + "Error: -c/--config is required for this command.\n" + "Example: nemotron evaluate -c /path/to/eval.yaml --run CLUSTER", + err=True, + ) + raise typer.Exit(1) + + # Build configuration (reuses ConfigBuilder) + builder = ConfigBuilder( + recipe_name=name, + script_path="", # Not used for evaluator + config_dir=config_dir, + default_config=default_config, + ctx=global_ctx, + argv=sys.argv, + ) + + # Load and merge config + builder.load_and_merge() + + # Build full job config + builder.build_job_config() + + # Auto-inject W&B env mappings if W&B export is configured + # This mirrors nemo-run's behavior of auto-passing WANDB_API_KEY + if _needs_wandb(builder.job_config): + _inject_wandb_env_mappings(builder.job_config) + + # Auto-squash container images for Slurm execution + # This 
mirrors nemo-run's behavior of auto-squashing Docker images + _maybe_auto_squash_evaluator(builder.job_config, global_ctx) + + # Display compiled configuration + # Show resolved paths for remote execution (--run/--batch) + for_remote = global_ctx.mode in ("run", "batch") + display_job_config(builder.job_config, for_remote=for_remote) + + # Handle dry-run mode + if global_ctx.dry_run: + return + + # Save configs (job.yaml for provenance, eval.yaml for launcher) + job_path, eval_path = _save_eval_configs(builder, for_remote=for_remote) + + # Display job submission summary + display_job_submission(job_path, eval_path, {}, global_ctx.mode) + + # Execute via evaluator launcher + _execute_evaluator( + job_config=builder.job_config, + passthrough=passthrough, + ) + + # Attach metadata to function for introspection + wrapper._evaluator_metadata = EvaluatorMetadata( + name=name, + config_dir=config_dir, + default_config=default_config, + require_explicit_config=require_explicit_config, + ) + + return wrapper + + return decorator + + +def _save_eval_configs( + builder: ConfigBuilder, + *, + for_remote: bool = False, +) -> tuple[Path, Path]: + """Save job and eval configs to disk. + + Args: + builder: ConfigBuilder with loaded configuration + for_remote: If True, rewrite paths for remote execution + + Returns: + Tuple of (job_yaml_path, eval_yaml_path) + """ + from omegaconf import OmegaConf + + from nemotron.kit.cli.utils import rewrite_paths_for_remote + + job_config = builder.job_config + job_dir = generate_job_dir(builder.recipe_name) + + # Extract eval config (everything except 'run' section, with ${run.*} resolved) + config_dict = OmegaConf.to_container(job_config, resolve=False) + run_section = config_dict.pop("run", {}) + + # Rewrite paths for remote execution if needed + if for_remote: + repo_root = Path.cwd() + config_dict = rewrite_paths_for_remote(config_dict, repo_root) + + # Resolve ${run.*} interpolations (${run.env.host}, ${run.wandb.entity}, etc.) 
+ config_dict = resolve_run_interpolations(config_dict, run_section) + + eval_config = OmegaConf.create(config_dict) + + # Save configs + job_dir.mkdir(parents=True, exist_ok=True) + + job_path = job_dir / "job.yaml" + eval_path = job_dir / "eval.yaml" + + OmegaConf.save(job_config, job_path) + OmegaConf.save(eval_config, eval_path) + + return job_path, eval_path + + +def _execute_evaluator( + job_config: Any, + passthrough: list[str], +) -> None: + """Execute evaluation via nemo-evaluator-launcher. + + 1. Ensure W&B env vars are set (needed for artifact resolution) + 2. Resolve artifacts (${art:model,path}) + 3. Extract evaluator config (everything except 'run' section) + 4. Call run_eval() with fully resolved config + + Args: + job_config: Full job configuration + passthrough: Passthrough arguments (for -t/--task flags) + """ + from omegaconf import OmegaConf + + from nemotron.kit.resolvers import ( + clear_artifact_cache, + register_resolvers_from_config, + ) + + # Ensure W&B host env vars BEFORE artifact resolution + # The resolver uses WANDB_ENTITY/WANDB_PROJECT from environment to locate artifacts + # This loads entity/project from env.toml [wandb] section if not already set + _ensure_wandb_host_env() + + # Resolve artifacts (${art:model,path} etc.) + clear_artifact_cache() + register_resolvers_from_config( + job_config, + artifacts_key="run", + mode="pre_init", + ) + + # Resolve all interpolations + # This resolves: ${run.env.host}, ${run.wandb.entity}, ${art:model,path}, etc. 
+ resolved_config = OmegaConf.to_container(job_config, resolve=True) + + # Extract evaluator-specific config (everything except 'run' section) + # The 'run' section was only needed for interpolation, not for the launcher + eval_config = {k: v for k, v in resolved_config.items() if k != "run"} + eval_config = OmegaConf.create(eval_config) + + # Parse -t/--task flags from passthrough + task_list = _parse_task_flags(passthrough) + + # Validate that no extra passthrough args exist (only -t/--task allowed) + extra_args = _get_non_task_args(passthrough) + if extra_args: + typer.echo( + f"Error: Unknown arguments: {' '.join(extra_args)}\n" + "Only -t/--task flags are supported for passthrough.", + err=True, + ) + raise typer.Exit(1) + + # Import and call evaluator launcher + try: + from nemo_evaluator_launcher.api.functional import run_eval + except ImportError: + typer.echo("Error: nemo-evaluator-launcher is required for evaluation", err=True) + typer.echo('Install with: pip install "nemotron[evaluator]"', err=True) + raise typer.Exit(1) + + # Inject W&B env var mappings into eval_config if needed + # (env vars were already set earlier for artifact resolution) + if _needs_wandb(eval_config): + _inject_wandb_env_mappings(eval_config) + + # Call the launcher + console.print("\n[bold blue]Starting evaluation...[/bold blue]") + invocation_id = run_eval(eval_config, dry_run=False, tasks=task_list) + + if invocation_id: + console.print(f"\n[green]✓[/green] Evaluation submitted: [cyan]{invocation_id}[/cyan]") + console.print( + f"[dim]Check status: nemo-evaluator-launcher status {invocation_id}[/dim]" + ) + console.print(f"[dim]Stream logs: nemo-evaluator-launcher logs {invocation_id}[/dim]") + + +def _parse_task_flags(passthrough: list[str]) -> list[str] | None: + """Parse -t/--task flags from passthrough args. 
+ + Args: + passthrough: List of passthrough arguments + + Returns: + List of task names, or None if no tasks specified + """ + tasks = [] + i = 0 + while i < len(passthrough): + if passthrough[i] in ("-t", "--task") and i + 1 < len(passthrough): + tasks.append(passthrough[i + 1]) + i += 2 + else: + i += 1 + return tasks if tasks else None + + +def _get_non_task_args(passthrough: list[str]) -> list[str]: + """Get passthrough args that are not -t/--task flags. + + Args: + passthrough: List of passthrough arguments + + Returns: + List of non-task arguments + """ + extra = [] + i = 0 + while i < len(passthrough): + if passthrough[i] in ("-t", "--task") and i + 1 < len(passthrough): + i += 2 # Skip -t and its value + else: + extra.append(passthrough[i]) + i += 1 + return extra + + +# ============================================================================= +# W&B Token Auto-Propagation +# ============================================================================= +# Similar to how nemo-run automatically passes WANDB_API_KEY when logged in, +# these helpers ensure the evaluator launcher receives the W&B credentials. + + +def _needs_wandb(cfg: Any) -> bool: + """Check if config requires W&B credentials. 
+ + Returns True if: + - execution.auto_export.destinations contains "wandb", OR + - export.wandb section exists + + Args: + cfg: Job configuration (OmegaConf DictConfig or dict) + + Returns: + True if W&B credentials are needed + """ + from omegaconf import OmegaConf + + # Convert to dict for easier access + if hasattr(cfg, "_content"): + cfg_dict = OmegaConf.to_container(cfg, resolve=False) + else: + cfg_dict = cfg + + # Check execution.auto_export.destinations + try: + destinations = cfg_dict.get("execution", {}).get("auto_export", {}).get("destinations", []) + if "wandb" in destinations: + return True + except (AttributeError, TypeError): + pass + + # Check export.wandb section + try: + if cfg_dict.get("export", {}).get("wandb") is not None: + return True + except (AttributeError, TypeError): + pass + + return False + + +def _ensure_wandb_host_env() -> None: + """Ensure W&B environment variables are set on the host. + + Auto-detects WANDB_API_KEY from local wandb login (same as nemo-run). + Also sets WANDB_PROJECT/WANDB_ENTITY from env.toml [wandb] section. + + This is required because nemo-evaluator-launcher checks os.getenv() + for env_vars mappings at submission time. 
+ """ + # Auto-detect WANDB_API_KEY from wandb login + if "WANDB_API_KEY" not in os.environ: + try: + import wandb + + api_key = wandb.api.api_key + if api_key: + os.environ["WANDB_API_KEY"] = api_key + sys.stderr.write("[info] Detected W&B login, setting WANDB_API_KEY\n") + except Exception: + pass # wandb not installed or not logged in + + # Load WANDB_PROJECT/WANDB_ENTITY from env.toml [wandb] section + wandb_config = get_wandb_config() + if wandb_config is not None: + if wandb_config.get("project") and "WANDB_PROJECT" not in os.environ: + os.environ["WANDB_PROJECT"] = wandb_config.project + if wandb_config.get("entity") and "WANDB_ENTITY" not in os.environ: + os.environ["WANDB_ENTITY"] = wandb_config.entity + + +def _inject_wandb_env_mappings(cfg: Any) -> None: + """Inject W&B env var mappings into evaluator config. + + The nemo-evaluator-launcher expects: + - evaluation.env_vars: mapping of container env var -> host env var name + - execution.env_vars.export: env vars for the W&B export container + + This function adds the WANDB_API_KEY (and optionally PROJECT/ENTITY) + mappings so the launcher knows to forward these from the host environment. + + Note: This only adds string mappings (e.g., "WANDB_API_KEY": "WANDB_API_KEY"), + not actual secrets. The launcher resolves these via os.getenv() at runtime. 
+ + Args: + cfg: Job configuration (OmegaConf DictConfig) - modified in place + """ + from omegaconf import open_dict + + # Helper to safely set nested dict value + def _ensure_nested(cfg_node: Any, *keys: str) -> Any: + """Ensure nested dict path exists, creating dicts as needed.""" + current = cfg_node + for key in keys: + if key not in current or current[key] is None: + with open_dict(current): + current[key] = {} + current = current[key] + return current + + # Inject into evaluation.env_vars (for evaluation containers) + try: + eval_env = _ensure_nested(cfg, "evaluation", "env_vars") + with open_dict(eval_env): + if "WANDB_API_KEY" not in eval_env: + eval_env["WANDB_API_KEY"] = "WANDB_API_KEY" + if "WANDB_PROJECT" not in eval_env: + eval_env["WANDB_PROJECT"] = "WANDB_PROJECT" + if "WANDB_ENTITY" not in eval_env: + eval_env["WANDB_ENTITY"] = "WANDB_ENTITY" + except Exception: + pass # Config structure doesn't support this + + # Inject into execution.env_vars.export (for W&B export container) + try: + export_env = _ensure_nested(cfg, "execution", "env_vars", "export") + with open_dict(export_env): + if "WANDB_API_KEY" not in export_env: + export_env["WANDB_API_KEY"] = "WANDB_API_KEY" + if "WANDB_PROJECT" not in export_env: + export_env["WANDB_PROJECT"] = "WANDB_PROJECT" + if "WANDB_ENTITY" not in export_env: + export_env["WANDB_ENTITY"] = "WANDB_ENTITY" + except Exception: + pass # Config structure doesn't support this + + +# ============================================================================= +# Container Auto-Squash for Slurm +# ============================================================================= +# Similar to how training recipes auto-squash Docker images for Slurm, +# these helpers ensure evaluator container images are squashed before execution. + + +def _collect_evaluator_images(cfg: Any) -> list[tuple[str, str]]: + """Collect (dotpath, image) for all container images in eval config. 
+ + Args: + cfg: Evaluator configuration (OmegaConf DictConfig) + + Returns: + List of (dotpath, image_value) tuples for images that need squashing + """ + from omegaconf import OmegaConf + + images = [] + + # Deployment image + dep_image = OmegaConf.select(cfg, "deployment.image") + if dep_image and isinstance(dep_image, str): + images.append(("deployment.image", dep_image)) + + # Proxy image (if present) + proxy_image = OmegaConf.select(cfg, "execution.proxy.image") + if proxy_image and isinstance(proxy_image, str): + images.append(("execution.proxy.image", proxy_image)) + + return images + + +def _maybe_auto_squash_evaluator( + job_config: Any, + global_ctx: GlobalContext, +) -> None: + """Auto-squash container images for Slurm execution. + + Checks if the executor is Slurm with SSH tunnel, and if so, squashes + any Docker images to .sqsh files on the remote cluster. Modifies + job_config in-place with the squashed paths. + + Args: + job_config: Full job configuration (OmegaConf DictConfig) - modified in place + global_ctx: Global CLI context with mode and force_squash flag + """ + from omegaconf import OmegaConf, open_dict + + from nemotron.kit.cli.squash import ensure_squashed_image, is_sqsh_image + + # Only for remote slurm execution + if global_ctx.mode not in ("run", "batch"): + return + + # Skip on dry-run to avoid remote side effects + if global_ctx.dry_run: + return + + # Get env config + env_config = OmegaConf.to_container(job_config.run.env, resolve=True) + + # Only for Slurm executor + if env_config.get("executor") != "slurm": + return + + # Need SSH tunnel support + if env_config.get("tunnel") != "ssh": + return + + # Need SSH connection info + host = env_config.get("host") + user = env_config.get("user") + remote_job_dir = env_config.get("remote_job_dir") + + if not all([host, remote_job_dir]): + return + + # Check for nemo-run (optional dependency for SSH tunnel) + try: + import nemo_run as run + except ImportError: + console.print( + 
"[yellow]Warning:[/yellow] nemo-run not installed, skipping auto-squash. " + "Install with: pip install nemo-run" + ) + return + + # Collect images to squash + images = _collect_evaluator_images(job_config) + if not images: + return + + # Filter out already-squashed images + images_to_squash = [(dp, img) for dp, img in images if not is_sqsh_image(img)] + if not images_to_squash: + return + + # Create SSH tunnel + tunnel = run.SSHTunnel( + host=host, + user=user or "", + job_dir=remote_job_dir, + ) + + try: + tunnel.connect() + + # Squash each image and update config + for dotpath, image in images_to_squash: + console.print(f"[blue]Auto-squashing:[/blue] {image}") + sqsh_path = ensure_squashed_image( + tunnel=tunnel, + container_image=image, + remote_job_dir=remote_job_dir, + env_config=env_config, + force=global_ctx.force_squash, + ) + + # Update config with squashed path + with open_dict(job_config): + OmegaConf.update(job_config, dotpath, sqsh_path, merge=False) + + finally: + # Cleanup tunnel if it has a disconnect method + if hasattr(tunnel, "disconnect"): + try: + tunnel.disconnect() + except Exception: + pass diff --git a/src/nemotron/kit/cli/recipe.py b/src/nemotron/kit/cli/recipe.py index 48d2a7f6..bca98fe0 100644 --- a/src/nemotron/kit/cli/recipe.py +++ b/src/nemotron/kit/cli/recipe.py @@ -35,6 +35,7 @@ from nemotron.kit.cli.config import ConfigBuilder from nemotron.kit.cli.display import display_job_config, display_job_submission from nemotron.kit.cli.globals import GlobalContext, split_unknown_args +from nemotron.kit.cli.squash import ensure_squashed_image, get_sqsh_path, is_sqsh_image console = Console() @@ -622,8 +623,12 @@ def _build_executor( if container_image and tunnel and remote_job_dir: # Connect tunnel to check/create squashed image tunnel.connect() - container_image = _ensure_squashed_image( - tunnel, container_image, remote_job_dir, env_config, force=force_squash + container_image = ensure_squashed_image( + tunnel=tunnel, + 
container_image=container_image, + remote_job_dir=remote_job_dir, + env_config=env_config, + force=force_squash, ) # Select partition based on mode (--run uses run_partition, --batch uses batch_partition) @@ -787,118 +792,6 @@ def _build_packager( ) -def _get_squash_path(container_image: str, remote_job_dir: str) -> str: - """Get the path to the squashed container image. - - Creates a deterministic filename based on the container image name. - For example: nvcr.io/nvidian/nemo:25.11-nano-v3.rc2 -> nemo-25.11-nano-v3.rc2.sqsh - - Args: - container_image: Docker container image (e.g., nvcr.io/nvidian/nemo:25.11-nano-v3.rc2) - remote_job_dir: Remote directory for squashed images - - Returns: - Full path to squashed image file - """ - # Extract image name and tag for readable filename - # nvcr.io/nvidian/nemo:25.11-nano-v3.rc2 -> nemo:25.11-nano-v3.rc2 - image_name = container_image.split("/")[-1] - # nemo:25.11-nano-v3.rc2 -> nemo-25.11-nano-v3.rc2.sqsh - sqsh_name = image_name.replace(":", "-") + ".sqsh" - - return f"{remote_job_dir}/{sqsh_name}" - - -def _ensure_squashed_image( - tunnel: Any, - container_image: str, - remote_job_dir: str, - env_config: dict, - *, - force: bool = False, -) -> str: - """Ensure the container image is squashed on the remote cluster. - - Checks if a squashed version exists, and if not, creates it using enroot - on a compute node via salloc. 
- - Args: - tunnel: SSHTunnel instance (already connected) - container_image: Docker container image to squash - remote_job_dir: Remote directory for squashed images - env_config: Environment config with slurm settings (account, partition, time) - force: If True, re-squash even if file already exists - - Returns: - Path to the squashed image file - """ - sqsh_path = _get_squash_path(container_image, remote_job_dir) - - # Check if squashed image already exists (unless force is set) - if not force: - with console.status("[bold blue]Checking for squashed image..."): - result = tunnel.run(f"test -f {sqsh_path} && echo exists", hide=True, warn=True) - - if result.ok and "exists" in result.stdout: - console.print( - f"[green]✓[/green] Using existing squashed image: [cyan]{sqsh_path}[/cyan]" - ) - return sqsh_path - - # Need to create the squashed image - if force: - console.print("[yellow]![/yellow] Force re-squash requested, removing existing file...") - tunnel.run(f"rm -f {sqsh_path}", hide=True) - else: - console.print("[yellow]![/yellow] Squashed image not found, creating...") - console.print(f" [dim]Image:[/dim] {container_image}") - console.print(f" [dim]Output:[/dim] {sqsh_path}") - console.print() - - # Ensure directory exists - tunnel.run(f"mkdir -p {remote_job_dir}", hide=True) - - # Build salloc command to run enroot import on a compute node - # (login nodes don't have enough memory for enroot import) - account = env_config.get("account") - partition = env_config.get("run_partition") or env_config.get("partition") - time_limit = env_config.get("time", "04:00:00") - gpus_per_node = env_config.get("gpus_per_node") - - salloc_args = [] - if account: - salloc_args.append(f"--account={account}") - if partition: - salloc_args.append(f"--partition={partition}") - salloc_args.append("--nodes=1") - salloc_args.append("--ntasks-per-node=1") - if gpus_per_node: - salloc_args.append(f"--gpus-per-node={gpus_per_node}") - salloc_args.append(f"--time={time_limit}") - - 
enroot_cmd = f"enroot import --output {sqsh_path} docker://{container_image}" - cmd = f"salloc {' '.join(salloc_args)} srun {enroot_cmd}" - - # Run enroot import via salloc (this can take a while) - console.print( - "[bold blue]Allocating compute node and importing container " - "(this may take several minutes)...[/bold blue]" - ) - console.print(f"[dim]$ {cmd}[/dim]") - console.print() - result = tunnel.run(cmd, hide=False, warn=True) - - if not result.ok: - raise RuntimeError( - f"Failed to squash container image.\n" - f"Command: {cmd}\n" - f"Error: {result.stderr or 'Unknown error'}" - ) - - console.print(f"[green]✓[/green] Created squashed image: [cyan]{sqsh_path}[/cyan]") - return sqsh_path - - def _execute_stage_only( script_path: str, train_path: Path, @@ -1068,7 +961,10 @@ def _print_stage_commands( # Get squashed container path sqsh_path = None if container and remote_job_dir: - sqsh_path = _get_squash_path(container, remote_job_dir) + if is_sqsh_image(container): + sqsh_path = container + else: + sqsh_path = get_sqsh_path(container, remote_job_dir) # Mount to /workspace for simpler commands inside container container_mount_path = "/workspace" diff --git a/src/nemotron/kit/cli/squash.py b/src/nemotron/kit/cli/squash.py index a915a319..337fb437 100644 --- a/src/nemotron/kit/cli/squash.py +++ b/src/nemotron/kit/cli/squash.py @@ -21,8 +21,13 @@ from __future__ import annotations import re +import shlex from typing import Any +from rich.console import Console + +console = Console() + def container_to_sqsh_name(container: str) -> str: """Convert container image name to deterministic squash filename. @@ -50,6 +55,32 @@ def container_to_sqsh_name(container: str) -> str: return f"{safe_name}.sqsh" +def is_sqsh_image(image: str) -> bool: + """Check if image is already a .sqsh file. 
+ + Args: + image: Container image reference or path + + Returns: + True if image is already a squash file + """ + return image.endswith(".sqsh") or (image.startswith("/") and ".sqsh" in image) + + +def get_sqsh_path(container: str, remote_job_dir: str) -> str: + """Compute deterministic .sqsh path for a container. + + Args: + container: Docker container image (e.g., "nvcr.io/nvidian/nemo:25.11") + remote_job_dir: Remote directory for job files + + Returns: + Full path to squashed image file under containers/ + """ + sqsh_name = container_to_sqsh_name(container) + return f"{remote_job_dir}/containers/{sqsh_name}" + + def check_sqsh_exists(tunnel: Any, remote_path: str) -> bool: """Check if a squash file exists on the remote cluster. @@ -62,3 +93,99 @@ def check_sqsh_exists(tunnel: Any, remote_path: str) -> bool: """ result = tunnel.run(f"test -f {remote_path} && echo exists", hide=True, warn=True) return result.ok and "exists" in result.stdout + + +def ensure_squashed_image( + *, + tunnel: Any, + container_image: str, + remote_job_dir: str, + env_config: dict[str, Any], + force: bool = False, +) -> str: + """Ensure container is squashed on remote cluster, return .sqsh path. + + Checks if a squashed version exists, and if not, creates it using enroot + on a compute node via salloc. 
+ + Args: + tunnel: SSHTunnel instance (will be connected if not already) + container_image: Docker container image to squash + remote_job_dir: Remote directory for squashed images + env_config: Environment config with slurm settings (account, partition, time) + force: If True, re-squash even if file already exists + + Returns: + Path to the squashed image file + + Raises: + RuntimeError: If squashing fails + """ + sqsh_path = get_sqsh_path(container_image, remote_job_dir) + + # Ensure remote directory exists + tunnel.run(f"mkdir -p {shlex.quote(remote_job_dir)}/containers", hide=True) + + # Check if squashed image already exists (unless force is set) + if not force: + with console.status("[bold blue]Checking for squashed image..."): + if check_sqsh_exists(tunnel, sqsh_path): + console.print( + f"[green]✓[/green] Using existing squashed image: [cyan]{sqsh_path}[/cyan]" + ) + return sqsh_path + + # Need to create the squashed image + if force: + console.print("[yellow]![/yellow] Force re-squash requested, removing existing file...") + tunnel.run(f"rm -f {shlex.quote(sqsh_path)}", hide=True) + else: + console.print("[yellow]![/yellow] Squashed image not found, creating...") + + console.print(f" [dim]Image:[/dim] {container_image}") + console.print(f" [dim]Output:[/dim] {sqsh_path}") + console.print() + + # Build salloc command to run enroot import on a compute node + # (login nodes don't have enough memory for enroot import) + account = env_config.get("account") + partition = ( + env_config.get("run_partition") + or env_config.get("batch_partition") + or env_config.get("partition") + ) + time_limit = env_config.get("time", "04:00:00") + gpus_per_node = env_config.get("gpus_per_node") + + salloc_args = [] + if account: + salloc_args.append(f"--account={shlex.quote(account)}") + if partition: + salloc_args.append(f"--partition={shlex.quote(partition)}") + salloc_args.append("--nodes=1") + salloc_args.append("--ntasks-per-node=1") + if gpus_per_node: + 
salloc_args.append(f"--gpus-per-node={gpus_per_node}") + salloc_args.append(f"--time={time_limit}") + + enroot_cmd = f"enroot import --output {shlex.quote(sqsh_path)} docker://{container_image}" + cmd = f"salloc {' '.join(salloc_args)} srun {enroot_cmd}" + + # Run enroot import via salloc (this can take a while) + console.print( + "[bold blue]Allocating compute node and importing container " + "(this may take several minutes)...[/bold blue]" + ) + console.print(f"[dim]$ {cmd}[/dim]") + console.print() + result = tunnel.run(cmd, hide=False, warn=True) + + if not result.ok: + raise RuntimeError( + f"Failed to squash container image.\n" + f"Command: {cmd}\n" + f"Error: {result.stderr or 'Unknown error'}" + ) + + console.print(f"[green]✓[/green] Created squashed image: [cyan]{sqsh_path}[/cyan]") + return sqsh_path diff --git a/src/nemotron/kit/cli/utils.py b/src/nemotron/kit/cli/utils.py index 1b15d7ab..54be99bf 100644 --- a/src/nemotron/kit/cli/utils.py +++ b/src/nemotron/kit/cli/utils.py @@ -6,10 +6,36 @@ from pathlib import Path from typing import Any +# Pattern to match ${run.*} tokens (for embedded interpolations) +RUN_TOKEN_RE = re.compile(r"\$\{run\.([^}]+)\}") + + +def _lookup_run_path(run_data: dict, dotted_path: str) -> tuple[bool, Any]: + """Look up a dotted path in run_data. + + Args: + run_data: The run section dictionary + dotted_path: Path like "env.remote_job_dir" or "wandb.project" + + Returns: + Tuple of (found, value). If found is False, value is None. + """ + keys = dotted_path.split(".") + current = run_data + for key in keys: + if not isinstance(current, dict) or key not in current: + return (False, None) + current = current[key] + return (True, current) + def resolve_run_interpolations(obj: Any, run_data: dict) -> Any: """Recursively resolve ${run.*} interpolations in a dict/list. 
+ Handles both: + - Exact matches: "${run.foo}" -> preserves type of resolved value + - Embedded: "${run.foo}/bar" -> string substitution via regex + Only resolves ${run.X.Y} style interpolations, preserves other interpolations like ${art:data,path}. @@ -24,18 +50,29 @@ def resolve_run_interpolations(obj: Any, run_data: dict) -> Any: return {k: resolve_run_interpolations(v, run_data) for k, v in obj.items()} elif isinstance(obj, list): return [resolve_run_interpolations(item, run_data) for item in obj] - elif isinstance(obj, str) and obj.startswith("${run.") and obj.endswith("}"): - # Extract the path: ${run.wandb.project} -> wandb.project - path = obj[6:-1] # Remove "${run." and "}" - # Navigate run_data to get the value - parts = path.split(".") - value = run_data - for part in parts: - if isinstance(value, dict) and part in value: - value = value[part] - else: - return obj # Can't resolve, keep original - return value + elif isinstance(obj, str): + # Check for exact match first (preserves type) + if obj.startswith("${run.") and obj.endswith("}") and obj.count("${") == 1: + # Extract the path: ${run.wandb.project} -> wandb.project + path = obj[6:-1] # Remove "${run." and "}" + found, value = _lookup_run_path(run_data, path) + if found: + return value + return obj # Can't resolve, keep original + + # Check for embedded interpolations (string substitution) + if "${run." 
in obj: + + def replace_token(match: re.Match) -> str: + dotted_path = match.group(1) + found, value = _lookup_run_path(run_data, dotted_path) + if found: + return str(value) + return match.group(0) # Keep original if not found + + return RUN_TOKEN_RE.sub(replace_token, obj) + + return obj else: return obj diff --git a/src/nemotron/recipes/evaluator/config/nemotron-3-nano-nemo-ray.yaml b/src/nemotron/recipes/evaluator/config/nemotron-3-nano-nemo-ray.yaml new file mode 100644 index 00000000..38d6f03c --- /dev/null +++ b/src/nemotron/recipes/evaluator/config/nemotron-3-nano-nemo-ray.yaml @@ -0,0 +1,197 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Nemotron-3-Nano-30B Evaluation with NeMo Framework Ray Deployment +# +# This config evaluates the NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16 model +# using NeMo Framework's Ray-based in-framework deployment. 
+# +# Usage: +# nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER +# +# Override checkpoint: +# nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER \ +# deployment.checkpoint_path=/path/to/checkpoint +# +# Filter tasks: +# nemotron evaluate -c nemotron-3-nano-nemo-ray --run MY-CLUSTER -t adlr_mmlu + +# ============================================================================= +# Defaults - Use slurm executor and generic deployment +# ============================================================================= +defaults: + - execution: slurm/default + - deployment: generic + - _self_ + +# ============================================================================= +# Nemotron run section (env.toml injection) +# This section is used for interpolation and stripped before calling the launcher +# ============================================================================= +run: + # Environment config - populated from env.toml profile via --run + env: + # Default container for NeMo Framework Ray deployment (squash file for Slurm) + container: /lustre/fsw/portfolios/coreai/users/athittenaman/nvidia+nemo+25.11.nemotron_3_nano.sqsh + executor: slurm + host: ${oc.env:HOSTNAME,localhost} + user: ${oc.env:USER} + account: null + partition: batch + remote_job_dir: ${oc.env:PWD}/.nemotron + time: "04:00:00" + + # W&B config - populated from env.toml [wandb] section + wandb: + entity: null + project: null + +# ============================================================================= +# Execution Configuration +# ============================================================================= +execution: + type: slurm + hostname: ${run.env.host} + username: ${run.env.user} + account: ${run.env.account} + output_dir: ${run.env.remote_job_dir}/evaluations + walltime: ${run.env.time} + partition: ${run.env.partition} + + # Slurm resource configuration + num_nodes: 1 + ntasks_per_node: 1 + gres: gpu:8 + subproject: nemo-evaluator-launcher + 
sbatch_comment: null + + deployment: + n_tasks: ${execution.num_nodes} + + # HAProxy for load balancing across Ray workers + proxy: + type: haproxy + image: gitlab-master.nvidia.com/dl/joc/competitive_evaluation/nvidia-core-evals/haproxy-container/haproxy:2025-10-03T17-19-2679aefe0800 + config: + haproxy_port: 5009 + health_check_path: /v1/health + health_check_status: 200 + + # Auto-export results after evaluation completes + auto_export: + enabled: true + destinations: + - wandb + + # Environment variables for deployment and evaluation containers + # NOTE: HF_TOKEN must be set in your environment if using HuggingFace gated models + # NOTE: WANDB_API_KEY is auto-detected from local wandb login (like nemo-run) + env_vars: + deployment: + HF_HOME: /cache/huggingface + NIM_CACHE_PATH: /cache/nim + VLLM_CACHE_ROOT: /cache/vllm + evaluation: + HF_HOME: /cache/huggingface + # W&B export env vars (auto-injected by CLI if logged in locally) + # These map host env var names -> container env var names + export: + WANDB_API_KEY: WANDB_API_KEY + WANDB_PROJECT: WANDB_PROJECT + WANDB_ENTITY: WANDB_ENTITY + + # Mounts for deployment and evaluation containers + mounts: + deployment: + /lustre: /lustre + /lustre/fsw/portfolios/coreai/users/athittenaman/Export-Deploy: /opt/Export-Deploy + evaluation: + /lustre: /lustre + mount_home: false + +# ============================================================================= +# Deployment Configuration - NeMo Framework Ray +# ============================================================================= +deployment: + type: generic + multiple_instances: true + image: ${run.env.container} + health_check_path: /v1/health + port: 1235 # Port used by Ray deployment + served_model_name: nemo-model + # Hardcoded checkpoint path - override via CLI: deployment.checkpoint_path=/your/path + checkpoint_path: /lustre/fsw/portfolios/coreai/users/athittenaman/checkpoints/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16/iter_0000000 + + # NeMo Framework Ray 
deployment command + # Parallelism settings for Nano3 (30B MoE model): TP=2, EP=8 + command: >- + bash -c 'export TRITON_CACHE_DIR=/tmp/triton_cache_$$SLURM_NODEID; + python /opt/Export-Deploy/scripts/deploy/nlp/deploy_ray_inframework.py + --megatron_checkpoint /checkpoint/ + --num_gpus 8 + --tensor_model_parallel_size 2 + --expert_model_parallel_size 8 + --port 1235 + --num_replicas 1' + + # Health check endpoints + endpoints: + chat: /v1/chat/completions/ + completions: /v1/completions/ + health: /v1/health + +# ============================================================================= +# Evaluation Configuration +# ============================================================================= +evaluation: + nemo_evaluator_config: + config: + params: + max_retries: 5 + parallelism: 4 + request_timeout: 6000 + extra: + tokenizer: ${deployment.checkpoint_path}/tokenizer + tokenizer_backend: huggingface + target: + api_endpoint: + adapter_config: + output_dir: /results + use_progress_tracking: false + use_caching: true + caching_dir: /results/cache + use_response_logging: true + max_logged_responses: 10 + use_request_logging: true + max_logged_requests: 10 + + # Tasks to run (can be filtered with -t flag) + tasks: + - name: adlr_mmlu + nemo_evaluator_config: + config: + params: + top_p: 0.0 + - name: adlr_arc_challenge_llama_25_shot + - name: adlr_winogrande_5_shot + - name: hellaswag + - name: openbookqa + +# ============================================================================= +# Export Configuration - W&B +# ============================================================================= +export: + wandb: + entity: ${run.wandb.entity} + project: ${run.wandb.project} diff --git a/src/nemotron/recipes/nano3/README.md b/src/nemotron/recipes/nano3/README.md index dae4c0a6..871a7611 100644 --- a/src/nemotron/recipes/nano3/README.md +++ b/src/nemotron/recipes/nano3/README.md @@ -71,6 +71,7 @@ flowchart TB | [Stage 0: Pretrain](./stage0_pretrain/) | Train on 
large text corpus | Megatron-Bridge | Base model checkpoint |
| [Stage 1: SFT](./stage1_sft/) | Instruction tuning | Megatron-Bridge | Instruction-following model |
| [Stage 2: RL](./stage2_rl/) | Alignment with GRPO | NeMo-RL | Final aligned model |
+| [Stage 3: Eval](./stage3_eval/) | Model evaluation | NeMo-Evaluator | Benchmark results |

## Prerequisites

@@ -121,6 +122,9 @@ uv run nemotron nano3 sft --run YOUR-CLUSTER
# Stage 2: Data prep + RL
uv run nemotron nano3 data prep rl --run YOUR-CLUSTER
uv run nemotron nano3 rl --run YOUR-CLUSTER
+
+# Stage 3: Evaluation
+uv run nemotron nano3 eval --run YOUR-CLUSTER
```

### Testing with Tiny Config
@@ -163,6 +167,24 @@ uv run nemotron nano3 sft [--run <env>] [-c <config>] [overrides...]
uv run nemotron nano3 rl [--run <env>] [-c <config>] [overrides...]
```

+### Evaluation
+
+```bash
+# Evaluate the trained model (defaults to RL output: run.model=rl:latest)
+uv run nemotron nano3 eval [--run <env>] [-c <config>] [-t <task> ...] [overrides...]
+
+# Evaluate a specific model artifact
+uv run nemotron nano3 eval --run YOUR-CLUSTER run.model=sft:v2
+
+# Filter specific tasks
+uv run nemotron nano3 eval --run YOUR-CLUSTER -t adlr_mmlu -t hellaswag
+
+# Dry run (preview resolved config)
+uv run nemotron nano3 eval --run YOUR-CLUSTER --dry-run
+```
+
+> **Note**: Evaluation requires the `nemo-evaluator-launcher` package. 
Install with: `pip install "nemotron[evaluator]"` + ### Execution Options | Option | Description | @@ -261,6 +283,7 @@ torchrun --nproc_per_node=8 train.py --config config/tiny.yaml - [Stage 0: Pretraining](./stage0_pretrain/README.md) - Pretrain on large text corpus - [Stage 1: SFT](./stage1_sft/README.md) - Supervised fine-tuning for instruction following - [Stage 2: RL](./stage2_rl/README.md) - Reinforcement learning for alignment +- [Stage 3: Eval](./stage3_eval/README.md) - Model evaluation with NeMo-Evaluator ## Further Reading diff --git a/src/nemotron/recipes/nano3/stage3_eval/README.md b/src/nemotron/recipes/nano3/stage3_eval/README.md new file mode 100644 index 00000000..cbe54bc2 --- /dev/null +++ b/src/nemotron/recipes/nano3/stage3_eval/README.md @@ -0,0 +1,178 @@ +# Stage 3: Evaluation + +Evaluate trained models using NeMo-Evaluator, supporting multiple benchmark tasks and automatic results export. + +## Overview + +The evaluation stage integrates with `nemo-evaluator-launcher` to: +- Deploy your trained model using vLLM +- Run standardized benchmark tasks (MMLU, HellaSwag, etc.) 
+- Export results to W&B for tracking + +## Prerequisites + +Install the evaluator dependency: + +```bash +pip install "nemotron[evaluator]" +``` + +## Quick Start + +```bash +# Evaluate the RL model (default) +uv run nemotron nano3 eval --run YOUR-CLUSTER + +# Evaluate a specific model +uv run nemotron nano3 eval --run YOUR-CLUSTER run.model=sft:v2 + +# Run specific tasks only +uv run nemotron nano3 eval --run YOUR-CLUSTER -t adlr_mmlu -t hellaswag + +# Preview config without running +uv run nemotron nano3 eval --run YOUR-CLUSTER --dry-run +``` + +## Configuration + +### Default Config + +The default configuration (`config/default.yaml`) includes: + +- **Model**: `run.model=rl:latest` (last RL checkpoint) +- **Deployment**: vLLM with TP=4 for Nano3 (30B MoE) +- **Tasks**: MMLU, HellaSwag, ARC-Challenge + +### Config Structure + +```yaml +# Nemotron artifact resolution +run: + model: rl:latest # Model artifact to evaluate + env: {...} # Populated from env.toml + wandb: {...} # Populated from env.toml [wandb] + +# Evaluator launcher config +execution: + type: slurm # Execution backend (local/slurm) + hostname: ... 
# From ${run.env.host} + +deployment: + type: vllm + checkpoint_path: ${art:model,path} # Resolved from artifact + tensor_parallel_size: 4 + +evaluation: + tasks: + - name: adlr_mmlu + - name: hellaswag + +export: + wandb: + entity: ${run.wandb.entity} + project: ${run.wandb.project} +``` + +### Task Filtering + +Use `-t/--task` to run specific tasks: + +```bash +# Single task +uv run nemotron nano3 eval --run CLUSTER -t adlr_mmlu + +# Multiple tasks +uv run nemotron nano3 eval --run CLUSTER -t adlr_mmlu -t hellaswag -t arc_challenge +``` + +## env.toml Integration + +Evaluation uses the same `env.toml` profile as training: + +```toml +[YOUR-CLUSTER] +executor = "slurm" +host = "login.cluster.com" +user = "myuser" +account = "my-account" +partition = "batch" +remote_job_dir = "/lustre/jobs" +time = "04:00:00" + +[wandb] +entity = "my-org" +project = "nano3-evals" +``` + +The env.toml fields map to evaluator config: +- `host` → `execution.hostname` +- `user` → `execution.username` +- `account` → `execution.account` +- `partition` → `execution.partition` +- `remote_job_dir` → `execution.output_dir` base +- `time` → `execution.walltime` +- `[wandb]` → `export.wandb.*` + +## Artifacts + +### Input Artifact + +By default, evaluates the RL stage output. Override with: + +```bash +# Evaluate SFT checkpoint +uv run nemotron nano3 eval --run CLUSTER run.model=sft:latest + +# Evaluate specific version +uv run nemotron nano3 eval --run CLUSTER run.model=sft:v2 +``` + +### Output + +Results are exported to W&B as specified in `export.wandb`. 
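+With the example `[wandb]` profile above, the resolved export section would look like
+the following (entity and project values are the illustrative ones from that profile):
+
+```yaml
+export:
+  wandb:
+    entity: my-org        # from env.toml [wandb] entity
+    project: nano3-evals  # from env.toml [wandb] project
+```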
Check status with the invocation ID printed when the evaluation is submitted:

```bash
nemo-evaluator-launcher status <invocation_id>
nemo-evaluator-launcher logs <invocation_id>
```

## Local Execution

For local testing without Slurm:

```bash
# Set execution type to local
uv run nemotron nano3 eval execution.type=local
```

## Generic Evaluate Command

For custom evaluation configs not tied to nano3:

```bash
# Requires explicit config path
uv run nemotron evaluate -c /path/to/eval.yaml --run YOUR-CLUSTER
```

## Troubleshooting

### Missing Evaluator Package

```
Error: nemo-evaluator-launcher is required for evaluation
Install with: pip install "nemotron[evaluator]"
```

### Task Not Found

```
Error: Requested task(s) not found in config: ['missing_task']
Available tasks: ['adlr_mmlu', 'hellaswag', 'arc_challenge']
```

Check the task names in your config, or use `nemo-evaluator-launcher tasks` to list all available tasks.

## Further Reading

- [NeMo-Evaluator Documentation](https://github.com/NVIDIA-NeMo/Evaluator)
- [env.toml Configuration](../../../../docs/train/nemo-run.md)
diff --git a/src/nemotron/recipes/nano3/stage3_eval/config/default.yaml b/src/nemotron/recipes/nano3/stage3_eval/config/default.yaml
new file mode 100644
index 00000000..99a47937
--- /dev/null
+++ b/src/nemotron/recipes/nano3/stage3_eval/config/default.yaml
@@ -0,0 +1,120 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. 
+ +# Stage 3: Evaluation Configuration for Nemotron Nano3 +# +# This config integrates with nemo-evaluator-launcher. The 'run' section is +# used for Nemotron's artifact resolution and env.toml profile injection, +# then stripped before passing to the evaluator launcher. +# +# Usage: +# nemotron nano3 eval --run MY-CLUSTER +# nemotron nano3 eval --run MY-CLUSTER run.model=sft:v2 +# nemotron nano3 eval --run MY-CLUSTER -t adlr_mmlu -t hellaswag +# nemotron nano3 eval --dry-run + +# ============================================================================= +# Defaults - Use local executor and vLLM deployment +# For slurm, override with: execution.type=slurm +# ============================================================================= +defaults: + - execution: local + - deployment: vllm + - _self_ + +# ============================================================================= +# Nemotron run section (artifact resolution + env.toml injection) +# This section is used for interpolation and stripped before calling the launcher +# ============================================================================= +run: + # Model artifact to evaluate (default: RL stage output) + model: rl:latest + + # Environment config - populated from env.toml profile via --run + # These defaults allow local execution without env.toml + env: + executor: local + container: nvcr.io/nvidia/nemo-evaluator:latest + host: ${oc.env:HOSTNAME,localhost} + user: ${oc.env:USER} + account: null + partition: null + remote_job_dir: ${oc.env:PWD}/.nemotron + time: "04:00:00" + + # W&B config - populated from env.toml [wandb] section + wandb: + entity: null + project: null + +# ============================================================================= +# NeMo-Evaluator Launcher Configuration +# Everything below is passed directly to nemo-evaluator-launcher +# ============================================================================= + +# Execution configuration +# Maps env.toml profile fields to 
evaluator's execution section +execution: + type: ${run.env.executor} + hostname: ${run.env.host} + username: ${run.env.user} + account: ${run.env.account} + partition: ${run.env.partition} + output_dir: ${run.env.remote_job_dir}/evaluations + walltime: ${run.env.time} + # Auto-export results to W&B after evaluation completes + auto_export: + enabled: true + destinations: + - wandb + +# Deployment configuration +# Specifies how to serve the model for evaluation +deployment: + type: vllm + image: ${run.env.container} + checkpoint_path: ${art:model,path} + # Parallelism settings for Nano3 (30B MoE model) + tensor_parallel_size: 4 + data_parallel_size: 1 + extra_args: "--max-model-len 32768" + +# Evaluation configuration +# Defines tasks and evaluation parameters +evaluation: + # Environment variables for evaluation tasks + env_vars: + HF_TOKEN: HF_TOKEN + + # Global config settings that apply to all tasks + nemo_evaluator_config: + config: + params: + request_timeout: 3600 + parallelism: 8 + # Uncomment for quick testing: + # limit_samples: 10 + + # Tasks to run (can be filtered with -t flag) + tasks: + - name: adlr_mmlu + - name: hellaswag + - name: arc_challenge + +# Export configuration +# W&B export for results logging - populated from env.toml [wandb] section +export: + wandb: + entity: ${run.wandb.entity} + project: ${run.wandb.project} diff --git a/uv.lock b/uv.lock index edb4618d..88704e28 100644 --- a/uv.lock +++ b/uv.lock @@ -252,6 +252,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916, upload-time = "2025-03-17T00:02:52.713Z" }, ] +[[package]] +name = "argcomplete" +version = "3.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/38/61/0b9ae6399dd4a58d8c1b1dc5a27d6f2808023d0b5dd3104bb99f45a33ff6/argcomplete-3.6.3.tar.gz", hash = "sha256:62e8ed4fd6a45864acc8235409461b72c9a28ee785a2011cc5eb78318786c89c", size = 73754, upload-time = "2025-10-20T03:33:34.741Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/f5/9373290775639cb67a2fce7f629a1c240dce9f12fe927bc32b2736e16dfc/argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce", size = 43846, upload-time = "2025-10-20T03:33:33.021Z" }, +] + [[package]] name = "astroid" version = "3.3.11" @@ -374,6 +383,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, +] + [[package]] name = "botocore" version = "1.41.5" @@ -921,6 +939,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, 
upload-time = "2025-10-08T18:03:48.35Z" }, ] +[[package]] +name = "flask" +version = "3.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "blinker" }, + { name = "click" }, + { name = "itsdangerous" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/6d/cfe3c0fcc5e477df242b98bfe186a4c34357b4847e87ecaef04507332dab/flask-3.1.2.tar.gz", hash = "sha256:bf656c15c80190ed628ad08cdfd3aaa35beb087855e2f494910aa3774cc4fd87", size = 720160, upload-time = "2025-08-19T21:03:21.205Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/f9/7f9263c5695f4bd0023734af91bedb2ff8209e8de6ead162f35d8dc762fd/flask-3.1.2-py3-none-any.whl", hash = "sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c", size = 103308, upload-time = "2025-08-19T21:03:19.499Z" }, +] + [[package]] name = "frozenlist" version = "1.8.0" @@ -1352,6 +1387,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload-time = "2025-10-23T12:11:59.557Z" }, ] +[[package]] +name = "hydra-core" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "omegaconf" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824", size = 3263494, upload-time = "2023-02-23T18:33:43.03Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-py3-none-any.whl", hash = 
"sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b", size = 154547, upload-time = "2023-02-23T18:33:40.801Z" }, +] + [[package]] name = "hyperframe" version = "6.1.0" @@ -1422,6 +1471,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/4b/b99e37f88336009971405cbb7630610322ed6fbfa31e1d7ab3fbf3049a2d/invoke-2.2.1-py3-none-any.whl", hash = "sha256:2413bc441b376e5cd3f55bb5d364f973ad8bdd7bf87e53c79de3c11bf3feecc8", size = 160287, upload-time = "2025-10-11T00:36:33.703Z" }, ] +[[package]] +name = "itsdangerous" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -2093,6 +2151,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579, upload-time = "2025-02-12T10:53:02.078Z" }, ] +[[package]] +name = "nemo-evaluator" +version = "0.1.69" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flask" }, + { name = "jinja2" }, + { name = "psutil" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "structlog" }, + { name = "typing-extensions" }, + { name = "werkzeug" }, + { name = "yq" 
}, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/aa/2d0dc08fbe404987159c292497d055639d236408ba82472c93aa34cc9be8/nemo_evaluator-0.1.69.tar.gz", hash = "sha256:8b386fb4a0882661d4863bdb96487e40cb7c89c90a04b09f0db9bf6115e29681", size = 107078, upload-time = "2026-01-22T01:35:48.067Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/42/c9fe6aded43f6174e1bd72805f2621d580e0b039f754cb76ffa7800cc715/nemo_evaluator-0.1.69-py3-none-any.whl", hash = "sha256:329ac3aadf22028515aab03b0cd902c95b7d547d90e6a58620295e98c5ab7a16", size = 141078, upload-time = "2026-01-22T01:35:46.971Z" }, +] + +[[package]] +name = "nemo-evaluator-launcher" +version = "0.1.71" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hydra-core" }, + { name = "jinja2" }, + { name = "leptonai" }, + { name = "nemo-evaluator" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "simple-parsing" }, + { name = "structlog" }, + { name = "tabulate" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1e/30/a2c5ccf527a2c0d70531ffab1c1875006cdd07823d4f8fbdc460e72f92aa/nemo_evaluator_launcher-0.1.71.tar.gz", hash = "sha256:5ccbca7b315278dc1aa87c5a5012213439884e3308c2ccd26ed10ad2a0edd007", size = 176684, upload-time = "2026-01-22T01:35:56.947Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/7f/27f7d7937cd599b3b4b3bad0d0e1e41bad573aae5dc1011fee85b3e9e6bc/nemo_evaluator_launcher-0.1.71-py3-none-any.whl", hash = "sha256:80a78d896f2972cfaba999eea844961d44452d5225fb45d8360bffbccc25ab3f", size = 221172, upload-time = "2026-01-22T01:35:55.674Z" }, +] + [[package]] name = "nemo-run" version = "0.7.0" @@ -2150,6 +2251,7 @@ dependencies = [ [package.optional-dependencies] all = [ { name = "gcsfs" }, + { name = "nemo-evaluator-launcher" }, { name = "s3fs" }, { name = "sentencepiece" }, { name = "wandb" }, @@ -2160,6 +2262,9 @@ dev = [ { name = "pytest-cov" }, { 
name = "ruff" }, ] +evaluator = [ + { name = "nemo-evaluator-launcher" }, +] gcs = [ { name = "gcsfs" }, ] @@ -2203,6 +2308,8 @@ requires-dist = [ { name = "huggingface-hub", specifier = ">=0.20.0" }, { name = "jinja2", specifier = ">=3.0.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0.0" }, + { name = "nemo-evaluator-launcher", marker = "extra == 'all'", specifier = ">=0.1.0" }, + { name = "nemo-evaluator-launcher", marker = "extra == 'evaluator'", specifier = ">=0.1.0" }, { name = "nemo-run", specifier = ">=0.4.0" }, { name = "numpy", specifier = ">=1.24.0" }, { name = "omegaconf", specifier = ">=2.3.0" }, @@ -2228,7 +2335,7 @@ requires-dist = [ { name = "wandb", marker = "extra == 'wandb'", specifier = ">=0.15.0" }, { name = "xxhash", specifier = ">=3.4.0" }, ] -provides-extras = ["wandb", "s3", "gcs", "sentencepiece", "dev", "all"] +provides-extras = ["wandb", "s3", "gcs", "sentencepiece", "evaluator", "dev", "all"] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=9.0.2" }] @@ -2863,6 +2970,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/15/4f02896cc3df04fc465010a4c6a0cd89810f54617a32a70ef531ed75d61c/protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c", size = 170501, upload-time = "2025-12-06T00:17:52.211Z" }, ] +[[package]] +name = "psutil" +version = "7.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = "sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253, upload-time = "2025-12-29T08:26:00.169Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = 
"sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624, upload-time = "2025-12-29T08:26:04.255Z" }, + { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132, upload-time = "2025-12-29T08:26:06.228Z" }, + { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612, upload-time = "2025-12-29T08:26:08.276Z" }, + { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201, upload-time = "2025-12-29T08:26:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/fb/23/851cadc9764edcc18f0effe7d0bf69f727d4cf2442deb4a9f78d4e4f30f2/psutil-7.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:923f8653416604e356073e6e0bccbe7c09990acef442def2f5640dd0faa9689f", size = 139081, upload-time = "2025-12-29T08:26:12.483Z" }, + { url = "https://files.pythonhosted.org/packages/59/82/d63e8494ec5758029f31c6cb06d7d161175d8281e91d011a4a441c8a43b5/psutil-7.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cfbe6b40ca48019a51827f20d830887b3107a74a79b01ceb8cc8de4ccb17b672", size = 134767, upload-time = "2025-12-29T08:26:14.528Z" }, + { url = "https://files.pythonhosted.org/packages/05/c2/5fb764bd61e40e1fe756a44bd4c21827228394c17414ade348e28f83cd79/psutil-7.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:494c513ccc53225ae23eec7fe6e1482f1b8a44674241b54561f755a898650679", size = 129716, upload-time = "2025-12-29T08:26:16.017Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d2/935039c20e06f615d9ca6ca0ab756cf8408a19d298ffaa08666bc18dc805/psutil-7.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3fce5f92c22b00cdefd1645aa58ab4877a01679e901555067b1bd77039aa589f", size = 130133, upload-time = "2025-12-29T08:26:18.009Z" }, + { url = "https://files.pythonhosted.org/packages/77/69/19f1eb0e01d24c2b3eacbc2f78d3b5add8a89bf0bb69465bc8d563cc33de/psutil-7.2.1-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93f3f7b0bb07711b49626e7940d6fe52aa9940ad86e8f7e74842e73189712129", size = 181518, upload-time = "2025-12-29T08:26:20.241Z" }, + { url = "https://files.pythonhosted.org/packages/e1/6d/7e18b1b4fa13ad370787626c95887b027656ad4829c156bb6569d02f3262/psutil-7.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d34d2ca888208eea2b5c68186841336a7f5e0b990edec929be909353a202768a", size = 184348, upload-time = "2025-12-29T08:26:22.215Z" }, + { url = "https://files.pythonhosted.org/packages/98/60/1672114392dd879586d60dd97896325df47d9a130ac7401318005aab28ec/psutil-7.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2ceae842a78d1603753561132d5ad1b2f8a7979cb0c283f5b52fb4e6e14b1a79", size = 140400, upload-time = "2025-12-29T08:26:23.993Z" }, + { url = "https://files.pythonhosted.org/packages/fb/7b/d0e9d4513c46e46897b46bcfc410d51fc65735837ea57a25170f298326e6/psutil-7.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:08a2f175e48a898c8eb8eace45ce01777f4785bc744c90aa2cc7f2fa5462a266", size = 135430, upload-time = "2025-12-29T08:26:25.999Z" }, + { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = 
"sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137, upload-time = "2025-12-29T08:26:27.759Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947, upload-time = "2025-12-29T08:26:29.548Z" }, + { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694, upload-time = "2025-12-29T08:26:32.147Z" }, + { url = "https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136, upload-time = "2025-12-29T08:26:34.079Z" }, + { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108, upload-time = "2025-12-29T08:26:36.225Z" }, + { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload-time = "2025-12-29T08:26:39.21Z" }, + { url = "https://files.pythonhosted.org/packages/34/68/d9317542e3f2b180c4306e3f45d3c922d7e86d8ce39f941bb9e2e9d8599e/psutil-7.2.1-cp37-abi3-win_amd64.whl", hash = 
"sha256:b1b0671619343aa71c20ff9767eced0483e4fc9e1f489d50923738caf6a03c17", size = 136938, upload-time = "2025-12-29T08:26:41.036Z" }, + { url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" }, +] + [[package]] name = "pyarrow" version = "22.0.0" @@ -3838,6 +3973,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "simple-parsing" +version = "0.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docstring-parser" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/be/67/e3e5b89f1c81ca574a157104b0ecebfc3096933cbf58f644c9cb0a56c94f/simple_parsing-0.1.8.tar.gz", hash = "sha256:19c2a9002ebd7ad281fce579f9b2a0aa0c4d67e1688cee0e8cdf6d8e98ec2c18", size = 255933, upload-time = "2026-01-20T23:29:05.258Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/46/eab9fe2a4a2f6665a7c79b2007121a00ba95502fef50c1537d8147b4f91c/simple_parsing-0.1.8-py3-none-any.whl", hash = "sha256:4d1ef136a28674b3ebb9760cacda4d6f01de32de0b280a869df977d182f12947", size = 113438, upload-time = "2026-01-20T23:29:04.17Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -4122,6 +4270,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, ] 
+[[package]] +name = "structlog" +version = "25.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" }, +] + [[package]] name = "tabulate" version = "0.9.0" @@ -4584,6 +4744,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, ] +[[package]] +name = "werkzeug" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/70/1469ef1d3542ae7c2c7b72bd5e3a4e6ee69d7978fa8a3af05a38eca5becf/werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67", size = 864754, upload-time = "2026-01-08T17:49:23.247Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025, upload-time = "2026-01-08T17:49:21.859Z" }, +] + [[package]] name = "win32-setctime" version = "1.2.0" @@ -4662,6 
+4834,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, ] +[[package]] +name = "xmltodict" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/aa/917ceeed4dbb80d2f04dbd0c784b7ee7bba8ae5a54837ef0e5e062cd3cfb/xmltodict-1.0.2.tar.gz", hash = "sha256:54306780b7c2175a3967cad1db92f218207e5bc1aba697d887807c0fb68b7649", size = 25725, upload-time = "2025-09-17T21:59:26.459Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/20/69a0e6058bc5ea74892d089d64dfc3a62ba78917ec5e2cfa70f7c92ba3a5/xmltodict-1.0.2-py3-none-any.whl", hash = "sha256:62d0fddb0dcbc9f642745d8bbf4d81fd17d6dfaec5a15b5c1876300aad92af0d", size = 13893, upload-time = "2025-09-17T21:59:24.859Z" }, +] + [[package]] name = "xxhash" version = "3.6.0" @@ -4906,6 +5087,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" }, ] +[[package]] +name = "yq" +version = "3.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argcomplete" }, + { name = "pyyaml" }, + { name = "tomlkit" }, + { name = "xmltodict" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/6a/eb9721ed0929d0f55d167c2222d288b529723afbef0a07ed7aa6cca72380/yq-3.4.3.tar.gz", hash = "sha256:ba586a1a6f30cf705b2f92206712df2281cd320280210e7b7b80adcb8f256e3b", size = 33214, upload-time = "2024-04-27T15:39:43.29Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f2/ba/d1b21f3e57469030bd6536b91bb28fedd2511d4e68b5a575f2bdb3a3dbb6/yq-3.4.3-py3-none-any.whl", hash = "sha256:547e34bc3caacce83665fd3429bf7c85f8e8b6b9aaee3f953db1ad716ff3434d", size = 18812, upload-time = "2024-04-27T15:39:41.652Z" }, +] + [[package]] name = "zipp" version = "3.23.0"