Commit 087a35c

Merge branch 'main' into dev

Merge upstream changes while preserving the CISPO implementation:

- Add CISPO to advantage-estimator choices alongside on_policy_distillation
- Keep compute_cispo_loss function in ppo_utils.py
- Integrate OPSM mask support from main
- Use refactored offload_train/onload_rollout helpers from main

2 parents 585a844 + a4a59ea commit 087a35c

File tree

251 files changed: +18945 −6699 lines changed

.github/workflows/conda-ci.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -77,7 +77,7 @@ jobs:
           micromamba activate slime
           export CUDA_HOME="$CONDA_PREFIX"
-          bash tests/test_qwen3-30B-A3B.sh
+          SLIME_TEST_USE_DEEPEP=0 SLIME_TEST_USE_FP8_ROLLOUT=0 python tests/test_qwen3_30B_A3B.py
        shell: bash

      - name: Cleanup
```
Lines changed: 37 additions & 0 deletions (new file)

```python
from pathlib import Path

import jinja2


def main():
    """
    Generates GitHub workflow YAML files from Jinja2 templates.
    """
    workflows_dir = Path(__file__).parent
    print(f"Scan dir: {workflows_dir}")
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(str(workflows_dir)),
        block_start_string="<%",
        block_end_string="%>",
        variable_start_string="<<",
        variable_end_string=">>",
    )

    for template_path in workflows_dir.glob("*.yml.j2"):
        template = env.get_template(template_path.name)
        content = template.render()

        yaml_path = template_path.with_suffix("")
        with open(yaml_path, "w") as f:
            f.write(
                "#" * 80
                + "\n# This file is auto-generated from the .j2 file via generate_github_workflows.py. Do not edit manually.\n"
                + "#" * 80
                + "\n"
            )
            f.write(content)

        print(f"Generated {yaml_path} from {template_path}")


if __name__ == "__main__":
    main()
```
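The generator above swaps Jinja2's default `{% %}`/`{{ }}` delimiters for `<% %>`/`<< >>` so that GitHub Actions' own `${{ ... }}` expressions pass through the templates untouched. A minimal standalone sketch of the same trick (the template string and variable names here are illustrative, not from the commit):

```python
import jinja2

# Non-default delimiters keep GitHub Actions' "${{ ... }}" expressions from
# being parsed as Jinja2 variables; they survive rendering verbatim.
env = jinja2.Environment(
    block_start_string="<%",
    block_end_string="%>",
    variable_start_string="<<",
    variable_end_string=">>",
)

template = env.from_string(
    "<% for t in tests %>run: python tests/<< t >>\n<% endfor %>"
    "key: ${{ secrets.WANDB_API_KEY }}"
)
out = template.render(tests=["a.py", "b.py"])
print(out)
```

With the default delimiters, Jinja2 would try to evaluate `secrets.WANDB_API_KEY` itself (as part of `{{ ... }}`) and fail or emit an empty string; with the custom ones, the Actions expression reaches the generated YAML intact.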

.github/workflows/pr-test.yml

Lines changed: 99 additions & 10 deletions

```diff
@@ -1,3 +1,7 @@
+################################################################################
+# This file is auto-generated from the .j2 file via generate_github_workflows.py. Do not edit manually.
+################################################################################
 name: PR Test

 on:
@@ -7,15 +11,102 @@ on:
   pull_request:
     branches: [main]
     types: [synchronize, labeled]
+  workflow_dispatch:
+    inputs:
+      infinite_run:
+        description: 'Run training infinitely'
+        required: false
+        type: boolean
+        default: false

 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true

 jobs:
-  e2e-test:
-    # TODO may use run-ci label etc
-    if: github.event.pull_request.draft == false
+
+  e2e-test-short:
+    if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-short'))
+    runs-on: self-hosted
+    container:
+      image: slimerl/slime:latest
+      options: >
+        --gpus all
+        --ipc=host
+        --shm-size=16g
+        --ulimit memlock=-1
+        --ulimit stack=67108864
+        --memory=0
+        --memory-swap=0
+        -v /mnt/nvme0n1/slime_ci:/data/slime_ci
+        -v /mnt/nvme0n1/slime_ci/models:/root/models
+        -v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
+    strategy:
+      fail-fast: false
+      matrix:
+        info: [{"num_gpus": 8, "test_file": "test_quick_start_glm4_9B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_30B_A3B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ppo.py"}, {"num_gpus": 8, "test_file": "test_moonlight_16B_A3B.py"}, {"num_gpus": 2, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py"}]
+    defaults:
+      run:
+        working-directory: ${{ github.workspace }}
+    env:
+      GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
+      WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
+      SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install
+        shell: bash
+        run: cd $GITHUB_WORKSPACE && pip install -e .
+
+      - name: Execute
+        shell: bash
+        run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
+
+  e2e-test-long:
+    if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-long'))
+    runs-on: self-hosted
+    container:
+      image: slimerl/slime:latest
+      options: >
+        --gpus all
+        --ipc=host
+        --shm-size=16g
+        --ulimit memlock=-1
+        --ulimit stack=67108864
+        --memory=0
+        --memory-swap=0
+        -v /mnt/nvme0n1/slime_ci:/data/slime_ci
+        -v /mnt/nvme0n1/slime_ci/models:/root/models
+        -v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
+    strategy:
+      fail-fast: false
+      matrix:
+        info: [{"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k.py"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k_async.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_colocated_2xGPU.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}]
+    defaults:
+      run:
+        working-directory: ${{ github.workspace }}
+    env:
+      GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
+      WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
+      SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install
+        shell: bash
+        run: cd $GITHUB_WORKSPACE && pip install -e .
+
+      - name: Execute
+        shell: bash
+        run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
+
+  e2e-test-precision:
+    if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-precision'))
     runs-on: self-hosted
     container:
       image: slimerl/slime:latest
@@ -28,21 +119,19 @@ jobs:
         --memory=0
         --memory-swap=0
         -v /mnt/nvme0n1/slime_ci:/data/slime_ci
-        -v /mnt/nvme0n1/models:/root/models
-        -v /mnt/nvme0n1/datasets:/root/datasets
+        -v /mnt/nvme0n1/slime_ci/models:/root/models
+        -v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
     strategy:
       fail-fast: false
       matrix:
-        info:
-          - {test_file: test_quick_start_glm4_9B.py}
-          - {test_file: test_qwen3_30B_A3B.py}
-          # TODO use deterministic kernel
+        info: [{"num_gpus": 8, "test_file": "test_qwen3_0.6B_parallel_check.py"}]
     defaults:
       run:
         working-directory: ${{ github.workspace }}
     env:
       GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
       WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
+      SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}

     steps:
       - name: Checkout repository
@@ -54,4 +143,4 @@ jobs:

       - name: Execute
         shell: bash
-        run: python tests/${{ matrix.info.test_file }}
+        run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
```
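Every `Execute` step now runs through `tests/ci/gpu_lock_exec.py`, which reserves `--count` GPUs on the shared self-hosted runner before launching the command after `--`. The wrapper's implementation is not part of this commit; below is a hypothetical sketch of how such a tool could work, using per-GPU `flock` lock files (the lock-file paths, `TOTAL_GPUS`, and retry interval are all assumptions, not the real script):

```python
import fcntl
import os
import subprocess
import sys
import time

TOTAL_GPUS = 8  # assumption: the runner exposes 8 GPUs


def acquire_gpus(count, total=TOTAL_GPUS):
    """Try to lock `count` GPU lock files; return held (gpu, file) pairs, or None."""
    held = []
    for gpu in range(total):
        f = open(f"/tmp/ci_gpu_{gpu}.lock", "w")
        try:
            # Non-blocking exclusive lock: fails immediately if another job holds it.
            fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
            held.append((gpu, f))
            if len(held) == count:
                return held
        except BlockingIOError:
            f.close()  # this GPU is busy in another job
    # Not enough free GPUs: release everything so the caller can retry.
    for _, f in held:
        f.close()
    return None


def main():
    count = int(sys.argv[sys.argv.index("--count") + 1])
    cmd = sys.argv[sys.argv.index("--") + 1 :]
    while (held := acquire_gpus(count)) is None:
        time.sleep(5)  # wait for GPUs to free up
    # Pin the child process to the reserved GPUs; locks are held for its lifetime.
    env = dict(os.environ, CUDA_VISIBLE_DEVICES=",".join(str(g) for g, _ in held))
    sys.exit(subprocess.call(cmd, env=env))


if __name__ == "__main__":
    main()
```

The matrix entries above pair each test file with its `num_gpus`, so a 2-GPU job can share the runner with other jobs instead of monopolizing all 8 GPUs.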

.github/workflows/pr-test.yml.j2

Lines changed: 90 additions & 0 deletions (new file)

```jinja
<% set jobs = {
    'e2e-test-short': {
        'label': 'run-ci-short',
        'tests': [
            {'test_file': 'test_quick_start_glm4_9B.py', 'num_gpus': 8},
            {'test_file': 'test_qwen3_30B_A3B.py', 'num_gpus': 8},
            {'test_file': 'test_qwen3_4B_ppo.py', 'num_gpus': 8},
            {'test_file': 'test_moonlight_16B_A3B.py', 'num_gpus': 8},
            {'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 2},
        ],
    },
    'e2e-test-long': {
        'label': 'run-ci-long',
        'tests': [
            {'test_file': 'test_qwen2.5_0.5B_gsm8k.py', 'num_gpus': 2},
            {'test_file': 'test_qwen2.5_0.5B_gsm8k_async.py', 'num_gpus': 2},
            {'test_file': 'test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 2},
            {'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
        ],
    },
    'e2e-test-precision': {
        'label': 'run-ci-precision',
        'tests': [
            {'test_file': 'test_qwen3_0.6B_parallel_check.py', 'num_gpus': 8},
        ],
    },
} %>
name: PR Test

on:
  # Do not run CI on push to reduce CI time
  # push:
  #   branches: [main]
  pull_request:
    branches: [main]
    types: [synchronize, labeled]
  workflow_dispatch:
    inputs:
      infinite_run:
        description: 'Run training infinitely'
        required: false
        type: boolean
        default: false

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
<% for job_name, config in jobs.items() %>
  << job_name >>:
    if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, '<< config.label >>'))
    runs-on: self-hosted
    container:
      image: slimerl/slime:latest
      options: >
        --gpus all
        --ipc=host
        --shm-size=16g
        --ulimit memlock=-1
        --ulimit stack=67108864
        --memory=0
        --memory-swap=0
        -v /mnt/nvme0n1/slime_ci:/data/slime_ci
        -v /mnt/nvme0n1/slime_ci/models:/root/models
        -v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
    strategy:
      fail-fast: false
      matrix:
        info: << config.tests | tojson >>
    defaults:
      run:
        working-directory: ${{ github.workspace }}
    env:
      GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
      WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
      SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install
        shell: bash
        run: cd $GITHUB_WORKSPACE && pip install -e .

      - name: Execute
        shell: bash
        run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
<% endfor %>
```
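The `<< config.tests | tojson >>` expression is what flattens each job's list of test dicts into the single-line JSON matrix seen in the generated `pr-test.yml`. A small standalone check of the `tojson` filter with the same custom variable delimiters (the template string here is illustrative):

```python
import jinja2

env = jinja2.Environment(variable_start_string="<<", variable_end_string=">>")

# `tojson` serializes the Python list of dicts into the inline JSON form
# that GitHub Actions accepts as a matrix definition.
tmpl = env.from_string("info: << tests | tojson >>")
out = tmpl.render(tests=[{"test_file": "a.py", "num_gpus": 2}])
print(out)
```

Keeping the test lists as Python-style dicts in the `.j2` header and serializing them with `tojson` avoids hand-maintaining the long one-line JSON matrices in three places.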

.gitignore

Lines changed: 2 additions & 1 deletion

```diff
@@ -190,4 +190,5 @@ local/

 glm/
 _examples_synced/
-.env
+.env
+.DS_Store
```

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions

```diff
@@ -17,6 +17,12 @@ repos:
       args: ['--maxkb=1000']
     - id: requirements-txt-fixer

+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.14.7
+    hooks:
+      - id: ruff-check
+        args: [ --fix ]
+
   - repo: https://github.com/PyCQA/autoflake
     rev: v2.0.2
     hooks:
```

README.md

Lines changed: 10 additions & 2 deletions

````diff
@@ -51,6 +51,14 @@ We also provide examples for some use cases not covered in the quick start guide

 slime has powered several novel research projects and production systems. Here are some notable examples:

+### ⚛️ P1: Mastering Physics Olympiads with Reinforcement Learning
+
+[**P1**](https://prime-rl.github.io/P1/) is a family of open-source physics reasoning models trained entirely through reinforcement learning. P1 uses slime as its RL post-training framework and introduces a multi-stage RL training algorithm that progressively enhances reasoning ability through adaptive learnability adjustment and stabilization mechanisms. Empowered by this training paradigm, P1 delivers breakthrough performance in open-source physics reasoning.
+
+### 📈 RLVE: Scaling LM RL with Adaptive Verifiable Environments
+
+[**RLVE**](https://github.com/Zhiyuan-Zeng/RLVE) introduces verifiable environments that procedurally generate problems and provide algorithmically verifiable rewards to scale up RL for language models (LMs). With joint training across 400 verifiable environments, RLVE lets each environment dynamically adapt its problem-difficulty distribution to the policy model's capabilities as training progresses.
+
 ### ⚡ TritonForge: Agentic RL Training Framework for Kernel Generation

 [**TritonForge**](https://github.com/RLsys-Foundation/TritonForge) leverages slime's SFT & RL capabilities to train LLMs that automatically generate optimized GPU kernels. By using a two-stage training approach—supervised fine-tuning followed by reinforcement learning with multi-turn compilation feedback—TritonForge achieves remarkable results in converting PyTorch operations into high-performance Triton kernels.
@@ -65,7 +73,7 @@ These projects showcase slime's versatility—from training code-generation mode

 Arguments in slime are divided into three categories:

-1. **Megatron arguments**: slime reads all arguments set in Megatron via `PYTHONPATH`. You can configure Megatron by passing arguments like `--tensor-model-parallel-size 2`.
+1. **Megatron arguments**: slime reads all arguments in Megatron. You can configure Megatron by passing arguments like `--tensor-model-parallel-size 2`.
 2. **SGLang arguments**: All arguments for the installed SGLang are supported. These arguments must be prefixed with `--sglang-`. For example, `--mem-fraction-static` should be passed as `--sglang-mem-fraction-static`.
 3. **slime-specific arguments**: Please refer to: [slime/utils/arguments.py](slime/utils/arguments.py)

@@ -93,7 +101,7 @@ pre-commit run --all-files --show-diff-on-failure --color=always
 - Special thanks to the following projects & communities: SGLang, Megatron‑LM, mbridge, OpenRLHF, veRL, Pai-Megatron-Patch and others.
 - To quote slime, please use:

-```bibtext
+```bibtex
 @misc{slime_github,
   author = {Zilin Zhu and Chengxing Xie and Xin Lv and slime Contributors},
   title = {slime: An LLM post-training framework for RL Scaling},
````

README_zh.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -82,7 +82,7 @@ slime 是 [GLM-4.5](https://z.ai/blog/glm-4.5) 与 [GLM-4.6](https://z.ai/blog/g
 - Special thanks to the following projects & communities: SGLang, Megatron‑LM, mbridge, OpenRLHF, veRL, Pai-Megatron-Patch, and others.

 - To cite slime, please use:
-```bibtext
+```bibtex
 @misc{slime_github,
   author = {Zilin Zhu and Chengxing Xie and Xin Lv and slime Contributors},
   title = {slime: An LLM post-training framework for RL Scaling},
````
