We also provide examples for some use cases not covered in the quick start guide.

slime has powered several novel research projects and production systems. Here are some notable examples:
### ⚛️ P1: Mastering Physics Olympiads with Reinforcement Learning
[**P1**](https://prime-rl.github.io/P1/) is a family of open-source physics reasoning models trained entirely through reinforcement learning. P1 uses slime as its RL post-training framework and introduces a multi-stage RL training algorithm that progressively enhances reasoning ability through adaptive learnability adjustment and stabilization mechanisms. Empowered by this training paradigm, P1 delivers breakthrough performance in open-source physics reasoning.
### 📈 RLVE: Scaling LM RL with Adaptive Verifiable Environments
[**RLVE**](https://github.com/Zhiyuan-Zeng/RLVE) scales up RL for language models (LMs) using verifiable environments: environments that procedurally generate problems and provide algorithmically verifiable rewards. With joint training across 400 verifiable environments, RLVE enables each environment to dynamically adapt its problem difficulty distribution to the policy model's capabilities as training progresses.
### ⚡ TritonForge: Agentic RL Training Framework for Kernel Generation
[**TritonForge**](https://github.com/RLsys-Foundation/TritonForge) leverages slime's SFT & RL capabilities to train LLMs that automatically generate optimized GPU kernels. By using a two-stage training approach—supervised fine-tuning followed by reinforcement learning with multi-turn compilation feedback—TritonForge achieves remarkable results in converting PyTorch operations into high-performance Triton kernels.
Arguments in slime are divided into three categories:
1. **Megatron arguments**: slime reads all arguments in Megatron. You can configure Megatron by passing arguments like `--tensor-model-parallel-size 2`.
2. **SGLang arguments**: All arguments for the installed SGLang are supported. These arguments must be prefixed with `--sglang-`. For example, `--mem-fraction-static` should be passed as `--sglang-mem-fraction-static`.
3. **slime-specific arguments**: Please refer to [slime/utils/arguments.py](slime/utils/arguments.py).
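The `--sglang-` prefixing convention above can be sketched as a small argument demultiplexer. This is a hypothetical illustration of the convention only, not slime's actual implementation; the function name `split_args` and the handling of bare values are assumptions:

```python
def split_args(argv):
    """Split a flat CLI argument list into SGLang arguments (prefixed with
    --sglang-) and everything else, mirroring the convention described above.
    Illustrative only; not slime's real argument parser."""
    sglang_args, other_args = [], []
    current_bucket = None  # the bucket the most recent flag went into
    for token in argv:
        if token.startswith("--sglang-"):
            # Strip the prefix to recover the native SGLang flag name.
            sglang_args.append("--" + token[len("--sglang-"):])
            current_bucket = sglang_args
        elif token.startswith("--"):
            other_args.append(token)
            current_bucket = other_args
        else:
            # A bare value: attach it to whichever flag came last.
            (current_bucket if current_bucket is not None else other_args).append(token)
    return sglang_args, other_args

sglang, rest = split_args(
    ["--tensor-model-parallel-size", "2", "--sglang-mem-fraction-static", "0.7"]
)
print(sglang)  # ['--mem-fraction-static', '0.7']
print(rest)    # ['--tensor-model-parallel-size', '2']
```

With this convention, one flat command line can carry Megatron, SGLang, and slime-specific options without name collisions.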
- Special thanks to the following projects & communities: SGLang, Megatron-LM, mbridge, OpenRLHF, veRL, Pai-Megatron-Patch and others.
- To cite slime, please use:
```bibtex
@misc{slime_github,
author = {Zilin Zhu and Chengxing Xie and Xin Lv and slime Contributors},
title = {slime: An LLM post-training framework for RL Scaling},
}
```