Skip to content

Commit b342c5e

Browse files
authored
Merge branch 'main' into feature/add-cispo
2 parents 0edbebf + d7f60be commit b342c5e

File tree

196 files changed

+16225
-6275
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

196 files changed

+16225
-6275
lines changed

.github/workflows/conda-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ jobs:
7777
micromamba activate slime
7878
export CUDA_HOME="$CONDA_PREFIX"
7979
80-
bash tests/test_qwen3-30B-A3B.sh
80+
SLIME_TEST_USE_DEEPEP=0 SLIME_TEST_USE_FP8_ROLLOUT=0 python tests/test_qwen3_30B_A3B.py
8181
shell: bash
8282

8383
- name: Cleanup
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from pathlib import Path
2+
import jinja2
3+
4+
5+
def main():
    """Generate GitHub workflow YAML files from the Jinja2 templates beside this script.

    Every ``*.yml.j2`` file in this script's directory is rendered and written
    next to its template with the ``.j2`` suffix removed, prefixed by a banner
    marking the file as auto-generated. Templates are rendered without any
    context variables.
    """
    workflows_dir = Path(__file__).parent
    print(f"Scan dir: {workflows_dir}")

    # Custom delimiters: Jinja's default "{{ ... }}" would collide with the
    # "${{ ... }}" expressions that the workflow files themselves contain.
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(str(workflows_dir)),
        block_start_string="<%",
        block_end_string="%>",
        variable_start_string="<<",
        variable_end_string=">>",
    )

    banner = (
        "#" * 80
        + "\n# This file is auto-generated from the .j2 file via generate_github_workflows.py. Do not edit manually.\n"
        + "#" * 80
        + "\n"
    )

    # Sort so files are generated (and logged) in a stable order regardless of
    # the order the filesystem happens to return directory entries in.
    for template_path in sorted(workflows_dir.glob("*.yml.j2")):
        content = env.get_template(template_path.name).render()

        # "pr-test.yml.j2" -> "pr-test.yml"
        yaml_path = template_path.with_suffix("")

        # Pin encoding and line endings so the generated file is byte-identical
        # no matter which platform or locale the generator runs under.
        with open(yaml_path, "w", encoding="utf-8", newline="\n") as f:
            f.write(banner)
            f.write(content)

        print(f"Generated {yaml_path} from {template_path}")


if __name__ == "__main__":
    main()

.github/workflows/pr-test.yml

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
################################################################################
2+
# This file is auto-generated from the .j2 file via generate_github_workflows.py. Do not edit manually.
3+
################################################################################
4+
15
name: PR Test
26

37
on:
@@ -7,15 +11,62 @@ on:
711
pull_request:
812
branches: [main]
913
types: [synchronize, labeled]
14+
workflow_dispatch:
15+
inputs:
16+
infinite_run:
17+
description: 'Run training infinitely'
18+
required: false
19+
type: boolean
20+
default: false
1021

1122
concurrency:
1223
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
1324
cancel-in-progress: true
1425

1526
jobs:
16-
e2e-test:
17-
# TODO may use run-ci label etc
18-
if: github.event.pull_request.draft == false
27+
28+
e2e-test-short:
29+
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-short'))
30+
runs-on: self-hosted
31+
container:
32+
image: slimerl/slime:latest
33+
options: >
34+
--gpus all
35+
--ipc=host
36+
--shm-size=16g
37+
--ulimit memlock=-1
38+
--ulimit stack=67108864
39+
--memory=0
40+
--memory-swap=0
41+
-v /mnt/nvme0n1/slime_ci:/data/slime_ci
42+
-v /mnt/nvme0n1/slime_ci/models:/root/models
43+
-v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
44+
strategy:
45+
fail-fast: false
46+
matrix:
47+
info: [{"num_gpus": 8, "test_file": "test_quick_start_glm4_9B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_30B_A3B.py"}, {"num_gpus": 8, "test_file": "test_qwen3_4B_ppo.py"}, {"num_gpus": 8, "test_file": "test_moonlight_16B_A3B.py"}, {"num_gpus": 2, "test_file": "test_qwen3_4B_fsdp_true_on_policy.py"}]
48+
defaults:
49+
run:
50+
working-directory: ${{ github.workspace }}
51+
env:
52+
GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
53+
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
54+
SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
55+
56+
steps:
57+
- name: Checkout repository
58+
uses: actions/checkout@v4
59+
60+
- name: Install
61+
shell: bash
62+
run: cd $GITHUB_WORKSPACE && pip install -e .
63+
64+
- name: Execute
65+
shell: bash
66+
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
67+
68+
e2e-test-long:
69+
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, 'run-ci-long'))
1970
runs-on: self-hosted
2071
container:
2172
image: slimerl/slime:latest
@@ -28,21 +79,19 @@ jobs:
2879
--memory=0
2980
--memory-swap=0
3081
-v /mnt/nvme0n1/slime_ci:/data/slime_ci
31-
-v /mnt/nvme0n1/models:/root/models
32-
-v /mnt/nvme0n1/datasets:/root/datasets
82+
-v /mnt/nvme0n1/slime_ci/models:/root/models
83+
-v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
3384
strategy:
3485
fail-fast: false
3586
matrix:
36-
info:
37-
- {test_file: test_quick_start_glm4_9B.py}
38-
- {test_file: test_qwen3_30B_A3B.py}
39-
# TODO use deterministic kernel
87+
info: [{"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k.py"}, {"num_gpus": 2, "test_file": "test_qwen2.5_0.5B_gsm8k_async.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_colocated_2xGPU.py"}, {"num_gpus": 2, "test_file": "test_qwen3_0.6B_fsdp_distributed.py"}]
4088
defaults:
4189
run:
4290
working-directory: ${{ github.workspace }}
4391
env:
4492
GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
4593
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
94+
SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
4695

4796
steps:
4897
- name: Checkout repository
@@ -54,4 +103,4 @@ jobs:
54103

55104
- name: Execute
56105
shell: bash
57-
run: python tests/${{ matrix.info.test_file }}
106+
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}

.github/workflows/pr-test.yml.j2

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<% set jobs = {
2+
'e2e-test-short': {
3+
'label': 'run-ci-short',
4+
'tests': [
5+
{'test_file': 'test_quick_start_glm4_9B.py', 'num_gpus': 8},
6+
{'test_file': 'test_qwen3_30B_A3B.py', 'num_gpus': 8},
7+
{'test_file': 'test_qwen3_4B_ppo.py', 'num_gpus': 8},
8+
{'test_file': 'test_moonlight_16B_A3B.py', 'num_gpus': 8},
9+
{'test_file': 'test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 2},
10+
],
11+
},
12+
'e2e-test-long': {
13+
'label': 'run-ci-long',
14+
'tests': [
15+
{'test_file': 'test_qwen2.5_0.5B_gsm8k.py', 'num_gpus': 2},
16+
{'test_file': 'test_qwen2.5_0.5B_gsm8k_async.py', 'num_gpus': 2},
17+
{'test_file': 'test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 2},
18+
{'test_file': 'test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 2},
19+
],
20+
},
21+
} %>
22+
name: PR Test
23+
24+
on:
25+
# Do not run CI on push to reduce CI time
26+
# push:
27+
# branches: [main]
28+
pull_request:
29+
branches: [main]
30+
types: [synchronize, labeled]
31+
workflow_dispatch:
32+
inputs:
33+
infinite_run:
34+
description: 'Run training infinitely'
35+
required: false
36+
type: boolean
37+
default: false
38+
39+
concurrency:
40+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
41+
cancel-in-progress: true
42+
43+
jobs:
44+
<% for job_name, config in jobs.items() %>
45+
<< job_name >>:
46+
if: (github.event_name == 'workflow_dispatch') || (github.event.pull_request && contains(github.event.pull_request.labels.*.name, '<< config.label >>'))
47+
runs-on: self-hosted
48+
container:
49+
image: slimerl/slime:latest
50+
options: >
51+
--gpus all
52+
--ipc=host
53+
--shm-size=16g
54+
--ulimit memlock=-1
55+
--ulimit stack=67108864
56+
--memory=0
57+
--memory-swap=0
58+
-v /mnt/nvme0n1/slime_ci:/data/slime_ci
59+
-v /mnt/nvme0n1/slime_ci/models:/root/models
60+
-v /mnt/nvme0n1/slime_ci/datasets:/root/datasets
61+
strategy:
62+
fail-fast: false
63+
matrix:
64+
info: << config.tests | tojson >>
65+
defaults:
66+
run:
67+
working-directory: ${{ github.workspace }}
68+
env:
69+
GITHUB_COMMIT_NAME: ${{ github.sha }}_${{ github.event.pull_request.number || 'non-pr' }}
70+
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
71+
SLIME_TEST_ENABLE_INFINITE_RUN: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.infinite_run) || 'false' }}
72+
73+
steps:
74+
- name: Checkout repository
75+
uses: actions/checkout@v4
76+
77+
- name: Install
78+
shell: bash
79+
run: cd $GITHUB_WORKSPACE && pip install -e .
80+
81+
- name: Execute
82+
shell: bash
83+
run: python tests/ci/gpu_lock_exec.py --count ${{ matrix.info.num_gpus }} -- python tests/${{ matrix.info.test_file }}
84+
<% endfor %>

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,5 @@ local/
190190

191191
glm/
192192
_examples_synced/
193-
.env
193+
.env
194+
.DS_Store

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ We also provide examples for some use cases not covered in the quick start guide
5151

5252
slime has powered several novel research projects and production systems. Here are some notable examples:
5353

54+
### ⚛️ P1: Mastering Physics Olympiads with Reinforcement Learning
55+
56+
[**P1**](https://prime-rl.github.io/P1/) is a family of open-source physics reasoning models trained entirely through reinforcement learning. P1 leverages slime as the RL post-training framework, and introduces a multi-stage RL training algorithm that progressively enhances reasoning ability through adaptive learnability adjustment and stabilization mechanisms. Empowered by this training paradigm, P1 delivers breakthrough performance in open-source physics reasoning.
57+
58+
### 📈 RLVE: Scaling LM RL with Adaptive Verifiable Environments
59+
60+
[**RLVE**](https://github.com/Zhiyuan-Zeng/RLVE) introduces an approach using verifiable environments that procedurally generate problems and provide algorithmically verifiable rewards, to scale up RL for language models (LMs). With joint training across 400 verifiable environments, RLVE enables each environment to dynamically adapt its problem difficulty distribution to the policy model's capabilities as training progresses.
61+
5462
### ⚡ TritonForge: Agentic RL Training Framework for Kernel Generation
5563

5664
[**TritonForge**](https://github.com/RLsys-Foundation/TritonForge) leverages slime's SFT & RL capabilities to train LLMs that automatically generate optimized GPU kernels. By using a two-stage training approach—supervised fine-tuning followed by reinforcement learning with multi-turn compilation feedback—TritonForge achieves remarkable results in converting PyTorch operations into high-performance Triton kernels.
@@ -65,7 +73,7 @@ These projects showcase slime's versatility—from training code-generation mode
6573

6674
Arguments in slime are divided into three categories:
6775

68-
1. **Megatron arguments**: slime reads all arguments set in Megatron via `PYTHONPATH`. You can configure Megatron by passing arguments like `--tensor-model-parallel-size 2`.
76+
1. **Megatron arguments**: slime reads all arguments in Megatron. You can configure Megatron by passing arguments like `--tensor-model-parallel-size 2`.
6977
2. **SGLang arguments**: All arguments for the installed SGLang are supported. These arguments must be prefixed with `--sglang-`. For example, `--mem-fraction-static` should be passed as `--sglang-mem-fraction-static`.
7078
3. **slime-specific arguments**: Please refer to: [slime/utils/arguments.py](slime/utils/arguments.py)
7179

@@ -93,7 +101,7 @@ pre-commit run --all-files --show-diff-on-failure --color=always
93101
- Special thanks to the following projects & communities: SGLang, Megatron‑LM, mbridge, OpenRLHF, veRL, Pai-Megatron-Patch and others.
94102
- To quote slime, please use:
95103

96-
```bibtext
104+
```bibtex
97105
@misc{slime_github,
98106
author = {Zilin Zhu and Chengxing Xie and Xin Lv and slime Contributors},
99107
title = {slime: An LLM post-training framework for RL Scaling},

README_zh.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ slime 是 [GLM-4.5](https://z.ai/blog/glm-4.5) 与 [GLM-4.6](https://z.ai/blog/g
8282
- 特别感谢以下项目 & 社区:SGLang、Megatron‑LM、mbridge、OpenRLHF、veRL、Pai-Megatron-Patch 等。
8383

8484
- 引用 slime 请使用:
85-
```bibtext
85+
```bibtex
8686
@misc{slime_github,
8787
author = {Zilin Zhu and Chengxing Xie and Xin Lv and slime Contributors},
8888
title = {slime: An LLM post-training framework for RL Scaling},

build_conda.sh

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ set -ex
44

55
# create conda
66
yes '' | "${SHELL}" <(curl -L micro.mamba.pm/install.sh)
7+
export PS1=tmp
8+
mkdir -p /root/.cargo/
9+
touch /root/.cargo/env
710
source ~/.bashrc
811

912
micromamba create -n slime python=3.12 pip -c conda-forge -y
@@ -12,46 +15,50 @@ export CUDA_HOME="$CONDA_PREFIX"
1215

1316
export BASE_DIR=${BASE_DIR:-"/root"}
1417
cd $BASE_DIR
18+
19+
# install cuda 12.9 as it's the default cuda version for torch
20+
micromamba install -n slime cuda cuda-nvtx cuda-nvtx-dev nccl -c nvidia/label/cuda-12.9.1 -y
21+
micromamba install -n slime -c conda-forge cudnn -y
22+
23+
# prevent installing cuda 13.0 for sglang
24+
pip install cuda-python==12.9.1
25+
pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu129
26+
1527
# install sglang
1628
git clone https://github.com/sgl-project/sglang.git
1729
cd sglang
18-
git checkout 8ecf6b9d2480c3f600826c7d8fef6a16ed603c3f
30+
git checkout 303cc957e62384044dfa8e52d7d8af8abe12f0ac
1931
# Install the python packages
2032
pip install -e "python[all]"
2133

22-
# install cuda 12.8 as it's the default cuda version for torch
23-
micromamba install -n slime cuda cuda-nvtx cuda-nvtx-dev -c nvidia/label/cuda-12.8.0 -y
24-
micromamba install -n slime -c conda-forge cudnn -y
34+
2535
pip install cmake ninja
2636

27-
# reinstall sglang deps
28-
pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-cache-dir --force-reinstall --no-build-isolation
37+
# flash attn
38+
# the newest version megatron supports is v2.7.4.post1
39+
MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1 --no-build-isolation
2940

30-
# install megatron deps
31-
TORCH_CUDA_ARCH_LIST="9.0;9.0a" \
32-
pip -v install --no-build-isolation \
33-
git+https://github.com/fanshiqing/grouped_gemm@v1.1.4
34-
# apex
35-
TORCH_CUDA_ARCH_LIST="9.0;9.0a" NVCC_APPEND_FLAGS="--threads 4" \
36-
\
41+
pip install git+https://github.com/ISEEKYAN/mbridge.git@89eb10887887bc74853f89a4de258c0702932a1c --no-deps
42+
pip install --no-build-isolation "transformer_engine[pytorch]==2.8.0"
43+
pip install flash-linear-attention==0.4.0
44+
NVCC_APPEND_FLAGS="--threads 4" \
3745
pip -v install --disable-pip-version-check --no-cache-dir \
3846
--no-build-isolation \
39-
--config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" git+https://github.com/NVIDIA/apex.git
40-
# transformer engine
41-
TORCH_CUDA_ARCH_LIST="9.0;9.0a" \
42-
pip -v install transformer_engine[pytorch]
43-
# flash attn
44-
# the newest version megatron supports is v2.7.4.post1
45-
MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1
47+
--config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" git+https://github.com/NVIDIA/apex.git@10417aceddd7d5d05d7cbf7b0fc2daad1105f8b4
48+
49+
git clone https://github.com/NVIDIA/Megatron-LM.git --recursive && \
50+
cd Megatron-LM && git checkout ${MEGATRON_COMMIT} && \
51+
pip install -e .
52+
53+
pip install git+https://github.com/fzyzcjy/torch_memory_saver.git@9b8b788fdeb9c2ee528183214cef65a99b71e7d5 --no-cache-dir --force-reinstall
54+
pip install git+https://github.com/fzyzcjy/Megatron-Bridge.git@dev_rl --no-build-isolation
55+
# Quote the requirement: unquoted, the shell treats ">=0.37.0" as a redirection
# to a file named "=0.37.0" (dropping the version constraint) and may
# glob-expand "[torch]".
pip install "nvidia-modelopt[torch]>=0.37.0" --no-build-isolation
56+
4657
# megatron
4758
cd $BASE_DIR
48-
git clone https://github.com/NVIDIA/Megatron-LM.git
49-
cd Megatron-LM/
50-
git checkout 48406695c4efcf1026a7ed70bb390793918dd97b
51-
pip install -e .
52-
53-
# mbridge
54-
pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
59+
git clone https://github.com/NVIDIA/Megatron-LM.git --recursive && \
60+
cd Megatron-LM/ && git checkout core_v0.14.0 && \
61+
pip install -e .
5562

5663
# install slime and apply patches
5764

@@ -67,9 +74,8 @@ else
6774
pip install -e .
6875
fi
6976

70-
7177
# apply patch
7278
cd $BASE_DIR/sglang
73-
git apply $SLIME_DIR/docker/patch/v0.5.0rc0-cu126/sglang.patch
79+
git apply $SLIME_DIR/docker/patch/v0.5.5.post1/sglang.patch
7480
cd $BASE_DIR/Megatron-LM
75-
git apply $SLIME_DIR/docker/patch/v0.5.0rc0-cu126/megatron.patch
81+
git apply $SLIME_DIR/docker/patch/v0.5.5.post1/megatron.patch

0 commit comments

Comments
 (0)