Skip to content

Commit b7a311c

Browse files
authored
Merge branch 'verl-project:main' into main
2 parents fbb1ef0 + ad404cb commit b7a311c

File tree

15 files changed

+978
-75
lines changed

15 files changed

+978
-75
lines changed

.github/workflows/e2e_ascend.yml

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -221,45 +221,3 @@ jobs:
221221
ray stop --force
222222
bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh
223223
rm -rf $HOME/ckpts
224-
225-
experimental_job:
226-
if: github.repository_owner == 'verl-project'
227-
name: E2E Ascend testing for experimental features
228-
runs-on: linux-aarch64-a2-4
229-
timeout-minutes: 60
230-
container:
231-
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.3.rc1-910b-ubuntu22.04-py3.11-latest
232-
options: >-
233-
--shm-size 16g
234-
env:
235-
HF_ENDPOINT: "https://hf-mirror.com"
236-
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
237-
steps:
238-
- name: Check npu and CANN info
239-
run: |
240-
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
241-
npu-smi info
242-
- name: Check initial pip list from image
243-
run: |
244-
pip list
245-
- name: Checkout volcengine/verl repo
246-
uses: actions/checkout@v4
247-
with:
248-
fetch-depth: 0
249-
clean: true
250-
- name: Install the current repository
251-
run: |
252-
pip install -r requirements-npu.txt
253-
pip install -e .
254-
- name: Check final pip list
255-
run: |
256-
pip list
257-
- name: Preprocess gsm8k dataset
258-
run: |
259-
python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
260-
# TODO(wuxibin): temporary disable until we refactor with checkpoint engine
261-
# - name: Running the E2E test with one_step_off_policy algorithm on ASCEND NPU (FSDP2)
262-
# run: |
263-
# ray stop --force
264-
# bash tests/special_npu/run_one_step_off_policy.sh
265-
# rm -rf $HOME/ckpts
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
# # Tests layout
2+
3+
# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4+
# - `tests/trainer` for testing functionality related to `verl/trainer`
5+
# - `tests/models` for testing functionality related to `verl/models`
6+
# - ...
7+
8+
# There are a few folders with `special_` prefix, created for special purposes:
9+
# - `special_distributed`: unit tests that must run with multiple GPUs
10+
# - `special_e2e`: end-to-end tests with training/generation scripts
11+
# - `special_npu`: tests for NPUs
12+
# - `special_sanity`: a suite of quick sanity tests
13+
# - `special_standalone`: a set of tests designed to run in dedicated environments
14+
15+
# Accelerators for tests
16+
# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17+
# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
18+
19+
# # Workflow layout
20+
21+
# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22+
# 1. A list of always-triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23+
# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24+
# 3. End-to-end tests: `e2e_*.yml`
25+
# 4. Unit tests
26+
# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27+
# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names lack the `on_cpu.py` suffix.
28+
# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
29+
# - new workflow yaml is added to `.github/workflows`
30+
# - new tests are added to workflow mentioned in 2.
31+
32+
name: e2e_ppo_trainer_megatron_vllm_2_ascend
33+
34+
on:
35+
# Trigger the workflow on push or pull request,
36+
# but only for the main branch.
37+
# For push, for now only anti-patterns are specified so it is more conservative
38+
# and achieves higher coverage.
39+
push:
40+
branches:
41+
- main
42+
- v0.*
43+
paths:
44+
- "**/*.py"
45+
# Other entrypoints
46+
- "!verl/trainer/fsdp_sft_trainer.py"
47+
# FSDP
48+
- "!verl/workers/**/*dp_*.py"
49+
- "!verl/utils/fsdp_utils.py"
50+
- "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
51+
- "!verl/model_merger/fsdp_model_merger.py"
52+
pull_request:
53+
branches:
54+
- main
55+
- v0.*
56+
paths:
57+
- "**/*.py"
58+
# Other entrypoints
59+
- "!docker/**"
60+
# Docs
61+
- "!**/*.md"
62+
- "!docs/**"
63+
- "!examples/**"
64+
- "!tests/**"
65+
- "!verl/trainer/main_*.py"
66+
- "!verl/trainer/fsdp_sft_trainer.py"
67+
# FSDP
68+
- "!verl/workers/**/*dp_*.py"
69+
- "!verl/utils/fsdp_utils.py"
70+
- "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
71+
- "!verl/model_merger/fsdp_model_merger.py"
72+
# Entrypoints
73+
- ".github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml"
74+
- "examples/data_preprocess/gsm8k.py"
75+
- "examples/data_preprocess/geo3k.py"
76+
- "tests/special_e2e/run_ppo_trainer_megatron.sh"
77+
- "verl/trainer/main_ppo.py"
78+
- "verl/trainer/config/ppo_megatron_trainer.yaml"
79+
80+
# Cancel jobs on the same ref if a new one is triggered
81+
concurrency:
82+
group: ${{ github.workflow }}-${{ github.ref }}
83+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
84+
85+
# Declare permissions just read content.
86+
permissions:
87+
contents: read
88+
89+
jobs:
90+
e2e_ppo_trainer_fsdp_vllm_ascend:
91+
if: github.repository_owner == 'verl-project'
92+
runs-on: linux-aarch64-a2-8
93+
timeout-minutes: 90 # Increase this timeout value as needed
94+
container:
95+
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.3.rc1-910b-ubuntu22.04-py3.11-latest
96+
options: >-
97+
--shm-size 16g
98+
env:
99+
HF_ENDPOINT: "https://hf-mirror.com"
100+
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
101+
steps:
102+
- name: Check npu and CANN info
103+
run: |
104+
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
105+
npu-smi info
106+
- name: Check initial pip list from image
107+
run: |
108+
pip list
109+
- name: Checkout verl-project/verl repo
110+
uses: actions/checkout@v4
111+
with:
112+
fetch-depth: 0
113+
clean: true
114+
- name: Install the current repository
115+
run: |
116+
pip install -r requirements-npu.txt
117+
pip install --no-deps -e .
118+
- name: Check final pip list
119+
run: |
120+
pip list
121+
- name: Prepare weights
122+
run: |
123+
ln -s /root/.cache/models ~/models
124+
- name: Prepare GSM8K dataset
125+
run: |
126+
python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
127+
# Function RM
128+
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (DDP_SIZE=2, FSDP_SIZE=4)
129+
run: |
130+
ray stop --force
131+
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True FSDP_SIZE=4 USE_KL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
132+
- name: Test merging DDP+FSDP checkpoints (Qwen Actor)
133+
run: |
134+
exp_name="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4"
135+
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
136+
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP2)
137+
run: |
138+
ray stop --force
139+
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8" STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
140+
- name: Test merging FSDP2 checkpoints (Qwen Actor)
141+
run: |
142+
exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8"
143+
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
144+
- name: Running GSM8K E2E without rmpad using function rm
145+
run: |
146+
ray stop --force
147+
RM_PAD=False bash tests/special_e2e/ppo_trainer/run_function_reward.sh
148+
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
149+
run: |
150+
ray stop --force
151+
CUSTOM_REWARD_FN=True ADV_ESTIMATOR=grpo USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
152+
# TODO
153+
# vllm0.11.0 not support them, will be opened until vllm0.13.0 ok
154+
# - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon
155+
# run: |
156+
# ray stop --force
157+
# ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True TOTAL_TRAIN_STEPS=1 SAVE_FREQ=1 FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
158+
# - name: Test GRPO LoRA checkpoints merging function
159+
# run: |
160+
# export EXP_NAME="qwen2.5-0.5b-function-reward-minimal"
161+
# ls checkpoints/verl-test/${EXP_NAME}/global_step_1/actor
162+
# cat checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface/config.json
163+
# python3 -m verl.model_merger merge --backend fsdp --local_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/ --target_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface
164+
# - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon with fsdp2
165+
# run: |
166+
# ray stop --force
167+
# ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
168+
# TODO
169+
# vllm0.11 not support them, will be open after vllm0.13
170+
# e2e_ppo_trainer_fsdp-qwen2_5vl-3b_ascend:
171+
# if: github.repository_owner == 'verl-project'
172+
# runs-on: linux-aarch64-a2-8
173+
# timeout-minutes: 60 # Increase this timeout value as needed
174+
# container:
175+
# image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.3.rc1-910b-ubuntu22.04-py3.11-latest
176+
# options: >-
177+
# --shm-size 16g
178+
# env:
179+
# HF_ENDPOINT: "https://hf-mirror.com"
180+
# HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
181+
# steps:
182+
# - name: Check npu and CANN info
183+
# run: |
184+
# cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
185+
# npu-smi info
186+
# - name: Check initial pip list from image
187+
# run: |
188+
# pip list
189+
# - name: Checkout verl-project/verl repo
190+
# uses: actions/checkout@v4
191+
# with:
192+
# fetch-depth: 0
193+
# clean: true
194+
# - name: Install the current repository
195+
# run: |
196+
# pip install -r requirements-npu.txt
197+
# pip install --no-deps -e .
198+
# pip install trl
199+
# - name: Check final pip list
200+
# run: |
201+
# pip list
202+
# - name: Prepare weights
203+
# run: |
204+
# ln -s /root/.cache/models ~/models
205+
# # Geo3k
206+
# - name: Prepare GEO3K dataset
207+
# run: |
208+
# python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k
209+
# - name: Running GEO3K VLM GRPO E2E training tests on 8 L20 GPUs with rmpad using function rm
210+
# run: |
211+
# ray stop --force
212+
# TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
213+
# MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
214+
# MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
215+
# ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
216+
# SP_SIZE=2 \
217+
# bash tests/special_e2e/ppo_trainer/run_function_reward.sh
218+
# - name: Running GEO3K VLM PPO E2E training tests on 8 L20 GPUs with rmpad using function rm
219+
# run: |
220+
# ray stop --force
221+
# TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
222+
# MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
223+
# MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
224+
# ADV_ESTIMATOR=gae RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
225+
# SP_SIZE=2 \
226+
# bash tests/special_e2e/ppo_trainer/run_function_reward.sh
227+
# - name: Running GEO3K VLM GRPO E2E lora training tests on 8 L20 GPUs with rmpad using function rm
228+
# run: |
229+
# ray stop --force
230+
# TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
231+
# MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
232+
# MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
233+
# ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
234+
# SP_SIZE=2 \
235+
# LORA_RANK=32 LORA_EXCLUDE=".*visual.*" \
236+
# bash tests/special_e2e/ppo_trainer/run_function_reward.sh

0 commit comments

Comments
 (0)