Skip to content

Commit fbb1ef0

Browse files
authored
Merge branch 'verl-project:main' into main
2 parents dcaacfe + e4915bd commit fbb1ef0

File tree

142 files changed

+7666
-655
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

142 files changed

+7666
-655
lines changed

.github/workflows/e2e_ppo_grpo_trainer_trtllm.yml

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,32 @@ jobs:
106106
faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
107107
mlp-image: "${{ env.IMAGE }}"
108108

109+
trtllm_unit_tests:
110+
needs: setup
111+
runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
112+
timeout-minutes: 30 # Increase this timeout value as needed
113+
env:
114+
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
115+
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
116+
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
117+
HF_ENDPOINT: "https://hf-mirror.com"
118+
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
119+
steps:
120+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
121+
with:
122+
fetch-depth: 0
123+
- name: Install the current repository
124+
run: |
125+
pip3 install pytest-asyncio
126+
pip3 install -r requirements-test.txt
127+
pip3 install --no-deps -e .
128+
- name: Run TRTLLM unit tests
129+
run: |
130+
export TRTLLM_TEST_MODEL_PATH_ROOT="${HOME}/models"
131+
pytest -v -s \
132+
tests/workers/rollout/rollout_trtllm/test_adapter.py \
133+
tests/workers/rollout/rollout_trtllm/test_async_server.py
134+
109135
e2e_grpo_trainer_fsdp-qwen2:
110136
needs: setup
111137
runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
@@ -179,7 +205,7 @@ jobs:
179205
180206
cleanup:
181207
runs-on: ubuntu-latest
182-
needs: [setup, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2]
208+
needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2]
183209
if: always()
184210
steps:
185211
- id: destroy-runner

.github/workflows/gpu_unit_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ jobs:
113113
pip3 install --ignore-installed mlflow "numpy<2.0"
114114
- name: Run all GPU unit tests
115115
run: |
116-
pytest -s -x --ignore-glob="*on_npu.py" --ignore-glob="*test_special_*.py" --ignore-glob='*on_cpu.py' --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob='tests/special*' --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_shared_memory*" tests/
116+
pytest -s -x --ignore-glob="*on_npu.py" --ignore-glob="*test_special_*.py" --ignore-glob='*on_cpu.py' --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob='tests/special*' --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" tests/
117117
- name: Testing LinearCrossEntropyTP Correctness, Computation Time and Memory Consumption
118118
run: |
119119
LOW_MEMORY=True torchrun --standalone --nnodes=1 --nproc-per-node=8 tests/utils/test_special_linear_cross_entropy_tp.py

.github/workflows/npu_unit_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ jobs:
109109
- name: Run all NPU unit tests
110110
run: |
111111
export PYTHONPATH=$PYTHONPATH:/Megatron-LM
112-
pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" tests/
112+
pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" tests/
113113
- name: Testing FSDP2 actor functionality
114114
run: |
115115
torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/actor/test_special_dp_actor.py

.github/workflows/reward_model_sglang.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ jobs:
115115
- name: Running sglang agent loop with reward manager tests on 8 L20 GPUs
116116
run: |
117117
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
118-
ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_agent_loop_reward_manager.py
118+
ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
119119
- name: Running sglang agent loop with reward model colocate tests on 8 L20 GPUs
120120
run: |
121121
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY

.github/workflows/reward_model_vllm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ jobs:
115115
- name: Running vllm agent loop with reward manager tests on 8 L20 GPUs
116116
run: |
117117
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
118-
ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_loop_reward_manager.py
118+
ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
119119
- name: Running vllm agent loop with reward model colocate tests on 8 L20 GPUs
120120
run: |
121121
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY

.github/workflows/reward_model_vllm_ascend.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ jobs:
105105
ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_reward_model_disrm.py
106106
- name: Running vllm agent loop with reward manager tests on 8 NPUs
107107
run: |
108-
ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_loop_reward_manager.py
108+
ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
109109
- name: Running vllm agent loop with reward model colocate tests on 8 NPUs
110110
run: |
111111
export HCCL_HOST_SOCKET_PORT_RANGE=auto

.github/workflows/vllm.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,13 @@ jobs:
109109
run: |
110110
pip3 install -r requirements-test.txt
111111
pip3 install --no-deps -e .
112+
pip3 install --upgrade "transformers<5.0"
112113
# - name: Download Model to Use
113114
# run: |
114-
# huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
115-
# huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
116-
# huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
117-
# huggingface-cli download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
115+
# hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
116+
# hf download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
117+
# hf download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
118+
# hf download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
118119
# export HF_HUB_OFFLINE=1
119120
- name: Prepare gsm8k dataset
120121
run: |
@@ -146,6 +147,7 @@ jobs:
146147
pip3 install cupy-cuda12x pytest-asyncio
147148
pip3 install -r requirements-test.txt
148149
pip3 install --no-deps -e .
150+
pip3 install --upgrade "transformers<5.0"
149151
- name: Test vLLM ServerAdapter with Checkpoint Engine (NCCL)
150152
run: |
151153
ROLLOUT_NAME=vllm pytest -svvv tests/checkpoint_engine/test_special_server_adapter.py

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ Welcome to register your awesome project build with `verl` for other developers'
283283
- [DAPO](https://dapo-sia.github.io/): the fully open source SOTA RL algorithm that beats DeepSeek-R1-zero-32B ![GitHub Repo stars](https://img.shields.io/github/stars/volcengine/verl)
284284
- [NoisyRollout](https://github.com/NUS-TRAIL/NoisyRollout): Reinforcing Visual Reasoning with Data Augmentation ![GitHub Repo stars](https://img.shields.io/github/stars/NUS-TRAIL/NoisyRollout)
285285
- [SPEAR](https://github.com/TencentYoutuResearch/SPEAR): **Self-imitation** with **Progressive Exploration** for Agentic Reinforcement Learning (ICLR 2026) ![GitHub Repo stars](https://img.shields.io/github/stars/TencentYoutuResearch/SPEAR)
286+
- [RuleReasoner](https://github.com/bigai-nlco/RuleReasoner): **RuleReasoner:** Reinforced Rule-based Reasoning via **Domain-aware Dynamic Sampling** (ICLR 2026) ![GitHub Repo stars](https://img.shields.io/github/stars/bigai-nlco/RuleReasoner)
286287

287288
## Contribution Guide
288289

docs/advance/ppo_lora.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
RL(HF) algorithms with LoRA Support
22
===========================================
33

4-
Last updated: 12/17/2025.
4+
Last updated: 02/03/2026.
55

66
We support LoRA (Low-Rank Adaptation) for reinforcement learning algorithms such as PPO, GRPO, and others.
77

@@ -42,6 +42,8 @@ FSDP Backend Usage Guide
4242
- `actor_rollout_ref.model.lora_adapter_path`: string, path to a pretrained LoRA adapter directory.
4343
If provided, loads an existing adapter instead of creating a new one. Enables multi-stage training from previously saved adapters.
4444
The directory must contain `adapter_model.safetensors` and `adapter_config.json`.
45+
- `actor_rollout_ref.model.lora.merge`: bool, whether to merge LoRA adapters into the base model weights before transferring to vLLM.
46+
If True, LoRA adapters are merged into the base model weights before transfer; if False, only the adapters are transferred to vLLM. This option is currently supported **only for engine-based rollout workers** (i.e. vLLM engine workers using the new worker implementation with ``trainer.use_legacy_worker_impl`` disabled) and is not available when using the legacy worker implementation.
4547

4648
5. Recommend options:
4749

@@ -137,6 +139,10 @@ Make sure you use Megatron-Bridge later than 0.2.0, and we recommended using `th
137139
# Path to pre-trained LoRA adapter weights (null to train from scratch)
138140
adapter_path: null
139141
142+
# Whether to fully shard LoRA adapters. Defaults to False
143+
# https://docs.vllm.ai/en/latest/api/vllm/config/lora/#vllm.config.lora.LoRAConfig.fully_sharded_loras
144+
fully_sharded_loras: bool
145+
140146
# VLMLoRA additionally allows the user to specify whether the language or vision models should be frozen.
141147
# For example, a common finetuning workload for multimodal models is to apply adapters to language model and fully
142148
# finetune the vision model.

docs/algo/spin.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ The following steps outline how to set up the environment and run the SPIN recip
118118
python3 examples/data_preprocess/gsm8k.py --local_save_dir ~/data/gsm8k # Adjusted path
119119
120120
# Download the base model (Example: Qwen2.5-3B-Instruct)
121-
huggingface-cli download Qwen/Qwen2.5-3B-Instruct --local-dir $HOME/models/Qwen2.5-3B-Instruct
121+
hf download Qwen/Qwen2.5-3B-Instruct --local-dir $HOME/models/Qwen2.5-3B-Instruct
122122
```
123123

124124
4. **Configure:**

0 commit comments

Comments
 (0)