1+ # TODO: This workflow does not pass in CI yet. It still needs:
2+ # - a dummy reward model to be uploaded to verl CI
3+
14# # Tests layout
25
36# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
@@ -171,7 +174,7 @@ jobs:
171174 run : |
172175 rm -rf checkpoints
173176
174- e2e_ppo_trainer_megatron-qwen3 :
177+ e2e_ppo_trainer_megatron-qwen2 :
175178 needs : setup
176179 runs-on : ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
177180 timeout-minutes : 10 # Increase this timeout value as needed
@@ -193,32 +196,40 @@ jobs:
193196 - name : Prepare GSM8K dataset
194197 run : |
195198 python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
196- - name : Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with tie-embedding Megatron (Qwen) with train tp > infer tp
199+ - name : Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen), dummy RM, and train tp > infer tp
197200 run : |
198201 ray stop --force
199202 ENGINE=trtllm \
203+ TOTAL_TRAIN_STEPS=1 \
200204 VAL_BEFORE_TRAIN=True \
201205 TEST_FREQ=1 \
202206 SAVE_FREQ=1 \
203207 TRAIN_TP=2 \
204208 INFER_TP=1 \
205- MODEL_ID=Qwen/Qwen3-0.6B \
209+ RM_TP=8 \
210+ RM_NUM_WORKERS=2 \
211+ MODEL_ID=Qwen/Qwen2.5-1.5B \
206212 bash tests/special_e2e/run_ppo_trainer_megatron.sh \
207- actor_rollout_ref.rollout.mode=async \
208- actor_rollout_ref.rollout.calculate_log_probs=True
209- - name : Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with tie-embedding Megatron (Qwen) with train tp < infer tp
213+ actor_rollout_ref.rollout.mode="async" \
214+ actor_rollout_ref.rollout.calculate_log_probs=True \
215+ +reward_model.rollout.engine_kwargs.trtllm.disable_overlap_scheduler=True
216+ - name : Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen), dummy RM, and train tp < infer tp
210217 run : |
211218 ray stop --force
212219 ENGINE=trtllm \
220+ TOTAL_TRAIN_STEPS=1 \
213221 VAL_BEFORE_TRAIN=True \
214222 TEST_FREQ=1 \
215223 SAVE_FREQ=1 \
216224 TRAIN_TP=1 \
217225 INFER_TP=2 \
218- MODEL_ID=Qwen/Qwen3-0.6B \
226+ RM_TP=8 \
227+ RM_NUM_WORKERS=2 \
228+ MODEL_ID=Qwen/Qwen2.5-1.5B \
219229 bash tests/special_e2e/run_ppo_trainer_megatron.sh \
220- actor_rollout_ref.rollout.mode=async \
221- actor_rollout_ref.rollout.calculate_log_probs=True
230+ actor_rollout_ref.rollout.mode="async" \
231+ actor_rollout_ref.rollout.calculate_log_probs=True \
232+ +reward_model.rollout.engine_kwargs.trtllm.disable_overlap_scheduler=True
222233 - name : clean up
223234 run : |
224235 rm -rf checkpoints
0 commit comments