Skip to content

Commit 96c181a

Browse files
authored
chore(ci): support FSDP2 for multi-turn SGLangRollout with tool calling (#1650)
1 parent 0528ba1 commit 96c181a

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

.github/workflows/e2e_ppo_trainer.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,10 @@ jobs:
269269
run: |
270270
ray stop --force
271271
bash tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh
272+
- name: Running GSM8K with tool E2E training tests with FSDP2
273+
run: |
274+
ray stop --force
275+
FSDP_STRATEGY=fsdp2 bash tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh
272276
273277
e2e_ppo_trainer_sglang_vlm:
274278
runs-on: [L20x8]

tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ ulimit -n 65535
99

1010
PROJECT_DIR="$(pwd)"
1111
CONFIG_PATH="$PROJECT_DIR/examples/sglang_multiturn/config"
12+
FSDP_STRATEGY=${FSDP_STRATEGY:-fsdp}
1213

1314
python3 -m verl.trainer.main_ppo \
1415
--config-path="$CONFIG_PATH" \
@@ -30,6 +31,7 @@ python3 -m verl.trainer.main_ppo \
3031
actor_rollout_ref.actor.kl_loss_type=low_var_kl \
3132
actor_rollout_ref.actor.entropy_coeff=0 \
3233
actor_rollout_ref.model.enable_gradient_checkpointing=True \
34+
actor_rollout_ref.actor.strategy=$FSDP_STRATEGY \
3335
actor_rollout_ref.actor.fsdp_config.param_offload=False \
3436
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
3537
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
@@ -38,12 +40,13 @@ python3 -m verl.trainer.main_ppo \
3840
actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
3941
actor_rollout_ref.rollout.n=8 \
4042
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
43+
actor_rollout_ref.ref.strategy=$FSDP_STRATEGY \
4144
actor_rollout_ref.ref.fsdp_config.param_offload=True \
4245
algorithm.use_kl_in_reward=False \
4346
trainer.critic_warmup=0 \
4447
trainer.logger=['console'] \
4548
trainer.project_name='gsm8k_async_rl' \
46-
trainer.experiment_name='qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-rebased-0427-verify-n16' \
49+
trainer.experiment_name=qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-$FSDP_STRATEGY-rebased-0427-verify-n16 \
4750
trainer.n_gpus_per_node=8 \
4851
trainer.nnodes=1 \
4952
trainer.save_freq=-1 \

0 commit comments

Comments (0)