@@ -9,6 +9,7 @@ ulimit -n 65535

PROJECT_DIR="$(pwd)"
CONFIG_PATH="$PROJECT_DIR/examples/sglang_multiturn/config"
+FSDP_STRATEGY=${FSDP_STRATEGY:-fsdp}

python3 -m verl.trainer.main_ppo \
    --config-path="$CONFIG_PATH" \
@@ -30,6 +31,7 @@ python3 -m verl.trainer.main_ppo \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.entropy_coeff=0 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
+    actor_rollout_ref.actor.strategy=$FSDP_STRATEGY \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
@@ -38,12 +40,13 @@ python3 -m verl.trainer.main_ppo \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
    actor_rollout_ref.rollout.n=8 \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
+    actor_rollout_ref.ref.strategy=$FSDP_STRATEGY \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console'] \
    trainer.project_name='gsm8k_async_rl' \
-    trainer.experiment_name='qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-rebased-0427-verify-n16' \
+    trainer.experiment_name=qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-$FSDP_STRATEGY-rebased-0427-verify-n16 \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=1 \
    trainer.save_freq=-1 \
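The diff parameterizes the FSDP backend through an environment variable (defaulting to fsdp) and threads it into the actor strategy, the ref strategy, and the experiment name, so the same script can be launched against different strategies. A minimal usage sketch, assuming the script is the multi-turn GSM8K example under examples/sglang_multiturn/ (the exact filename is not shown in this diff) and that the override value is one your verl version actually supports:

# Default run: uses the fsdp strategy baked into the script
bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh   # assumed script path

# Override the strategy without editing the script
# (fsdp2 is illustrative; the value must be a strategy your verl build accepts)
FSDP_STRATEGY=fsdp2 bash examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh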