#!/usr/bin/env bash
#
# End-to-end regression test for fully_async_policy.
#
# Launches one fully asynchronous PPO training run using the backend selected
# by ACTOR_STRATEGY (fsdp2 or megatron), to check that the asynchronous
# training mechanism works.
#
# NOTE(review): this copy appears to have been captured from a code-review web
# page titled "avoid fsdp2 shard of lm_head to enable fused_kernels #70". The
# page banner and the per-line table markup were removed so the script parses.
#
# Optional environment overrides:
#   NUM_GPUS         total GPU count used to derive the split (default: 8)
#   ACTOR_STRATEGY   fsdp2 | megatron                         (default: fsdp2)
#   ROLLOUT_NAME     vllm | sglang | trtllm                   (default: vllm)
#   MODEL_ID         model identifier         (default: Qwen/Qwen2.5-0.5B-Instruct)
#   MODEL_PATH       local model directory    (default: ${HOME}/models/${MODEL_ID})
#   N_GPUS_ROLLOUT   GPUs given to rollout    (default: NUM_GPUS / 2)
#   N_GPUS_TRAINING  GPUs given to training   (default: NUM_GPUS / 2)
#   GEN_TP           rollout tensor-parallel size, honoured for trtllm only
#
# Any command-line arguments are appended to the trainer invocation.
set -xeuo pipefail

NUM_GPUS=${NUM_GPUS:-8}
ACTOR_STRATEGY=${ACTOR_STRATEGY:-"fsdp2"} # fsdp2 or megatron
ROLLOUT_NAME=${ROLLOUT_NAME:-"vllm"}      # vllm, sglang, or trtllm

# Model location. The download step is left disabled; the model is expected
# to already exist at MODEL_PATH.
MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
# hf download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

rollout_mode="async"
rollout_name="${ROLLOUT_NAME}"
return_raw_chat="True"
if [[ "${rollout_name}" == "vllm" ]]; then
  export VLLM_USE_V1=1
fi

# Algorithm parameters
adv_estimator=grpo
use_kl_in_reward=False
kl_coef=0.0
use_kl_loss=False
kl_loss_coef=0.0
clip_ratio_low=0.2
clip_ratio_high=0.28

# Response length parameters
max_prompt_length=1024
max_response_length=2048
enable_overlong_buffer=True
overlong_buffer_len=128
overlong_penalty_factor=1.0

# Training parameters
loss_agg_mode="token-mean"

# Temperature parameters
temperature=1.0
top_p=1.0
top_k=-1
val_top_p=0.7

# Fully async specific parameters
# Split GPUs evenly between rollout and training (integer division, so an odd
# NUM_GPUS leaves one GPU unused unless the N_GPUS_* overrides are given).
n_gpus_rollout=${N_GPUS_ROLLOUT:-$((NUM_GPUS / 2))}
n_gpus_training=${N_GPUS_TRAINING:-$((NUM_GPUS / 2))}
train_prompt_bsz=0
gen_prompt_bsz=1
n_resp_per_prompt=16
train_prompt_mini_bsz=16
total_rollout_steps=128
test_freq=-1
staleness_threshold=0.5
trigger_parameter_sync_step=4
partial_rollout=True
use_trainer_do_validate=False

# Experiment name: lower-cased model basename plus rollout engine and strategy.
exp_name="$(basename "${MODEL_ID,,}")-fully-async-policy-${rollout_name}-${ACTOR_STRATEGY}-minimal"

echo "Running fully_async_policy with ${ACTOR_STRATEGY} strategy"
echo "Total GPUs: ${NUM_GPUS}, Rollout GPUs: ${n_gpus_rollout}, Training GPUs: ${n_gpus_training}"
# Common key=value config overrides passed to the trainer for both the FSDP2
# and the Megatron launch. Order is preserved from the original list because
# later duplicates (appended by the strategy-specific code) are expected to
# take precedence. Every expansion is quoted so each override stays exactly
# one argument regardless of the value it carries.
common_params=(
  # Data
  data.train_files="${HOME}/data/gsm8k/train.parquet"
  data.val_files="${HOME}/data/gsm8k/test.parquet"
  data.prompt_key=prompt
  data.truncation=left
  data.max_prompt_length="${max_prompt_length}"
  data.max_response_length="${max_response_length}"
  data.train_batch_size="${train_prompt_bsz}"
  data.gen_batch_size="${gen_prompt_bsz}"
  data.return_raw_chat="${return_raw_chat}"
  actor_rollout_ref.rollout.n="${n_resp_per_prompt}"
  actor_rollout_ref.rollout.calculate_log_probs=True
  # Algorithm
  algorithm.adv_estimator="${adv_estimator}"
  algorithm.use_kl_in_reward="${use_kl_in_reward}"
  algorithm.kl_ctrl.kl_coef="${kl_coef}"
  # Actor
  actor_rollout_ref.hybrid_engine=False
  actor_rollout_ref.actor.use_kl_loss="${use_kl_loss}"
  actor_rollout_ref.actor.kl_loss_coef="${kl_loss_coef}"
  actor_rollout_ref.actor.clip_ratio_low="${clip_ratio_low}"
  actor_rollout_ref.actor.clip_ratio_high="${clip_ratio_high}"
  actor_rollout_ref.actor.clip_ratio_c=10.0
  actor_rollout_ref.model.path="${MODEL_PATH}"
  actor_rollout_ref.actor.optim.lr=1e-6
  actor_rollout_ref.actor.optim.lr_warmup_steps=-1
  actor_rollout_ref.actor.optim.weight_decay=0.1
  actor_rollout_ref.actor.ppo_mini_batch_size="${train_prompt_mini_bsz}"
  actor_rollout_ref.actor.entropy_coeff=0
  actor_rollout_ref.actor.loss_agg_mode="${loss_agg_mode}"
  # Rollout sampling
  actor_rollout_ref.rollout.gpu_memory_utilization=0.80
  actor_rollout_ref.rollout.temperature="${temperature}"
  actor_rollout_ref.rollout.top_p="${top_p}"
  actor_rollout_ref.rollout.top_k="${top_k}"
  actor_rollout_ref.rollout.val_kwargs.temperature="${temperature}"
  actor_rollout_ref.rollout.val_kwargs.top_p="${val_top_p}"
  actor_rollout_ref.rollout.val_kwargs.top_k="${top_k}"
  actor_rollout_ref.rollout.val_kwargs.do_sample=True
  actor_rollout_ref.rollout.val_kwargs.n=1
  actor_rollout_ref.rollout.enable_chunked_prefill=True
  actor_rollout_ref.rollout.name="${rollout_name}"
  actor_rollout_ref.rollout.mode="${rollout_mode}"
  actor_rollout_ref.rollout.disable_log_stats=False
  # Reward
  reward.reward_manager.name=dapo
  +reward.reward_kwargs.overlong_buffer_cfg.enable="${enable_overlong_buffer}"
  +reward.reward_kwargs.overlong_buffer_cfg.len="${overlong_buffer_len}"
  +reward.reward_kwargs.overlong_buffer_cfg.penalty_factor="${overlong_penalty_factor}"
  +reward.reward_kwargs.overlong_buffer_cfg.log=False
  +reward.reward_kwargs.max_resp_len="${max_response_length}"
  # Trainer / rollout resources
  # Single-quoted so "[console]" is never treated as a glob bracket expression;
  # the resulting argument is the same word the unquoted form produced.
  'trainer.logger=[console]'
  trainer.project_name=verl-test-fully-async
  trainer.experiment_name="${exp_name}"
  trainer.val_before_train=True
  trainer.save_freq=-1
  trainer.resume_mode=disable
  trainer.nnodes=1
  trainer.n_gpus_per_node="${n_gpus_training}"
  trainer.log_val_generations=10
  rollout.nnodes=1
  rollout.n_gpus_per_node="${n_gpus_rollout}"
  rollout.total_rollout_steps="${total_rollout_steps}"
  trainer.total_epochs=2
  trainer.test_freq="${test_freq}"
  # Fully async specific configurations
  async_training.staleness_threshold="${staleness_threshold}"
  async_training.partial_rollout="${partial_rollout}"
  async_training.trigger_parameter_sync_step="${trigger_parameter_sync_step}"
  async_training.use_trainer_do_validate="${use_trainer_do_validate}"
  actor_rollout_ref.rollout.checkpoint_engine.backend=nccl
  actor_rollout_ref.rollout.checkpoint_engine.update_weights_bucket_megabytes=1024
)
# Detect device: capture whatever verl's get_device_name() prints.
# The here-doc delimiter is quoted so the Python source is passed literally,
# and the terminator must sit alone at the start of its line to be recognised.
# With `set -e` active, a failing Python invocation aborts the script here.
device_name=$(python3 - <<'EOF'
from verl.utils.device import get_device_name
print(get_device_name())
EOF
)
# Rollout tensor-parallel size (identical rule for both backends):
#   trtllm      -> one replica uses all rollout GPUs as a single TP group,
#                  unless GEN_TP overrides it.
#   vllm/sglang -> fixed TP=2.
if [[ "${rollout_name}" == "trtllm" ]]; then
  gen_tp=${GEN_TP:-${n_gpus_rollout}}
else
  gen_tp=2
fi

# Launch the trainer for the selected backend. Extra command-line arguments
# are forwarded verbatim via "$@" (quoted so arguments containing spaces or
# glob characters are not re-split or expanded).
if [[ "${ACTOR_STRATEGY}" == "fsdp2" ]]; then
  echo "Running fully async training with FSDP2 strategy..."

  # FSDP2 specific parameters
  sp_size=1
  fsdp_size=2
  ref_offload=True
  actor_offload=False

  # On NPU devices: lower rollout memory utilisation and offload the actor.
  if [[ "${device_name}" == "npu" ]]; then
    # TODO: the checkpoint_engine.backend should be unified to nccl
    # actor_rollout_ref.rollout.checkpoint_engine.backend='hccl'
    common_params+=(
      actor_rollout_ref.rollout.gpu_memory_utilization=0.70
    )
    actor_offload=True
  fi

  python3 -m verl.experimental.fully_async_policy.fully_async_main \
    "${common_params[@]}" \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
    critic.strategy=fsdp2 \
    actor_rollout_ref.actor.grad_clip=1.0 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.use_dynamic_bsz=True \
    actor_rollout_ref.ref.log_prob_use_dynamic_bsz=True \
    actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=True \
    actor_rollout_ref.actor.fsdp_config.param_offload="${actor_offload}" \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload="${actor_offload}" \
    actor_rollout_ref.actor.ulysses_sequence_parallel_size="${sp_size}" \
    actor_rollout_ref.rollout.tensor_model_parallel_size="${gen_tp}" \
    actor_rollout_ref.ref.fsdp_config.param_offload="${ref_offload}" \
    actor_rollout_ref.ref.ulysses_sequence_parallel_size="${sp_size}" \
    actor_rollout_ref.actor.fsdp_config.fsdp_size="${fsdp_size}" \
    "$@"

elif [[ "${ACTOR_STRATEGY}" == "megatron" ]]; then
  echo "Running fully async training with Megatron strategy..."

  # Megatron specific parameters
  train_tp=2
  # The pipeline size is derived by integer division, so the training GPU
  # count must be a positive multiple of train_tp; otherwise the result would
  # be 0 or would not cover all training GPUs. Fail early with a clear message.
  if ((n_gpus_training < train_tp || n_gpus_training % train_tp != 0)); then
    echo "Error: training GPUs (${n_gpus_training}) must be a positive multiple of train_tp (${train_tp}) for megatron" >&2
    exit 1
  fi
  train_pp=$((n_gpus_training / train_tp))
  ref_offload=True
  actor_offload=True

  # Appended after the common value so this lower utilisation is the last one
  # given for the key.
  common_params+=(
    actor_rollout_ref.rollout.gpu_memory_utilization=0.60
  )

  python3 -m verl.experimental.fully_async_policy.fully_async_main \
    --config-path=config \
    --config-name='fully_async_ppo_megatron_trainer.yaml' \
    "${common_params[@]}" \
    actor_rollout_ref.actor.strategy=megatron \
    critic.strategy=megatron \
    actor_rollout_ref.actor.optim.lr_decay_steps=10000000 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \
    actor_rollout_ref.actor.megatron.param_offload="${actor_offload}" \
    actor_rollout_ref.actor.megatron.optimizer_offload="${actor_offload}" \
    actor_rollout_ref.actor.megatron.grad_offload="${actor_offload}" \
    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size="${train_pp}" \
    actor_rollout_ref.actor.megatron.tensor_model_parallel_size="${train_tp}" \
    actor_rollout_ref.rollout.tensor_model_parallel_size="${gen_tp}" \
    actor_rollout_ref.ref.megatron.pipeline_model_parallel_size="${train_pp}" \
    actor_rollout_ref.ref.megatron.tensor_model_parallel_size="${train_tp}" \
    actor_rollout_ref.ref.megatron.param_offload="${ref_offload}" \
    "$@"

else
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Error: Unknown strategy ${ACTOR_STRATEGY}. Please use 'fsdp2' or 'megatron'" >&2
  exit 1
fi

# Only reached when the trainer exited with status 0 (set -e aborts otherwise).
echo "Fully async policy E2E test completed successfully with ${ACTOR_STRATEGY} strategy"