1 change: 1 addition & 0 deletions verl/trainer/config/_generated_ppo_megatron_trainer.yaml
@@ -220,6 +220,7 @@ actor_rollout_ref:
 max_num_seqs: 1024
 enable_chunked_prefill: true
 enable_prefix_caching: true
+logprobs_mode: processed_logprobs
 load_format: dummy
 log_prob_micro_batch_size: null
 log_prob_micro_batch_size_per_gpu: null
1 change: 1 addition & 0 deletions verl/trainer/config/_generated_ppo_trainer.yaml
@@ -209,6 +209,7 @@ actor_rollout_ref:
 max_num_seqs: 1024
 enable_chunked_prefill: true
 enable_prefix_caching: true
+logprobs_mode: processed_logprobs
 load_format: dummy
 log_prob_micro_batch_size: null
 log_prob_micro_batch_size_per_gpu: null
3 changes: 3 additions & 0 deletions verl/trainer/config/rollout/rollout.yaml
@@ -73,6 +73,9 @@ enable_chunked_prefill: True
 # Prefix caching kv-cache blocks is a popular optimization in LLM inference to avoid redundant prompt computations.
 enable_prefix_caching: True
 
+# Which logprobs the rollout engine returns: processed_logprobs are computed after logits processing (temperature etc.), so they match the distribution tokens were actually sampled from.
+logprobs_mode: processed_logprobs
+
 # Which loader to use for rollout model weights: dummy, hf, megatron, etc.
 # safetensors (for huge model, and set use_shm=True); dummy: randomly init model weight
 load_format: dummy
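This key is forwarded by the rollout worker straight into vLLM's engine arguments (see the `vllm_async_server.py` hunk below). A minimal standalone sketch of what the option controls, assuming a vLLM build recent enough to accept `logprobs_mode` as an engine argument; the model name is a placeholder, not part of this PR:

```python
# Sketch: what logprobs_mode changes in vLLM's output. Assumes a vLLM
# version that accepts logprobs_mode; the model name is a placeholder.
from vllm import LLM, SamplingParams

llm = LLM(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # placeholder model
    logprobs_mode="processed_logprobs",  # logprobs after temperature/top-p/top-k processing
)
params = SamplingParams(temperature=0.7, max_tokens=8, logprobs=1)
outputs = llm.generate(["The capital of France is"], params)

# With processed_logprobs these values describe the distribution the
# sampler actually drew from; raw_logprobs would report values computed
# from the unprocessed logits.
print(outputs[0].outputs[0].logprobs)
```

For RL training this distinction matters because rollout logprobs are typically reused as behaviour-policy logprobs; computing them after logits processing keeps them consistent with the tokens that were actually sampled, which is presumably why the default here is `processed_logprobs`.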
1 change: 1 addition & 0 deletions verl/workers/config/rollout.py
@@ -149,6 +149,7 @@ class RolloutConfig(BaseConfig):
 tensor_model_parallel_size: int = 2
 pipeline_model_parallel_size: int = 1
 max_num_batched_tokens: int = 8192
+logprobs_mode: Optional[str] = "processed_logprobs"
 
 # TODO: enable train_kwargs
 # train_sampling_config: SamplingConfig = field(default_factory=SamplingConfig)
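Since the field is a free-form `Optional[str]`, a typo would only surface once the vLLM engine is constructed. A defensive sketch; the accepted set below is what recent vLLM versions document for `logprobs_mode` and is an assumption here, not a verl API:

```python
# Sketch: validating logprobs_mode before it reaches the engine. The set
# of accepted values is assumed from recent vLLM documentation.
from typing import Optional

VALID_LOGPROBS_MODES = {
    "raw_logprobs",        # logprobs from unprocessed logits (vLLM's default)
    "raw_logits",          # the unprocessed logits themselves
    "processed_logprobs",  # logprobs after logits processing (default here)
    "processed_logits",    # logits after processing
}

def check_logprobs_mode(mode: Optional[str]) -> None:
    """Fail early instead of deep inside vLLM engine construction."""
    if mode is not None and mode not in VALID_LOGPROBS_MODES:
        raise ValueError(f"unsupported logprobs_mode: {mode!r}")

check_logprobs_mode("processed_logprobs")  # passes
```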
1 change: 1 addition & 0 deletions verl/workers/rollout/vllm_rollout/vllm_async_server.py
@@ -299,6 +299,7 @@ async def launch_server(self, master_address: str = None, master_port: int = Non
 "max_num_batched_tokens": self.config.max_num_batched_tokens,
 "enable_prefix_caching": self.config.enable_prefix_caching,
 "enable_sleep_mode": self.config.enable_sleep_mode,
+"logprobs_mode": self.config.logprobs_mode,
 "disable_custom_all_reduce": True,
 "enforce_eager": self.config.enforce_eager,
 "gpu_memory_utilization": self.config.gpu_memory_utilization,
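The dict above is ultimately unpacked into vLLM's engine arguments, so the new key only works if the installed vLLM accepts it. A sketch of the hand-off, with kwargs trimmed to the ones visible in the hunk and illustrative values filled in:

```python
# Sketch of the hand-off in launch_server: the kwargs dict is unpacked
# into vLLM's AsyncEngineArgs. Assumes a vLLM version whose engine args
# include logprobs_mode; the model name and values are illustrative.
from vllm.engine.arg_utils import AsyncEngineArgs

engine_kwargs = {
    "model": "Qwen/Qwen2.5-0.5B-Instruct",  # placeholder
    "max_num_batched_tokens": 8192,
    "enable_prefix_caching": True,
    "enable_sleep_mode": True,
    "logprobs_mode": "processed_logprobs",  # the key added in this PR
    "disable_custom_all_reduce": True,
    "enforce_eager": False,
    "gpu_memory_utilization": 0.8,
}
engine_args = AsyncEngineArgs(**engine_kwargs)
```

On an older vLLM whose engine args lack `logprobs_mode`, this construction raises a `TypeError` for the unexpected keyword; pinning the vLLM version, or gating the kwarg on a version check, is the usual mitigation.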