3 changes: 3 additions & 0 deletions verl/trainer/config/rollout/rollout.yaml
@@ -73,6 +73,9 @@ enable_chunked_prefill: True
 # Prefix caching kv-cache blocks is a popular optimization in LLM inference to avoid redundant prompt computations.
 enable_prefix_caching: True
 
+# Logprobs mode for rollout: which log-probabilities the inference engine returns
+logprobs_mode: processed_logprobs
+
 # Which loader to use for rollout model weights: dummy, hf, megatron, etc.
 # safetensors (for huge models, set use_shm=True); dummy: randomly init model weights
 load_format: dummy
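The new key sits next to the other engine knobs in rollout.yaml. A minimal sketch of how it could be read or overridden, assuming the rollout config is loaded with OmegaConf (as verl-style YAML configs typically are); the file path and the `raw_logprobs` override value are illustrative, standing in for whatever values the chosen inference engine accepts:

```python
# Illustrative sketch only: load the rollout YAML and read/override logprobs_mode.
from omegaconf import OmegaConf

cfg = OmegaConf.load("verl/trainer/config/rollout/rollout.yaml")

# Fall back to the documented default if an older config file lacks the key.
mode = cfg.get("logprobs_mode", "processed_logprobs")
print(mode)  # -> processed_logprobs

# Command-line style override merged on top of the file config (value is an example).
cli = OmegaConf.from_dotlist(["logprobs_mode=raw_logprobs"])
cfg = OmegaConf.merge(cfg, cli)
```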
1 change: 1 addition & 0 deletions verl/workers/config/rollout.py
@@ -149,6 +149,7 @@ class RolloutConfig(BaseConfig):
     tensor_model_parallel_size: int = 2
     pipeline_model_parallel_size: int = 1
     max_num_batched_tokens: int = 8192
+    logprobs_mode: str = "processed_logprobs"
 
     # TODO: enable train_kwargs
     # train_sampling_config: SamplingConfig = field(default_factory=SamplingConfig)
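On the config-object side the new entry is a plain string field with the same default as the YAML. A trimmed-down sketch (not verl's actual RolloutConfig, which carries many more fields) showing how the default and an override behave:

```python
# Reduced sketch mirroring only the fields shown in the diff above.
from dataclasses import dataclass


@dataclass
class RolloutConfigSketch:
    tensor_model_parallel_size: int = 2
    pipeline_model_parallel_size: int = 1
    max_num_batched_tokens: int = 8192
    logprobs_mode: str = "processed_logprobs"  # field added by this PR


default_cfg = RolloutConfigSketch()
assert default_cfg.logprobs_mode == "processed_logprobs"

# Hypothetical override; which values are accepted ultimately depends on the
# inference engine the rollout worker wraps.
custom_cfg = RolloutConfigSketch(logprobs_mode="raw_logprobs")
```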
1 change: 1 addition & 0 deletions verl/workers/rollout/vllm_rollout/vllm_async_server.py
@@ -299,6 +299,7 @@ async def launch_server(self, master_address: str = None, master_port: int = None
 "max_num_batched_tokens": self.config.max_num_batched_tokens,
 "enable_prefix_caching": self.config.enable_prefix_caching,
 "enable_sleep_mode": self.config.enable_sleep_mode,
+"logprobs_mode": self.config.logprobs_mode,
 "disable_custom_all_reduce": True,
 "enforce_eager": self.config.enforce_eager,
 "gpu_memory_utilization": self.config.gpu_memory_utilization,
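The async server simply forwards the configured value alongside the other engine kwargs in this dict. A hedged sketch of that hand-off, assuming a vLLM release whose AsyncEngineArgs exposes logprobs_mode (older releases would reject the kwarg); the model name and numeric values are placeholders, not what verl passes:

```python
# Sketch of forwarding a kwargs dict like the one in the diff to vLLM.
# Assumes a vLLM version whose AsyncEngineArgs accepts logprobs_mode;
# on older versions this keyword would raise a TypeError.
from vllm.engine.arg_utils import AsyncEngineArgs

engine_kwargs = {
    "max_num_batched_tokens": 8192,
    "enable_prefix_caching": True,
    "enable_sleep_mode": True,
    "logprobs_mode": "processed_logprobs",  # value taken from RolloutConfig
    "disable_custom_all_reduce": True,
    "enforce_eager": False,
    "gpu_memory_utilization": 0.8,
}

# Placeholder model name; in verl this comes from the server/rollout config.
engine_args = AsyncEngineArgs(model="Qwen/Qwen2.5-7B-Instruct", **engine_kwargs)
```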