Commit 7a82f2e

[rollout] feat: Add vllm logprob mode and default processed_logprob (#4755)
1 parent: b16e048

5 files changed: +7 -0 lines changed
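
The new logprobs_mode option is forwarded to the vLLM engine. Roughly, in vLLM's naming, raw_logprobs are computed from the logits before the sampling processors run, while processed_logprobs are computed after them (temperature, top-k/top-p, etc.), so they describe the distribution tokens were actually drawn from. A plain-PyTorch sketch of that distinction, illustrative only and not vLLM's implementation:

import torch
import torch.nn.functional as F

# Toy logits over a 4-token vocabulary.
logits = torch.tensor([2.0, 1.0, 0.5, -1.0])
temperature = 0.7

# "raw": log-softmax of the unprocessed logits.
raw_logprobs = F.log_softmax(logits, dim=-1)
# "processed": log-softmax after a sampling processor (temperature scaling here)
# has been applied, i.e. over the distribution tokens are actually sampled from.
processed_logprobs = F.log_softmax(logits / temperature, dim=-1)
print(raw_logprobs)
print(processed_logprobs)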

verl/trainer/config/_generated_ppo_megatron_trainer.yaml

Lines changed: 1 addition & 0 deletions
@@ -220,6 +220,7 @@ actor_rollout_ref:
 max_num_seqs: 1024
 enable_chunked_prefill: true
 enable_prefix_caching: true
+logprobs_mode: processed_logprobs
 load_format: dummy
 log_prob_micro_batch_size: null
 log_prob_micro_batch_size_per_gpu: null

verl/trainer/config/_generated_ppo_trainer.yaml

Lines changed: 1 addition & 0 deletions
@@ -209,6 +209,7 @@ actor_rollout_ref:
 max_num_seqs: 1024
 enable_chunked_prefill: true
 enable_prefix_caching: true
+logprobs_mode: processed_logprobs
 load_format: dummy
 log_prob_micro_batch_size: null
 log_prob_micro_batch_size_per_gpu: null

verl/trainer/config/rollout/rollout.yaml

Lines changed: 3 additions & 0 deletions
@@ -73,6 +73,9 @@ enable_chunked_prefill: True
 # Prefix caching kv-cache blocks is a popular optimization in LLM inference to avoid redundant prompt computations.
 enable_prefix_caching: True

+# logprobs mode for rollout logprobs
+logprobs_mode: processed_logprobs
+
 # Which loader to use for rollout model weights: dummy, hf, megatron, etc.
 # safetensors (for huge model, and set use_shm=True); dummy: randomly init model weight
 load_format: dummy
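
If a run needs vLLM's raw (pre-processing) log-probabilities instead, the field can be overridden like any other rollout option. A minimal OmegaConf sketch; the nesting under actor_rollout_ref.rollout is inferred from the generated trainer YAMLs above, and the raw_logprobs value assumes vLLM's alternative mode of that name:

from omegaconf import OmegaConf

cfg = OmegaConf.load("verl/trainer/config/_generated_ppo_trainer.yaml")
# Default introduced by this commit:
print(cfg.actor_rollout_ref.rollout.logprobs_mode)  # processed_logprobs
# Hypothetical override for a run that wants raw (pre-processing) logprobs:
cfg.actor_rollout_ref.rollout.logprobs_mode = "raw_logprobs"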

verl/workers/config/rollout.py

Lines changed: 1 addition & 0 deletions
@@ -149,6 +149,7 @@ class RolloutConfig(BaseConfig):
     tensor_model_parallel_size: int = 2
     pipeline_model_parallel_size: int = 1
     max_num_batched_tokens: int = 8192
+    logprobs_mode: Optional[str] = "processed_logprobs"

     # TODO: enable train_kwargs
     # train_sampling_config: SamplingConfig = field(default_factory=SamplingConfig)
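
On the dataclass side, the field can be set directly when a RolloutConfig is built in code. A small sketch; it assumes the remaining RolloutConfig fields all have defaults, which is not verified here:

from verl.workers.config.rollout import RolloutConfig

# Assumption: every other field of RolloutConfig has a default value.
cfg = RolloutConfig(logprobs_mode="raw_logprobs")
assert cfg.logprobs_mode == "raw_logprobs"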

verl/workers/rollout/vllm_rollout/vllm_async_server.py

Lines changed: 1 addition & 0 deletions
@@ -299,6 +299,7 @@ async def launch_server(self, master_address: str = None, master_port: int = Non
             "max_num_batched_tokens": self.config.max_num_batched_tokens,
             "enable_prefix_caching": self.config.enable_prefix_caching,
             "enable_sleep_mode": self.config.enable_sleep_mode,
+            "logprobs_mode": self.config.logprobs_mode,
             "disable_custom_all_reduce": True,
             "enforce_eager": self.config.enforce_eager,
             "gpu_memory_utilization": self.config.gpu_memory_utilization,
