Commit 4ce150f
[rollout] fix: remove unexpected concurrency bound at 1000 (verl-project#5402)
### What does this PR do?

Remove the unexpected concurrency bound of 1000 that prevented the rollout engine from reaching `actor_rollout_ref.rollout.max_num_seqs` when it is larger than 1000.

The [Ray doc](https://docs.ray.io/en/latest/ray-core/api/doc/ray.actor.ActorClass.options.html#ray.actor.ActorClass.options:~:text=calls%20is%20unlimited.-,max_concurrency,-%E2%80%93%20The%20max%20number) says:

```
max_concurrency: The max number of concurrent calls to allow for this actor. This only works with direct actor calls. The max concurrency defaults to 1 for threaded execution, and 1000 for asyncio execution. Note that the execution order is not guaranteed when max_concurrency > 1.
```

and the call to `{TRTLLM,vLLM,SGLang}HttpServer.generate` is an async remote call:

https://github.com/verl-project/verl/blob/6f4942b1153b23720e74564e00817526b342198c/verl/experimental/agent_loop/agent_loop.py#L114-L120

So the default value limits request concurrency to 1000. This PR sets `max_concurrency` based on `actor_rollout_ref.rollout.max_num_seqs` so that a higher concurrency configured by the user can be achieved.

### Checklist Before Starting

- [x] Search for similar PRs. Paste at least one query link here: https://github.com/verl-project/verl/pulls?q=is%3Apr+is%3Aopen+rollout+concurrency+
- [x] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
  - `{modules}` include `fsdp`, `megatron`, `veomni`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`, `cfg`, `reward`
  - If this PR involves multiple modules, separate them with `,`, like `[megatron, fsdp, doc]`
  - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
  - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
    - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching`

### Test

Current tests should be enough to ensure it does not break anything.

### Checklist Before Submitting

> [!IMPORTANT]
> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.

- [x] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
- [x] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
- [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
- [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).)
- [ ] If your PR is related to the `recipe` submodule, please also update the reference to the submodule commit via `git submodule update --remote` or `cd recipe && git pull origin main`.
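The bottleneck described above can be reproduced without Ray at all: a plain `asyncio.Semaphore` plays the role of `max_concurrency`, capping how many `generate` calls are in flight at once. This is an illustrative sketch only — the function names are made up and nothing here is verl or Ray code.

```python
import asyncio


async def demo(cap: int, n_requests: int) -> int:
    """Simulate an async actor whose method calls are bounded to `cap`
    concurrent executions, analogous to Ray's max_concurrency for
    asyncio actors. Returns the peak number of in-flight calls."""
    sem = asyncio.Semaphore(cap)  # stands in for max_concurrency
    in_flight = 0
    peak = 0

    async def generate() -> None:
        nonlocal in_flight, peak
        async with sem:
            in_flight += 1
            peak = max(peak, in_flight)
            await asyncio.sleep(0)  # yield so other calls can start
            in_flight -= 1

    await asyncio.gather(*(generate() for _ in range(n_requests)))
    return peak


# 50 requests against a cap of 10: at most 10 ever run at once,
# just as 2000 configured sequences were capped at Ray's default 1000.
peak = asyncio.run(demo(cap=10, n_requests=50))
print(peak)  # 10
```

Raising the cap above the request count (as this PR does via `max_num_seqs`) lets all requests proceed concurrently.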
1 parent de87452 commit 4ce150f

File tree

4 files changed (+15, -1 lines changed)

verl/workers/rollout/replica.py

Lines changed: 12 additions & 1 deletion

```diff
@@ -31,6 +31,11 @@
 logger = logging.getLogger(__file__)
 
 
+# Max number of concurrent calls to the methods of Rollout,
+# excluding calls to generate method.
+CONTROL_METHOD_CONCURRENCY = 16
+
+
 class TokenOutput(BaseModel):
     token_ids: list[int]
     """response token ids"""
@@ -92,7 +97,7 @@ def __init__(
         is_reward_model: bool = False,
     ) -> None:
         self.replica_rank = replica_rank
-        self.config = omega_conf_to_dataclass(config)
+        self.config: RolloutConfig = omega_conf_to_dataclass(config)
         self.model_config: HFModelConfig = model_config
 
         self.world_size = (
@@ -229,6 +234,12 @@ def server_handle(self) -> ActorHandle:
         """Get rollout server handle for Token-in-token-out generation."""
         return self._server_handle
 
+    @property
+    def max_concurrency(self) -> int:
+        # 1000 is Ray's default max_concurrency for async execution.
+        # Add some margin to account for control method call.
+        return max(1000, self.config.max_num_seqs + CONTROL_METHOD_CONCURRENCY)
+
     def rollout_worker_use_gpu(self) -> bool:
         return True
```
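The new `max_concurrency` property reduces to a one-line computation. The sketch below restates it as a standalone function so the behavior is easy to check; only the formula and the `CONTROL_METHOD_CONCURRENCY` constant come from the diff, the function itself is illustrative.

```python
# Head-room for non-generate control-plane calls (value from the diff).
CONTROL_METHOD_CONCURRENCY = 16


def max_concurrency(max_num_seqs: int) -> int:
    """Illustrative restatement of RolloutReplica.max_concurrency.

    Never drop below Ray's asyncio default of 1000; above that, add a
    margin so control method calls are not starved by generate() calls.
    """
    return max(1000, max_num_seqs + CONTROL_METHOD_CONCURRENCY)


print(max_concurrency(256))   # small configs keep Ray's default: 1000
print(max_concurrency(4096))  # large configs get 4096 + 16 = 4112
```

So the change is a no-op for configurations with `max_num_seqs` at or below 1000, and only lifts the ceiling for larger ones.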

verl/workers/rollout/sglang_rollout/async_sglang_server.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -491,6 +491,7 @@ async def launch_servers(self):
             ),
             runtime_env={"env_vars": {f"RAY_EXPERIMENTAL_NOSET_{visible_devices_keyword}": "1"}},
             name=name,
+            max_concurrency=self.max_concurrency,
         ).remote(
             config=self.config,
             model_config=self.model_config,
```

verl/workers/rollout/trtllm_rollout/trtllm_async_server.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -339,6 +339,7 @@ async def launch_servers(self):
             ),
             runtime_env={"env_vars": {"RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": "1"}},
             name=name,
+            max_concurrency=self.max_concurrency,
         ).remote(
             config=self.config,
             model_config=self.model_config,
```

verl/workers/rollout/vllm_rollout/vllm_async_server.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -839,6 +839,7 @@ async def launch_servers(self):
                 }
             },
             name=name,
+            max_concurrency=self.max_concurrency,
         ).remote(
             config=self.config,
             model_config=self.model_config,
```
