-
Notifications
You must be signed in to change notification settings - Fork 7.7k
[serve.llm] Delegate P/D orchestration to the KV-connector backend #63950
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
beb5436
b2fd6e5
e3ffc5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| import copy | ||
| from typing import TYPE_CHECKING | ||
| from typing import TYPE_CHECKING, List | ||
|
|
||
| from ray.llm._internal.serve.engines.vllm.kv_transfer.base import ( | ||
| BaseConnectorBackend, | ||
|
|
@@ -13,8 +13,18 @@ | |
|
|
||
|
|
||
| class MultiConnectorBackend(BaseConnectorBackend): | ||
| """Wraps multiple sub-connectors. | ||
|
|
||
| The P/D protocol (``prepare_prefill_request`` / ``prepare_decode_request`` and | ||
| the ``requires_peer_binding`` / ``concurrent_handoff`` policy) is delegated to | ||
| the *first* (top-most) sub-connector listed in ``connectors`` — that | ||
| connector's policy governs request shaping and handoff for the group. Each | ||
| sub-connector's ``setup()`` still runs. | ||
| """ | ||
|
|
||
def __init__(self, llm_config: "LLMConfig"):
    """Initialise the wrapper with an empty sub-connector registry.

    Args:
        llm_config: Configuration forwarded unchanged to the base-class
            initialiser.
    """
    super().__init__(llm_config)
    # Starts empty; ``setup()`` appends each sub-connector backend after
    # setting it up, so index 0 is the first connector that was configured.
    self._connector_backends: List[BaseConnectorBackend] = list()
|
|
||
| def setup(self) -> None: | ||
| """Setup all connectors listed in the kv_transfer_config.""" | ||
|
|
@@ -49,3 +59,29 @@ def setup(self) -> None: | |
| connector_backend_str, sub_llm_config | ||
| ) | ||
| connector_backend.setup() | ||
| self._connector_backends.append(connector_backend) | ||
|
|
||
@property
def _primary(self) -> BaseConnectorBackend:
    """Return the first registered sub-connector.

    The first entry in ``_connector_backends`` is the one whose protocol
    is used for the whole group.

    Raises:
        ValueError: If no sub-connector has been registered yet.
    """
    backends = self._connector_backends
    if backends:
        return backends[0]
    # Nothing registered: fail loudly with a hint about the likely cause.
    raise ValueError(
        "MultiConnectorBackend has no sub-connectors; was setup() called?"
    )
|
|
||
@property
def requires_peer_binding(self) -> bool:
    """Report the primary sub-connector's ``requires_peer_binding`` policy.

    Returns ``False`` while no sub-connectors are registered, instead of
    raising through ``_primary``; otherwise returns the primary's value
    as-is.
    """
    if not self._connector_backends:
        return False
    return self._primary.requires_peer_binding
|
|
||
@property
def concurrent_handoff(self) -> bool:
    """Report the primary sub-connector's ``concurrent_handoff`` policy.

    Returns ``False`` while no sub-connectors are registered, instead of
    raising through ``_primary``; otherwise returns the primary's value
    as-is.
    """
    if not self._connector_backends:
        return False
    return self._primary.concurrent_handoff
|
|
||
def prepare_prefill_request(self, *, request, peer):
    """Shape a prefill request by delegating to the primary sub-connector.

    Args:
        request: Passed through to the primary connector unchanged.
        peer: Passed through to the primary connector unchanged.

    Returns:
        Whatever the primary connector's ``prepare_prefill_request`` returns.

    Raises:
        ValueError: From ``_primary`` when no sub-connectors are registered.
    """
    primary = self._primary
    return primary.prepare_prefill_request(request=request, peer=peer)
|
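There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Empty MultiConnector crashes prepare calls (Medium Severity): if no sub-connectors are registered, `requires_peer_binding` and `concurrent_handoff` return False, but `prepare_prefill_request` and `prepare_decode_request` raise ValueError via `_primary`. Reviewed by Cursor Bugbot for commit e3ffc5a. Configure here. |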
||
|
|
||
def prepare_decode_request(self, *, request, peer, prefill_response):
    """Shape a decode request by delegating to the primary sub-connector.

    Args:
        request: Passed through to the primary connector unchanged.
        peer: Passed through to the primary connector unchanged.
        prefill_response: Passed through to the primary connector unchanged.

    Returns:
        Whatever the primary connector's ``prepare_decode_request`` returns.

    Raises:
        ValueError: From ``_primary`` when no sub-connectors are registered.
    """
    primary = self._primary
    return primary.prepare_decode_request(
        request=request,
        peer=peer,
        prefill_response=prefill_response,
    )


Uh oh!
There was an error while loading. Please reload this page.