
Commit 275f8b9

[Serve LLM] Handle missing state attributes from vLLM's task-conditional init_app_state (#60812)
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
1 parent b93fc26 · commit 275f8b9

File tree: 1 file changed

python/ray/llm/_internal/serve/engines/vllm/vllm_engine.py

Lines changed: 74 additions & 41 deletions
@@ -322,13 +322,17 @@ async def start(self) -> None:
             args=args,
         )

-        self._oai_models = state.openai_serving_models
-        self._oai_serving_chat = state.openai_serving_chat
-        self._oai_serving_completion = state.openai_serving_completion
-        self._oai_serving_embedding = state.openai_serving_embedding
-        self._oai_serving_transcription = state.openai_serving_transcription
-        self._oai_serving_scores = state.openai_serving_scores
-        self._oai_serving_tokenization = state.openai_serving_tokenization
+        self._oai_models = getattr(state, "openai_serving_models", None)
+        self._oai_serving_chat = getattr(state, "openai_serving_chat", None)
+        self._oai_serving_completion = getattr(state, "openai_serving_completion", None)
+        self._oai_serving_embedding = getattr(state, "openai_serving_embedding", None)
+        self._oai_serving_transcription = getattr(
+            state, "openai_serving_transcription", None
+        )
+        self._oai_serving_scores = getattr(state, "openai_serving_scores", None)
+        self._oai_serving_tokenization = getattr(
+            state, "openai_serving_tokenization", None
+        )

         self._validate_openai_serving_models()
         self._validate_engine_client()
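
Note: vLLM's init_app_state attaches only the serving handlers that match the model's task, so direct attribute access can raise AttributeError at engine startup. A minimal sketch of the failure mode and the getattr fix (the SimpleNamespace stand-in for vLLM's app state is hypothetical, for illustration only):

    from types import SimpleNamespace

    # Stand-in for vLLM's task-conditional app state: an embedding model
    # gets openai_serving_embedding but no openai_serving_chat.
    state = SimpleNamespace(openai_serving_embedding=object())

    # Old behavior: direct access crashes startup for any handler the
    # model's task did not create.
    try:
        chat = state.openai_serving_chat
    except AttributeError as exc:
        print(f"startup would fail: {exc}")

    # New behavior: getattr with a None default stores None, deferring
    # the failure to request time, where it becomes a per-request error.
    chat = getattr(state, "openai_serving_chat", None)
    assert chat is None
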
@@ -346,38 +350,53 @@ def _validate_openai_serving_models(self):
             self._oai_models, "load_lora_adapter"
         ), "oai_models must have a load_lora_adapter attribute"

-    def _validate_openai_serving_chat(self):
-        assert hasattr(
-            self._oai_serving_chat, "create_chat_completion"
-        ), "oai_serving_chat must have a create_chat_completion attribute"
+    @staticmethod
+    def _make_error(message: str) -> ErrorResponse:
+        return ErrorResponse(
+            error=ErrorInfo(message=message, type="invalid_request_error", code=400)
+        )

-    def _validate_openai_serving_completion(self):
-        assert hasattr(
-            self._oai_serving_completion, "create_completion"
-        ), "oai_serving_completion must have a create_completion attribute"
+    def _validate_openai_serving_chat(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_chat is None:
+            return self._make_error(
+                "This model does not support the 'generate' task. "
+                "The chat completion endpoint is not available for this model."
+            )

-    def _validate_openai_serving_embedding(self):
-        assert hasattr(
-            self._oai_serving_embedding, "create_embedding"
-        ), "oai_serving_embedding must have a create_embedding attribute"
+    def _validate_openai_serving_completion(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_completion is None:
+            return self._make_error(
+                "This model does not support the 'generate' task. "
+                "The completion endpoint is not available for this model."
+            )

-    def _validate_openai_serving_transcription(self):
-        assert hasattr(
-            self._oai_serving_transcription, "create_transcription"
-        ), "oai_serving_transcription must have a create_transcription attribute"
+    def _validate_openai_serving_embedding(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_embedding is None:
+            return self._make_error(
+                "This model does not support the 'embed' task. "
+                "The embedding endpoint is not available for this model."
+            )

-    def _validate_openai_serving_scores(self):
-        assert hasattr(
-            self._oai_serving_scores, "create_score"
-        ), "oai_serving_scores must have a create_score attribute"
+    def _validate_openai_serving_transcription(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_transcription is None:
+            return self._make_error(
+                "This model does not support the 'transcription' task. "
+                "The transcription endpoint is not available for this model."
+            )

-    def _validate_openai_serving_tokenization(self):
-        assert hasattr(
-            self._oai_serving_tokenization, "create_tokenize"
-        ), "oai_serving_tokenization must have a create_tokenize attribute"
-        assert hasattr(
-            self._oai_serving_tokenization, "create_detokenize"
-        ), "oai_serving_tokenization must have a create_detokenize attribute"
+    def _validate_openai_serving_scores(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_scores is None:
+            return self._make_error(
+                "This model does not support the 'score' task. "
+                "The score endpoint is not available for this model."
+            )
+
+    def _validate_openai_serving_tokenization(self) -> Optional[ErrorResponse]:
+        if self._oai_serving_tokenization is None:
+            return self._make_error(
+                "This model does not support the 'tokenization' task. "
+                "The tokenization endpoint is not available for this model."
+            )

     def _validate_engine_client(self):
         assert hasattr(
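
The validators above replace hard assertion crashes with Optional[ErrorResponse] return values. The seven near-identical methods could equally be expressed as one parameterized helper; a hedged sketch under simplified stand-in types (make_validation_error and the dataclasses below are hypothetical, not part of this commit):

    from dataclasses import dataclass
    from typing import Optional

    # Simplified stand-ins for the ErrorInfo/ErrorResponse types used above.
    @dataclass
    class ErrorInfo:
        message: str
        type: str
        code: int

    @dataclass
    class ErrorResponse:
        error: ErrorInfo

    def make_validation_error(
        handler: Optional[object], task: str, endpoint: str
    ) -> Optional[ErrorResponse]:
        # Returns a 400-style error for the caller to surface instead of
        # asserting, so a missing handler no longer crashes the replica.
        if handler is None:
            return ErrorResponse(
                error=ErrorInfo(
                    message=(
                        f"This model does not support the '{task}' task. "
                        f"The {endpoint} endpoint is not available for this model."
                    ),
                    type="invalid_request_error",
                    code=400,
                )
            )
        return None
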
@@ -486,7 +505,9 @@ async def chat(
         request: ChatCompletionRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[str, ChatCompletionResponse, ErrorResponse], None]:
-        self._validate_openai_serving_chat()
+        if error := self._validate_openai_serving_chat():
+            yield error
+            return

         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
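
Each endpoint now short-circuits with the walrus operator: if the validator returns an ErrorResponse, the async generator yields it once and returns. A runnable sketch of the control flow with stand-in types (validate and the payloads are hypothetical, for illustration only):

    import asyncio
    from typing import AsyncGenerator, Optional, Union

    def validate() -> Optional[str]:
        # Stand-in validator: an error value when the handler is missing,
        # None when the request may proceed.
        return "chat endpoint unavailable for this model"

    async def chat() -> AsyncGenerator[Union[str, dict], None]:
        # `:=` binds and tests the validator result in one expression;
        # yielding the error and then returning ends the stream cleanly
        # instead of raising inside the generator.
        if error := validate():
            yield error
            return
        yield {"choices": []}  # normal streaming path (unreached here)

    async def main() -> None:
        async for chunk in chat():
            print(chunk)

    asyncio.run(main())
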
@@ -514,7 +535,9 @@ async def completions(
         request: CompletionRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[str, CompletionResponse, ErrorResponse], None]:
-        self._validate_openai_serving_completion()
+        if error := self._validate_openai_serving_completion():
+            yield error
+            return

         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -544,7 +567,9 @@ async def embeddings(
         request: EmbeddingRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[EmbeddingResponse, ErrorResponse], None]:
-        self._validate_openai_serving_embedding()
+        if error := self._validate_openai_serving_embedding():
+            yield error
+            return

         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -566,7 +591,9 @@ async def transcriptions(
         request: TranscriptionRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[str, TranscriptionResponse, ErrorResponse], None]:
-        self._validate_openai_serving_transcription()
+        if error := self._validate_openai_serving_transcription():
+            yield error
+            return

         # Extract audio data from the request file
         audio_data = await request.file.read()
@@ -600,7 +627,9 @@ async def score(
         request: ScoreRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[ScoreResponse, ErrorResponse], None]:
-        self._validate_openai_serving_scores()
+        if error := self._validate_openai_serving_scores():
+            yield error
+            return

         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -620,7 +649,9 @@ async def tokenize(
         request: TokenizeRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[TokenizeResponse, ErrorResponse], None]:
-        self._validate_openai_serving_tokenization()
+        if error := self._validate_openai_serving_tokenization():
+            yield error
+            return

         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
@@ -640,7 +671,9 @@ async def detokenize(
         request: DetokenizeRequest,
         raw_request_info: Optional[RawRequestInfo] = None,
     ) -> AsyncGenerator[Union[DetokenizeResponse, ErrorResponse], None]:
-        self._validate_openai_serving_tokenization()
+        if error := self._validate_openai_serving_tokenization():
+            yield error
+            return

         raw_request: Optional[Request] = RawRequestInfo.to_starlette_request_optional(
             raw_request_info
