|
| 1 | +# Copyright The Marin Authors |
| 2 | +# SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +from __future__ import annotations |
| 5 | + |
| 6 | +from dataclasses import dataclass |
| 7 | +from typing import Any, Protocol |
| 8 | + |
| 9 | +from openai import OpenAI |
| 10 | +from openai.types.chat import ChatCompletion |
| 11 | + |
| 12 | + |
@dataclass(frozen=True)
class ChatCompletionRequest:
    """OpenAI-compatible chat completion request parameters.

    The dataclass is frozen, so fields cannot be reassigned after
    construction. Numeric fields are range-checked in ``__post_init__``.

    Attributes:
        messages: Chat messages, each a dict with string keys and values.
        num_completions: Number of completions requested; must be positive.
        temperature: Sampling temperature; must be non-negative. NaN is
            rejected as well.
        top_p: Top-p value; must lie in the interval (0, 1].
        max_tokens: Optional token limit; must be positive when set.
        seed: Optional seed value.
        logprobs: Whether log probabilities are requested.

    Raises:
        ValueError: If any of the range checks above fails.
    """

    messages: tuple[dict[str, str], ...]
    num_completions: int
    temperature: float
    top_p: float = 1.0
    max_tokens: int | None = None
    seed: int | None = None
    logprobs: bool = False

    def __post_init__(self) -> None:
        """Validate the numeric fields, raising ``ValueError`` on bad input."""
        if self.num_completions <= 0:
            raise ValueError("num_completions must be positive")
        # Deliberately ``not (t >= 0)`` rather than ``t < 0``: every comparison
        # with NaN is False, so the latter form would let NaN slip through.
        if not self.temperature >= 0:
            raise ValueError("temperature must be non-negative")
        # The chained comparison is False for NaN, so NaN is rejected here too.
        if not 0 < self.top_p <= 1.0:
            raise ValueError("top_p must be in the interval (0, 1]")
        if self.max_tokens is not None and self.max_tokens <= 0:
            raise ValueError("max_tokens must be positive when set")
| 34 | + |
| 35 | + |
class CompletionProvider(Protocol):
    """Structural interface for chat completion backends used by inference clients."""

    def complete_messages(self, request: ChatCompletionRequest) -> ChatCompletion:
        """Produce an OpenAI-compatible chat completion response for ``request``."""
        ...
| 41 | + |
| 42 | + |
class OpenAIChatCompletionProvider:
    """Small blocking completion provider for an OpenAI-compatible server.

    Each request is assembled from the provider's model name and the fields
    of a ``ChatCompletionRequest``. Entries in ``extra_request_kwargs`` are
    applied on top of those fields and therefore override them, except for
    ``max_tokens`` and ``seed``: when set on the request, those are written
    last and win over any extra of the same name.
    """

    def __init__(
        self,
        *,
        server_url: str,
        model: str,
        api_key: str = "marin-tts",
        timeout: float | None = None,
        extra_request_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Create the underlying client and store per-provider settings.

        Args:
            server_url: Passed to the ``OpenAI`` client as ``base_url``.
            model: Model name sent with every request.
            api_key: Passed to the ``OpenAI`` client as ``api_key``.
            timeout: Forwarded unchanged to the ``OpenAI`` client.
                NOTE(review): ``None`` presumably disables the client
                timeout rather than selecting the SDK default - confirm.
            extra_request_kwargs: Optional additional keyword arguments
                merged into every request; copied on construction.
        """
        self._client = OpenAI(base_url=server_url, api_key=api_key, timeout=timeout)
        self._model = model
        # Take a private copy so later changes to the caller's dict are not seen here.
        self._extra_request_kwargs = dict(extra_request_kwargs) if extra_request_kwargs else {}

    def complete_messages(self, request: ChatCompletionRequest) -> ChatCompletion:
        """Send ``request`` to the server and return the client's response.

        Args:
            request: Parameters for the chat completion call.

        Returns:
            Whatever ``chat.completions.create`` on the client returns.
        """
        payload: dict[str, Any] = dict(
            model=self._model,
            messages=list(request.messages),
            n=request.num_completions,
            temperature=request.temperature,
            top_p=request.top_p,
            logprobs=request.logprobs,
        )
        # Extras are layered over the standard fields, so they take precedence.
        payload.update(self._extra_request_kwargs)

        # Optional request fields are only sent when set, and then override extras.
        for optional_field in ("max_tokens", "seed"):
            value = getattr(request, optional_field)
            if value is not None:
                payload[optional_field] = value

        return self._client.chat.completions.create(**payload)
0 commit comments