|
1 | 1 | from enum import StrEnum |
2 | 2 | from http import HTTPMethod |
3 | | -from typing import Annotated |
| 3 | +from typing import Annotated, Literal |
4 | 4 |
|
5 | 5 | import pycountry |
6 | | -from pydantic import BaseModel, Field |
| 6 | +from pydantic import Field |
7 | 7 |
|
8 | | -from api.domain import EntitiesPage |
| 8 | +from api.domain import BaseModel, EntitiesPage |
9 | 9 | from api.domain.embeddings.entities import CreateEmbeddingsBody, Embeddings |
10 | 10 | from api.domain.model.entities import Models, ModelType |
11 | 11 | from api.domain.rerank.entities import CreateRerankBody, Rerank |
@@ -33,6 +33,11 @@ class QoSMetric(StrEnum): |
33 | 33 | PERFORMANCE = "performance" # custom performance metric |
34 | 34 |
|
35 | 35 |
|
# Username/password credential pair.
# Used in this file as the optional `basic_auth` field of `Provider` and the
# optional `auth` field of `ProviderFormattedRequest`.
# NOTE(review): presumably HTTP Basic authentication credentials — confirm
# against the code that builds the outgoing request.
class BasicAuth(BaseModel):
    # Both fields are required (no default is declared).
    username: str
    password: str
| 39 | + |
| 40 | + |
36 | 41 | class ProviderType(StrEnum): |
37 | 42 | ALBERT = "albert" |
38 | 43 | OPENAI = "openai" |
@@ -96,6 +101,7 @@ class Provider(BaseModel): |
96 | 101 | type: ProviderType |
97 | 102 | url: str |
98 | 103 | key: str | None = None |
| 104 | + basic_auth: BasicAuth | None = None |
99 | 105 | timeout: int |
100 | 106 | model_name: str |
101 | 107 | model_hosting_zone: HostingZone = HostingZone.WOR |
@@ -146,24 +152,31 @@ class ProviderOriginalRequest(BaseModel): |
146 | 152 | files: Annotated[dict | None, Field(default=None, description="The files to use for the request.")] |
147 | 153 |
|
148 | 154 |
|
# Timing metrics recorded for a response.
# NOTE(review): the unit of both values is not stated in this file — confirm
# with the code that fills them in.
class ResponseMetrics(BaseModel):
    # Overall latency; defaults to 0 when not supplied.
    latency: Annotated[int, Field(default=0, description="The latency of the response.")]
    # "ttft" stands for time to first *token* (a "time to first byte" metric
    # would be "ttfb"), so the description is worded to match the field name.
    # Defaults to None when not supplied.
    ttft: Annotated[int | None, Field(default=None, description="The time to first token of the response.")]
| 158 | + |
| 159 | + |
# Description of a request to build: HTTP method, target URL, optional
# credentials, and three payload containers (JSON body, form data, files).
class ProviderFormattedRequest(BaseModel):
    # Required: no default is declared for the method or the URL.
    method: Annotated[HTTPMethod, Field(description="The HTTP method to build the request.")]
    url: Annotated[str, Field(description="The model API URL to build the request.")]

    # Optional credentials; None unless set.
    auth: Annotated[BasicAuth | None, Field(default=None, description="The authentication to use for the request.")]

    # Payload containers, each defaulting to an empty dict.
    # NOTE(review): the literal `{}` defaults assume the base model copies
    # defaults per instance (pydantic does) — confirm `api.domain.BaseModel`
    # keeps that behaviour.
    body: Annotated[dict, Field(default={}, description="The JSON body to use for the request.")]
    form: Annotated[dict, Field(default={}, description="The form-encoded data to use for the request.")]
    files: Annotated[dict, Field(default={}, description="The files to use for the request.")]
155 | 167 |
|
156 | 168 |
|
157 | | -class ResponseMetrics(BaseModel): |
158 | | - latency: Annotated[int, Field(default=0, description="The latency of the response.")] |
159 | | - ttft: Annotated[int | None, Field(default=None, description="The time to first byte of the response.")] |
160 | | - |
161 | | - |
# Response payload holder with two optional representations: JSON-like data
# and plain text. Both default to None.
# NOTE(review): presumably the unprocessed response as received from the
# provider — confirm with the calling code.
class ProviderOriginalResponse(BaseModel):
    # JSON payload: a dict, a list, or None when there is no JSON data.
    data: Annotated[dict | list | None, Field(default=None, description="The JSON data to use for the response.")]
    # Text payload, or None when there is no text data.
    text: Annotated[str | None, Field(default=None, description="The text data to use for the response.")]
165 | 172 |
|
166 | 173 |
|
# Request-queue figures for a provider: how many requests are waiting and how
# many are running. Accepted as one of the `data` variants of
# `ProviderFormattedResponse`.
# NOTE(review): presumably parsed from the provider's own metrics output —
# confirm the source and why the counts are typed as float.
class ProviderMetrics(BaseModel):
    # Fixed type tag; the only accepted value is "providerMetrics".
    object: Literal["providerMetrics"] = "providerMetrics"
    # Both figures are required (no default is declared).
    waiting_requests: float
    running_requests: float
| 178 | + |
| 179 | + |
167 | 180 | class ProviderFormattedResponse(BaseModel): |
168 | | - data: Annotated[AudioTranscription | ChatCompletion | ChatCompletionChunk | Embeddings | Models | OCR | Rerank | None, Field(default=None, description="The JSON data to use for the response.")] # fmt: off |
| 181 | + data: Annotated[AudioTranscription | ChatCompletion | ChatCompletionChunk | Embeddings | Models | OCR | ProviderMetrics | Rerank | None, Field(default=None, description="The JSON data to use for the response.")] # fmt: off |
169 | 182 | text: Annotated[str | None, Field(default=None, description="The text data to use for the response.")] |
0 commit comments