11import json
2- import backoff
32import aiohttp
43from typing import Dict , Union
54from backend .models .protocols import (
65 ModelResponse ,
7- RetryConstantError ,
8- RetryExpoError ,
9- UnknownLLMError ,
6+ BackendHTTPError ,
107 LLMRequest ,
118 LLMCompletionsRequest ,
129)
@@ -122,18 +119,6 @@ async def response_generator_raw(response):
122119 active_requests -= 1
123120
124121
125- def handle_llm_exception (e : Exception ):
126- if isinstance (e , aiohttp .ClientResponseError ):
127- if e .status in [408 , 429 , 500 , 502 , 503 , 504 ]:
128- raise RetryExpoError (f"HTTP { e .status } : { e .message } " ) from e
129- else :
130- raise RetryConstantError (f"HTTP { e .status } : { e .message } " ) from e
131- elif isinstance (e , (aiohttp .ClientError , aiohttp .ServerTimeoutError )):
132- raise RetryConstantError (str (e )) from e
133- else :
134- raise UnknownLLMError from e
135-
136-
137122class StreamWrapper :
138123 def __init__ (self , gen , headers = None ):
139124 self .gen = gen
@@ -173,11 +158,7 @@ async def _execute_http_request(
173158 text = str (resp .status )
174159 await req_cm .__aexit__ (None , None , None )
175160 await session .close ()
176-
177- if resp .status in [429 , 500 , 502 , 503 , 504 ]:
178- raise RetryExpoError (f"HTTP { resp .status } : { text } " )
179- else :
180- raise RetryConstantError (f"HTTP { resp .status } : { text } " )
161+ raise BackendHTTPError (status_code = resp .status , body = text )
181162
182163 response_headers = dict (resp .headers )
183164 if stream :
@@ -274,26 +255,18 @@ async def _shared_proxy_handler(
274255
275256 return resp
276257
277- except Exception as e :
258+ except BackendHTTPError :
278259 active_requests -= 1
279260 if not session .closed :
280261 await session .close ()
281- handle_llm_exception (e )
262+ raise
263+ except Exception :
264+ active_requests -= 1
265+ if not session .closed :
266+ await session .close ()
267+ raise
282268
283269
284- @backoff .on_exception (
285- wait_gen = backoff .constant ,
286- exception = RetryConstantError ,
287- max_tries = 3 ,
288- interval = 3 ,
289- )
290- @backoff .on_exception (
291- wait_gen = backoff .expo ,
292- exception = RetryExpoError ,
293- jitter = backoff .full_jitter ,
294- max_value = 100 ,
295- factor = 1.5 ,
296- )
297270async def llm_proxy (endpoint , api_key , request : LLMRequest ) -> ModelResponse :
298271 return await _shared_proxy_handler (
299272 endpoint = endpoint ,
@@ -306,19 +279,6 @@ async def llm_proxy(endpoint, api_key, request: LLMRequest) -> ModelResponse:
306279 )
307280
308281
309- @backoff .on_exception (
310- wait_gen = backoff .constant ,
311- exception = RetryConstantError ,
312- max_tries = 3 ,
313- interval = 3 ,
314- )
315- @backoff .on_exception (
316- wait_gen = backoff .expo ,
317- exception = RetryExpoError ,
318- jitter = backoff .full_jitter ,
319- max_value = 100 ,
320- factor = 1.5 ,
321- )
322282async def llm_proxy_completions (
323283 endpoint , api_key , request : LLMCompletionsRequest
324284) -> ModelResponse :
@@ -333,19 +293,6 @@ async def llm_proxy_completions(
333293 )
334294
335295
336- @backoff .on_exception (
337- wait_gen = backoff .constant ,
338- exception = RetryConstantError ,
339- max_tries = 3 ,
340- interval = 3 ,
341- )
342- @backoff .on_exception (
343- wait_gen = backoff .expo ,
344- exception = RetryExpoError ,
345- jitter = backoff .full_jitter ,
346- max_value = 100 ,
347- factor = 1.5 ,
348- )
349296async def llm_proxy_embeddings (endpoint , api_key , ** kwargs ) -> ModelResponse :
350297 embedding_params = {
351298 "model" : kwargs .get ("model" ),
@@ -368,19 +315,6 @@ async def llm_proxy_embeddings(endpoint, api_key, **kwargs) -> ModelResponse:
368315 )
369316
370317
371- @backoff .on_exception (
372- wait_gen = backoff .constant ,
373- exception = RetryConstantError ,
374- max_tries = 3 ,
375- interval = 3 ,
376- )
377- @backoff .on_exception (
378- wait_gen = backoff .expo ,
379- exception = RetryExpoError ,
380- jitter = backoff .full_jitter ,
381- max_value = 100 ,
382- factor = 1.5 ,
383- )
384318async def llm_proxy_responses (
385319 endpoint , api_key , payload : dict , stream : bool , model : str
386320):
@@ -396,19 +330,6 @@ async def llm_proxy_responses(
396330 )
397331
398332
399- @backoff .on_exception (
400- wait_gen = backoff .constant ,
401- exception = RetryConstantError ,
402- max_tries = 3 ,
403- interval = 3 ,
404- )
405- @backoff .on_exception (
406- wait_gen = backoff .expo ,
407- exception = RetryExpoError ,
408- jitter = backoff .full_jitter ,
409- max_value = 100 ,
410- factor = 1.5 ,
411- )
412333async def llm_proxy_rerank (endpoint , api_key , payload : dict , model : str ):
413334 return await _shared_proxy_handler (
414335 endpoint = endpoint ,
@@ -422,19 +343,6 @@ async def llm_proxy_rerank(endpoint, api_key, payload: dict, model: str):
422343 )
423344
424345
425- @backoff .on_exception (
426- wait_gen = backoff .constant ,
427- exception = RetryConstantError ,
428- max_tries = 3 ,
429- interval = 3 ,
430- )
431- @backoff .on_exception (
432- wait_gen = backoff .expo ,
433- exception = RetryExpoError ,
434- jitter = backoff .full_jitter ,
435- max_value = 100 ,
436- factor = 1.5 ,
437- )
438346async def llm_proxy_score (endpoint , api_key , payload : dict , model : str ):
439347 return await _shared_proxy_handler (
440348 endpoint = endpoint ,
@@ -448,19 +356,6 @@ async def llm_proxy_score(endpoint, api_key, payload: dict, model: str):
448356 )
449357
450358
451- @backoff .on_exception (
452- wait_gen = backoff .constant ,
453- exception = RetryConstantError ,
454- max_tries = 3 ,
455- interval = 3 ,
456- )
457- @backoff .on_exception (
458- wait_gen = backoff .expo ,
459- exception = RetryExpoError ,
460- jitter = backoff .full_jitter ,
461- max_value = 100 ,
462- factor = 1.5 ,
463- )
464359async def llm_proxy_tokenize (endpoint , api_key , payload : dict , model : str ):
465360 return await _shared_proxy_handler (
466361 endpoint = endpoint ,
@@ -474,19 +369,6 @@ async def llm_proxy_tokenize(endpoint, api_key, payload: dict, model: str):
474369 )
475370
476371
477- @backoff .on_exception (
478- wait_gen = backoff .constant ,
479- exception = RetryConstantError ,
480- max_tries = 3 ,
481- interval = 3 ,
482- )
483- @backoff .on_exception (
484- wait_gen = backoff .expo ,
485- exception = RetryExpoError ,
486- jitter = backoff .full_jitter ,
487- max_value = 100 ,
488- factor = 1.5 ,
489- )
490372async def llm_proxy_detokenize (endpoint , api_key , payload : dict , model : str ):
491373 return await _shared_proxy_handler (
492374 endpoint = endpoint ,
0 commit comments