Skip to content

Commit d0962ba

Browse files
committed
Fail fast on non-retriable LiteLLM status codes
1 parent 33acf35 commit d0962ba

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed

src/lighteval/models/endpoints/litellm_model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939

4040
logger = logging.getLogger(__name__)
41+
# HTTP status codes (401 Unauthorized, 403 Forbidden, 404 Not Found) that a retry
# cannot fix: an exception whose ``status_code`` is in this set is re-raised
# immediately instead of going through the exponential-backoff retry path.
# A frozenset keeps this module-level constant from being mutated at runtime.
NON_RETRIABLE_STATUS_CODES = frozenset({401, 403, 404})
4142

4243
if is_package_available("litellm"):
4344
import litellm
@@ -242,6 +243,8 @@ def __call_api(self, prompt, return_logits, max_new_tokens, num_samples, stop_se
242243
logger.warning(f"{error_string}. Returning empty response.")
243244
return LitellmModelResponse()
244245
except Exception as e:
246+
if getattr(e, "status_code", None) in NON_RETRIABLE_STATUS_CODES:
247+
raise
245248
wait_time = min(
246249
64, self.API_RETRY_SLEEP * (self.API_RETRY_MULTIPLIER**attempt)
247250
) # Exponential backoff with max 64s
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# MIT License
2+
3+
# Copyright (c) 2026 The HuggingFace Team
4+
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
from types import SimpleNamespace
24+
from unittest.mock import Mock
25+
26+
import pytest
27+
28+
29+
pytest.importorskip("litellm")
30+
31+
from lighteval.models.endpoints.litellm_model import LiteLLMClient, LiteLLMModelConfig, litellm
32+
33+
34+
def make_response(content: str) -> SimpleNamespace:
    """Build a minimal stand-in for a completion response object.

    Only the ``choices[0].message.content`` attribute path is populated,
    which is the path the tests in this module read back.

    Args:
        content: Text exposed as the single choice's message content.

    Returns:
        A ``SimpleNamespace`` tree with exactly one entry in ``choices``.
    """
    message = SimpleNamespace(content=content)
    return SimpleNamespace(choices=[SimpleNamespace(message=message)])
36+
37+
38+
def make_client(cache_dir: str, api_max_retry: int = 3) -> LiteLLMClient:
    """Create a ``LiteLLMClient`` with a fixed test configuration.

    The model name, provider and API key are hard-coded placeholders; the
    tests in this module replace ``litellm.completion`` before calling the
    client, so the key is never meant to reach a real endpoint.

    Args:
        cache_dir: Directory passed to the config as ``cache_dir``.
        api_max_retry: Value passed to the config as ``api_max_retry``.

    Returns:
        A client built from the resulting ``LiteLLMModelConfig``.
    """
    config = LiteLLMModelConfig(
        model_name="openai/gpt-4o-mini",
        provider="openai",
        api_key="test-key",
        api_max_retry=api_max_retry,
        cache_dir=cache_dir,
    )
    return LiteLLMClient(config)
48+
49+
50+
class TestLiteLLMClientRetries:
    """Retry behaviour of ``LiteLLMClient``'s private ``__call_api`` method.

    Both tests replace ``litellm.completion`` and ``time.sleep`` inside
    ``lighteval.models.endpoints.litellm_model`` so no network call or real
    waiting takes place.
    """

    def test_rate_limit_error_still_retries(self, monkeypatch, tmp_path):
        """Rate-limit errors are retried with backoff until a call succeeds."""
        client = make_client(str(tmp_path), api_max_retry=3)
        calls = []
        # Scripted outcomes, consumed in order: two failures, then a success.
        responses = [
            litellm.RateLimitError(
                message="rate limited",
                llm_provider="openai",
                model="openai/gpt-4o-mini",
            ),
            litellm.RateLimitError(
                message="rate limited",
                llm_provider="openai",
                model="openai/gpt-4o-mini",
            ),
            make_response("ok"),
        ]

        def fake_completion(**kwargs):
            calls.append(kwargs)
            response = responses.pop(0)
            if isinstance(response, Exception):
                raise response
            return response

        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.litellm.completion", fake_completion)
        # Record requested sleep durations instead of actually sleeping.
        sleep_calls = []
        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.time.sleep", sleep_calls.append)

        # ``__call_api`` is name-mangled, hence the ``_LiteLLMClient`` prefix.
        response = client._LiteLLMClient__call_api(
            prompt=[{"role": "user", "content": "hi"}],
            return_logits=False,
            max_new_tokens=10,
            num_samples=1,
            stop_sequence=None,
        )

        assert response.choices[0].message.content == "ok"
        assert len(calls) == 3
        assert sleep_calls == [1.0, 2.0]

    # Mirrors NON_RETRIABLE_STATUS_CODES in litellm_model so that every
    # fail-fast code is exercised, not only 401.
    @pytest.mark.parametrize("status_code", [401, 403, 404])
    def test_non_retriable_status_code_fails_fast(self, monkeypatch, tmp_path, status_code):
        """A non-retriable status code propagates after one call, with no sleep."""
        client = make_client(str(tmp_path))
        calls = []

        class FakeException(Exception):
            """Exception carrying the ``status_code`` attribute the client inspects."""

            def __init__(self, message, code):
                super().__init__(message)
                self.status_code = code

        def fake_completion(**kwargs):
            calls.append(kwargs)
            raise FakeException(f"HTTP {status_code}", status_code)

        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.litellm.completion", fake_completion)
        sleep = Mock()
        monkeypatch.setattr("lighteval.models.endpoints.litellm_model.time.sleep", sleep)

        with pytest.raises(FakeException):
            client._LiteLLMClient__call_api(
                prompt=[{"role": "user", "content": "hi"}],
                return_logits=False,
                max_new_tokens=10,
                num_samples=1,
                stop_sequence=None,
            )

        assert len(calls) == 1
        sleep.assert_not_called()

0 commit comments

Comments
 (0)