Commit 15bc586

0.7.1, better error handling and display, adjusted tests

1 parent 17e29ee

7 files changed (+286, -6 lines)

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.7.1] - 2025-01-30
+
+### Added
+- Comprehensive error handling tests for all major LiteLLM error types
+- User-friendly error messages for different error scenarios:
+  - Authentication errors now show which API key to check
+  - Model not found errors suggest checking the model name
+  - Rate limit errors advise trying again later
+  - Connection errors for Ollama specifically mention starting the server
+  - Generic errors are truncated to 80 characters for readability
+
+### Changed
+- Simplified error handling by focusing on the most common error types
+- Improved error message formatting with consistent capitalization
+
 ## [0.7.0] - 2025-01-30
 
 ### Added
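For reference, the new validation messages described in the Added section render along these lines when a model check fails (illustrative output only: the model names are examples, and the Rich-style [red]…[/red] markup used in the code prints as colored text, not literal tags):

    ✗ gpt-4o-mini - Authentication Failed. (OPENAI_API_KEY)
    ✗ gpt-8 - Model Not Found
    ✗ gpt-4o-mini - Rate Limit Exceeded
    ✗ ollama/deepseek-r1 - Ollama server not running. Start with 'ollama serve'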

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "tacho"
-version = "0.7.0"
+version = "0.7.1"
 description = "CLI tool for measuring and comparing LLM inference speeds"
 readme = "README.md"
 authors = [

tacho/ai.py

Lines changed: 41 additions & 1 deletion
@@ -1,6 +1,12 @@
 import time
 
 import litellm
+from litellm import (
+    AuthenticationError,
+    NotFoundError,
+    RateLimitError,
+    APIConnectionError,
+)
 
 BENCHMARK_PROMPT = """Generate a ~2000 word summary of the history of the USA."""
 VALIDATION_PROMPT = "Do you have time to help? (yes/no)"
@@ -16,8 +22,42 @@ async def ping_model(model: str, console) -> bool:
         await llm(model, VALIDATION_PROMPT, 1)
         console.print(f"[green]✓[/green] {model}")
         return True
+    except AuthenticationError as e:
+        error_msg = "Authentication Failed."
+        if hasattr(e, "llm_provider") and e.llm_provider:
+            provider = e.llm_provider.upper()
+            if provider == "OPENAI":
+                error_msg += " (OPENAI_API_KEY)"
+            elif provider == "ANTHROPIC":
+                error_msg += " (ANTHROPIC_API_KEY)"
+            elif provider == "GEMINI":
+                error_msg += " (GEMINI_API_KEY)"
+            elif provider == "BEDROCK":
+                error_msg += (
+                    " (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION_NAME)"
+                )
+        console.print(f"[red]✗[/red] {model} - {error_msg}")
+        return False
+    except NotFoundError:
+        console.print(f"[red]✗[/red] {model} - Model Not Found")
+        return False
+    except RateLimitError:
+        console.print(f"[red]✗[/red] {model} - Rate Limit Exceeded")
+        return False
+    except APIConnectionError as e:
+        error_msg = str(e)
+        if "ollama" in model.lower() and "localhost:11434" in error_msg.lower():
+            console.print(
+                f"[red]✗[/red] {model} - Ollama server not running. Start with 'ollama serve'"
+            )
+        else:
+            console.print(
+                f"[red]✗[/red] {model} - Connection failed. Check network/service availability"
+            )
+        return False
     except Exception as e:
-        console.print(f"[red]✗[/red] {model} - {str(e)}")
+        # Fallback for any other errors
+        console.print(f"[red]✗[/red] {model} - {str(e)[:80]}...")
         return False
 
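A minimal sketch of how the new handling in ping_model surfaces to a caller. This is illustrative glue code rather than tacho's actual CLI: it assumes console is a rich.console.Console (which the [red]…[/red] markup suggests) and that the caller simply filters out models whose ping fails.

import asyncio

from rich.console import Console

from tacho.ai import ping_model


async def check_models(models: list[str]) -> list[str]:
    """Ping each model and keep only the ones that respond."""
    console = Console()
    results = await asyncio.gather(*(ping_model(m, console) for m in models))
    return [m for m, ok in zip(models, results) if ok]


# Failing models print a red ✗ line with a specific hint (missing API key,
# unknown model, rate limit, Ollama not running) and are dropped, not raised.
usable = asyncio.run(check_models(["gpt-4o-mini", "ollama/deepseek-r1"]))
print("Usable models:", usable)
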
tacho/display.py

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ def display_results(models: list[str], runs: int, results: list):
             f"{m[1]:.1f}",
             f"{m[2]:.1f}",
             f"{m[3]:.1f}",
-            f"{m[4]:.1f}s",
+            f"{m[4]:.1f}s",#
             f"{m[5]:.0f}",
         )
 

tests/test_ai.py

Lines changed: 109 additions & 2 deletions
@@ -1,6 +1,15 @@
 from unittest.mock import MagicMock
 
 import pytest
+from litellm import (
+    AuthenticationError,
+    BadRequestError,
+    NotFoundError,
+    RateLimitError,
+    APIConnectionError,
+    ContextWindowExceededError,
+    ContentPolicyViolationError,
+)
 
 from tacho.ai import llm, ping_model, bench_model, BENCHMARK_PROMPT, VALIDATION_PROMPT
 
@@ -60,9 +69,9 @@ async def test_ping_model_failure(self, mock_litellm):
         # Verify failure
         assert result is False
 
-        # Verify error output
+        # Verify error output - now includes "..." for generic exceptions
         mock_console_instance.print.assert_called_once_with(
-            "[red]✗[/red] invalid-model - API Error"
+            "[red]✗[/red] invalid-model - API Error..."
         )
 
     @pytest.mark.asyncio
@@ -152,3 +161,101 @@ async def test_bench_model_with_empty_completion_details(
         # Should only count regular completion tokens
         assert duration == 2.0
         assert tokens == 100
+
+    @pytest.mark.asyncio
+    async def test_ping_model_authentication_error(self, mock_litellm):
+        """Test ping_model handling of authentication errors"""
+        mock_litellm.side_effect = AuthenticationError(
+            message="Invalid API key provided. You can find your API key at https://platform.openai.com/api-keys.",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("gpt-4o-mini", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] gpt-4o-mini" in call_args
+        assert "Authentication Failed" in call_args
+        assert "OPENAI_API_KEY" in call_args
+
+    @pytest.mark.asyncio
+    async def test_ping_model_not_found_error(self, mock_litellm):
+        """Test ping_model handling of model not found errors"""
+        mock_litellm.side_effect = NotFoundError(
+            message="The model 'gpt-8' does not exist",
+            llm_provider="openai",
+            model="gpt-8"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("gpt-8", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] gpt-8" in call_args
+        assert "Model Not Found" in call_args
+
+    @pytest.mark.asyncio
+    async def test_ping_model_rate_limit_error(self, mock_litellm):
+        """Test ping_model handling of rate limit errors"""
+        mock_litellm.side_effect = RateLimitError(
+            message="Rate limit exceeded. Please retry after 60 seconds",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("gpt-4o-mini", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] gpt-4o-mini" in call_args
+        assert "Rate Limit Exceeded" in call_args
+
+    @pytest.mark.asyncio
+    async def test_ping_model_api_connection_error(self, mock_litellm):
+        """Test ping_model handling of connection errors"""
+        mock_litellm.side_effect = APIConnectionError(
+            message="Failed to connect to Ollama server at localhost:11434",
+            llm_provider="ollama",
+            model="ollama/deepseek-r1"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("ollama/deepseek-r1", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] ollama/deepseek-r1" in call_args
+        assert "Ollama server not running" in call_args
+        assert "ollama serve" in call_args
+
+    @pytest.mark.asyncio
+    async def test_bench_model_authentication_error(self, mock_litellm):
+        """Test that authentication errors propagate from bench_model"""
+        mock_litellm.side_effect = AuthenticationError(
+            message="Invalid API key",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+
+        with pytest.raises(AuthenticationError, match="Invalid API key"):
+            await bench_model("gpt-4o-mini", 500)
+
+    @pytest.mark.asyncio
+    async def test_bench_model_context_window_error(self, mock_litellm):
+        """Test that context window errors propagate from bench_model"""
+        mock_litellm.side_effect = ContextWindowExceededError(
+            message="This model's maximum context length is 4096 tokens",
+            llm_provider="openai",
+            model="gpt-3.5-turbo"
+        )
+
+        with pytest.raises(ContextWindowExceededError, match="context length"):
+            await bench_model("gpt-3.5-turbo", 5000)
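These tests rely on a mock_litellm fixture defined elsewhere in the test suite and not shown in this diff. Purely for orientation, a hypothetical version of that fixture could look like the sketch below, which patches litellm.acompletion (the call that tacho.ai.llm ultimately makes, as the provider tests also assume):

# Hypothetical conftest.py sketch - the real fixture lives elsewhere in the repo.
from unittest.mock import AsyncMock, patch

import pytest


@pytest.fixture
def mock_litellm():
    """Patch litellm.acompletion so tests can set return values or side effects."""
    with patch("litellm.acompletion", new_callable=AsyncMock) as mock:
        yield mock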

tests/test_providers.py

Lines changed: 118 additions & 0 deletions
@@ -152,3 +152,121 @@ async def test_invalid_model_handling():
     # Test invalid provider prefix
     with pytest.raises((BadRequestError, NotFoundError)):
         await llm("invalid-provider/gpt-4", "Hi", tokens=1)
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+async def test_litellm_error_types():
+    """Test handling of specific LiteLLM error types."""
+    from unittest.mock import AsyncMock, patch
+
+    # Test AuthenticationError (401) - Invalid API key
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = AuthenticationError(
+            message="Invalid API key provided",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+
+        with pytest.raises(AuthenticationError) as exc_info:
+            await llm("gpt-4o-mini", "Hi", tokens=1)
+
+        assert "Invalid API key" in str(exc_info.value)
+        assert hasattr(exc_info.value, "llm_provider")
+        assert exc_info.value.llm_provider == "openai"
+
+    # Test NotFoundError (404) - Model not found
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = NotFoundError(
+            message="The model 'gpt-8' does not exist",
+            llm_provider="openai",
+            model="gpt-8"
+        )
+
+        with pytest.raises(NotFoundError) as exc_info:
+            await llm("gpt-8", "Hi", tokens=1)
+
+        assert "does not exist" in str(exc_info.value)
+
+    # Test RateLimitError (429) - Rate limit exceeded
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = RateLimitError(
+            message="Rate limit exceeded. Please retry after 60 seconds",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+
+        with pytest.raises(RateLimitError) as exc_info:
+            await llm("gpt-4o-mini", "Hi", tokens=1)
+
+        assert "Rate limit" in str(exc_info.value)
+
+    # Test BadRequestError (400) - Invalid request
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = BadRequestError(
+            message="Invalid request parameters",
+            llm_provider="anthropic",
+            model="claude-sonnet-4"
+        )
+
+        with pytest.raises(BadRequestError) as exc_info:
+            await llm("claude-sonnet-4", "Hi", tokens=1)
+
+        assert "Invalid request" in str(exc_info.value)
+
+    # Test APIConnectionError (500) - Connection issues
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = APIConnectionError(
+            message="Failed to connect to API server",
+            llm_provider="ollama",
+            model="ollama/deepseek-r1"
+        )
+
+        with pytest.raises(APIConnectionError) as exc_info:
+            await llm("ollama/deepseek-r1", "Hi", tokens=1)
+
+        assert "Failed to connect" in str(exc_info.value)
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+async def test_context_window_exceeded_error():
+    """Test specific handling of context window exceeded errors."""
+    from unittest.mock import AsyncMock, patch
+    from litellm import ContextWindowExceededError
+
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = ContextWindowExceededError(
+            message="This model's maximum context length is 4096 tokens",
+            model="gpt-3.5-turbo",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(ContextWindowExceededError) as exc_info:
+            await llm("gpt-3.5-turbo", "Very long prompt..." * 1000, tokens=1000)
+
+        assert "context length" in str(exc_info.value).lower()
+        # ContextWindowExceededError is a subclass of BadRequestError
+        assert isinstance(exc_info.value, BadRequestError)
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+async def test_content_policy_violation_error():
+    """Test specific handling of content policy violation errors."""
+    from unittest.mock import AsyncMock, patch
+    from litellm import ContentPolicyViolationError
+
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = ContentPolicyViolationError(
+            message="Your request was rejected due to content policy violations",
+            model="gpt-4o-mini",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(ContentPolicyViolationError) as exc_info:
+            await llm("gpt-4o-mini", "Inappropriate content", tokens=100)
+
+        assert "content policy" in str(exc_info.value).lower()
+        # ContentPolicyViolationError is a subclass of BadRequestError
+        assert isinstance(exc_info.value, BadRequestError)
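Since the new provider tests mock litellm.acompletion, they run offline. Assuming the unit marker used above is registered in the project's pytest configuration and pytest-asyncio is available (the tests use @pytest.mark.asyncio), they can be selected with something like:

    uv run pytest tests/test_providers.py -m unit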

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
