Commit 15bc586

0.7.1, better error handling and display, adjusted tests

1 parent 17e29ee

7 files changed (+286, -6 lines)

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.7.1] - 2025-01-30
+
+### Added
+- Comprehensive error handling tests for all major LiteLLM error types
+- User-friendly error messages for different error scenarios:
+  - Authentication errors now show which API key to check
+  - Model not found errors suggest checking the model name
+  - Rate limit errors advise trying again later
+  - Connection errors for Ollama specifically mention starting the server
+  - Generic errors are truncated to 80 characters for readability
+
+### Changed
+- Simplified error handling by focusing on the most common error types
+- Improved error message formatting with consistent capitalization
+
 ## [0.7.0] - 2025-01-30
 
 ### Added
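For reference, the new validation messages described in the Added section render along these lines when a model check fails (illustrative output only: the model names are examples, and the Rich-style [red]…[/red] markup used in the code prints as colored text, not literal tags):

    ✗ gpt-4o-mini - Authentication Failed. (OPENAI_API_KEY)
    ✗ gpt-8 - Model Not Found
    ✗ gpt-4o-mini - Rate Limit Exceeded
    ✗ ollama/deepseek-r1 - Ollama server not running. Start with 'ollama serve'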

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "tacho"
-version = "0.7.0"
+version = "0.7.1"
 description = "CLI tool for measuring and comparing LLM inference speeds"
 readme = "README.md"
 authors = [

tacho/ai.py

Lines changed: 41 additions & 1 deletion
@@ -1,6 +1,12 @@
 import time
 
 import litellm
+from litellm import (
+    AuthenticationError,
+    NotFoundError,
+    RateLimitError,
+    APIConnectionError,
+)
 
 BENCHMARK_PROMPT = """Generate a ~2000 word summary of the history of the USA."""
 VALIDATION_PROMPT = "Do you have time to help? (yes/no)"
@@ -16,8 +22,42 @@ async def ping_model(model: str, console) -> bool:
         await llm(model, VALIDATION_PROMPT, 1)
         console.print(f"[green]✓[/green] {model}")
         return True
+    except AuthenticationError as e:
+        error_msg = "Authentication Failed."
+        if hasattr(e, "llm_provider") and e.llm_provider:
+            provider = e.llm_provider.upper()
+            if provider == "OPENAI":
+                error_msg += " (OPENAI_API_KEY)"
+            elif provider == "ANTHROPIC":
+                error_msg += " (ANTHROPIC_API_KEY)"
+            elif provider == "GEMINI":
+                error_msg += " (GEMINI_API_KEY)"
+            elif provider == "BEDROCK":
+                error_msg += (
+                    " (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION_NAME)"
+                )
+        console.print(f"[red]✗[/red] {model} - {error_msg}")
+        return False
+    except NotFoundError:
+        console.print(f"[red]✗[/red] {model} - Model Not Found")
+        return False
+    except RateLimitError:
+        console.print(f"[red]✗[/red] {model} - Rate Limit Exceeded")
+        return False
+    except APIConnectionError as e:
+        error_msg = str(e)
+        if "ollama" in model.lower() and "localhost:11434" in error_msg.lower():
+            console.print(
+                f"[red]✗[/red] {model} - Ollama server not running. Start with 'ollama serve'"
+            )
+        else:
+            console.print(
+                f"[red]✗[/red] {model} - Connection failed. Check network/service availability"
+            )
+        return False
     except Exception as e:
-        console.print(f"[red]✗[/red] {model} - {str(e)}")
+        # Fallback for any other errors
+        console.print(f"[red]✗[/red] {model} - {str(e)[:80]}...")
         return False
 
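A minimal sketch of how the new handling in ping_model surfaces to a caller. This is illustrative glue code rather than tacho's actual CLI: it assumes console is a rich.console.Console (which the [red]…[/red] markup suggests) and that the caller simply filters out models whose ping fails.

import asyncio

from rich.console import Console

from tacho.ai import ping_model


async def check_models(models: list[str]) -> list[str]:
    """Ping each model and keep only the ones that respond."""
    console = Console()
    results = await asyncio.gather(*(ping_model(m, console) for m in models))
    return [m for m, ok in zip(models, results) if ok]


# Failing models print a red ✗ line with a specific hint (missing API key,
# unknown model, rate limit, Ollama not running) and are dropped, not raised.
usable = asyncio.run(check_models(["gpt-4o-mini", "ollama/deepseek-r1"]))
print("Usable models:", usable)
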
tacho/display.py

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ def display_results(models: list[str], runs: int, results: list):
             f"{m[1]:.1f}",
             f"{m[2]:.1f}",
             f"{m[3]:.1f}",
-            f"{m[4]:.1f}s",
+            f"{m[4]:.1f}s",#
             f"{m[5]:.0f}",
         )
 

tests/test_ai.py

Lines changed: 109 additions & 2 deletions
@@ -1,6 +1,15 @@
 from unittest.mock import MagicMock
 
 import pytest
+from litellm import (
+    AuthenticationError,
+    BadRequestError,
+    NotFoundError,
+    RateLimitError,
+    APIConnectionError,
+    ContextWindowExceededError,
+    ContentPolicyViolationError,
+)
 
 from tacho.ai import llm, ping_model, bench_model, BENCHMARK_PROMPT, VALIDATION_PROMPT
 
@@ -60,9 +69,9 @@ async def test_ping_model_failure(self, mock_litellm):
         # Verify failure
         assert result is False
 
-        # Verify error output
+        # Verify error output - now includes "..." for generic exceptions
         mock_console_instance.print.assert_called_once_with(
-            "[red]✗[/red] invalid-model - API Error"
+            "[red]✗[/red] invalid-model - API Error..."
         )
 
     @pytest.mark.asyncio
@@ -152,3 +161,101 @@ async def test_bench_model_with_empty_completion_details(
         # Should only count regular completion tokens
         assert duration == 2.0
         assert tokens == 100
+
+    @pytest.mark.asyncio
+    async def test_ping_model_authentication_error(self, mock_litellm):
+        """Test ping_model handling of authentication errors"""
+        mock_litellm.side_effect = AuthenticationError(
+            message="Invalid API key provided. You can find your API key at https://platform.openai.com/api-keys.",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("gpt-4o-mini", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] gpt-4o-mini" in call_args
+        assert "Authentication Failed" in call_args
+        assert "OPENAI_API_KEY" in call_args
+
+    @pytest.mark.asyncio
+    async def test_ping_model_not_found_error(self, mock_litellm):
+        """Test ping_model handling of model not found errors"""
+        mock_litellm.side_effect = NotFoundError(
+            message="The model 'gpt-8' does not exist",
+            llm_provider="openai",
+            model="gpt-8"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("gpt-8", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] gpt-8" in call_args
+        assert "Model Not Found" in call_args
+
+    @pytest.mark.asyncio
+    async def test_ping_model_rate_limit_error(self, mock_litellm):
+        """Test ping_model handling of rate limit errors"""
+        mock_litellm.side_effect = RateLimitError(
+            message="Rate limit exceeded. Please retry after 60 seconds",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("gpt-4o-mini", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] gpt-4o-mini" in call_args
+        assert "Rate Limit Exceeded" in call_args
+
+    @pytest.mark.asyncio
+    async def test_ping_model_api_connection_error(self, mock_litellm):
+        """Test ping_model handling of connection errors"""
+        mock_litellm.side_effect = APIConnectionError(
+            message="Failed to connect to Ollama server at localhost:11434",
+            llm_provider="ollama",
+            model="ollama/deepseek-r1"
+        )
+        mock_console_instance = MagicMock()
+
+        result = await ping_model("ollama/deepseek-r1", mock_console_instance)
+
+        assert result is False
+        mock_console_instance.print.assert_called_once()
+        call_args = mock_console_instance.print.call_args[0][0]
+        assert "[red]✗[/red] ollama/deepseek-r1" in call_args
+        assert "Ollama server not running" in call_args
+        assert "ollama serve" in call_args
+
+    @pytest.mark.asyncio
+    async def test_bench_model_authentication_error(self, mock_litellm):
+        """Test that authentication errors propagate from bench_model"""
+        mock_litellm.side_effect = AuthenticationError(
+            message="Invalid API key",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+
+        with pytest.raises(AuthenticationError, match="Invalid API key"):
+            await bench_model("gpt-4o-mini", 500)
+
+    @pytest.mark.asyncio
+    async def test_bench_model_context_window_error(self, mock_litellm):
+        """Test that context window errors propagate from bench_model"""
+        mock_litellm.side_effect = ContextWindowExceededError(
+            message="This model's maximum context length is 4096 tokens",
+            llm_provider="openai",
+            model="gpt-3.5-turbo"
+        )
+
+        with pytest.raises(ContextWindowExceededError, match="context length"):
+            await bench_model("gpt-3.5-turbo", 5000)
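These tests rely on a mock_litellm fixture defined elsewhere in the test suite and not shown in this diff. Purely for orientation, a hypothetical version of that fixture could look like the sketch below, which patches litellm.acompletion (the call that tacho.ai.llm ultimately makes, as the provider tests also assume):

# Hypothetical conftest.py sketch - the real fixture lives elsewhere in the repo.
from unittest.mock import AsyncMock, patch

import pytest


@pytest.fixture
def mock_litellm():
    """Patch litellm.acompletion so tests can set return values or side effects."""
    with patch("litellm.acompletion", new_callable=AsyncMock) as mock:
        yield mock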

tests/test_providers.py

Lines changed: 118 additions & 0 deletions
@@ -152,3 +152,121 @@ async def test_invalid_model_handling():
     # Test invalid provider prefix
     with pytest.raises((BadRequestError, NotFoundError)):
         await llm("invalid-provider/gpt-4", "Hi", tokens=1)
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+async def test_litellm_error_types():
+    """Test handling of specific LiteLLM error types."""
+    from unittest.mock import AsyncMock, patch
+
+    # Test AuthenticationError (401) - Invalid API key
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = AuthenticationError(
+            message="Invalid API key provided",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+
+        with pytest.raises(AuthenticationError) as exc_info:
+            await llm("gpt-4o-mini", "Hi", tokens=1)
+
+        assert "Invalid API key" in str(exc_info.value)
+        assert hasattr(exc_info.value, "llm_provider")
+        assert exc_info.value.llm_provider == "openai"
+
+    # Test NotFoundError (404) - Model not found
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = NotFoundError(
+            message="The model 'gpt-8' does not exist",
+            llm_provider="openai",
+            model="gpt-8"
+        )
+
+        with pytest.raises(NotFoundError) as exc_info:
+            await llm("gpt-8", "Hi", tokens=1)
+
+        assert "does not exist" in str(exc_info.value)
+
+    # Test RateLimitError (429) - Rate limit exceeded
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = RateLimitError(
+            message="Rate limit exceeded. Please retry after 60 seconds",
+            llm_provider="openai",
+            model="gpt-4o-mini"
+        )
+
+        with pytest.raises(RateLimitError) as exc_info:
+            await llm("gpt-4o-mini", "Hi", tokens=1)
+
+        assert "Rate limit" in str(exc_info.value)
+
+    # Test BadRequestError (400) - Invalid request
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = BadRequestError(
+            message="Invalid request parameters",
+            llm_provider="anthropic",
+            model="claude-sonnet-4"
+        )
+
+        with pytest.raises(BadRequestError) as exc_info:
+            await llm("claude-sonnet-4", "Hi", tokens=1)
+
+        assert "Invalid request" in str(exc_info.value)
+
+    # Test APIConnectionError (500) - Connection issues
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = APIConnectionError(
+            message="Failed to connect to API server",
+            llm_provider="ollama",
+            model="ollama/deepseek-r1"
+        )
+
+        with pytest.raises(APIConnectionError) as exc_info:
+            await llm("ollama/deepseek-r1", "Hi", tokens=1)
+
+        assert "Failed to connect" in str(exc_info.value)
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+async def test_context_window_exceeded_error():
+    """Test specific handling of context window exceeded errors."""
+    from unittest.mock import AsyncMock, patch
+    from litellm import ContextWindowExceededError
+
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = ContextWindowExceededError(
+            message="This model's maximum context length is 4096 tokens",
+            model="gpt-3.5-turbo",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(ContextWindowExceededError) as exc_info:
+            await llm("gpt-3.5-turbo", "Very long prompt..." * 1000, tokens=1000)
+
+        assert "context length" in str(exc_info.value).lower()
+        # ContextWindowExceededError is a subclass of BadRequestError
+        assert isinstance(exc_info.value, BadRequestError)
+
+
+@pytest.mark.unit
+@pytest.mark.asyncio
+async def test_content_policy_violation_error():
+    """Test specific handling of content policy violation errors."""
+    from unittest.mock import AsyncMock, patch
+    from litellm import ContentPolicyViolationError
+
+    with patch('litellm.acompletion', new_callable=AsyncMock) as mock_completion:
+        mock_completion.side_effect = ContentPolicyViolationError(
+            message="Your request was rejected due to content policy violations",
+            model="gpt-4o-mini",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(ContentPolicyViolationError) as exc_info:
+            await llm("gpt-4o-mini", "Inappropriate content", tokens=100)
+
+        assert "content policy" in str(exc_info.value).lower()
+        # ContentPolicyViolationError is a subclass of BadRequestError
+        assert isinstance(exc_info.value, BadRequestError)
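Since the new provider tests mock litellm.acompletion, they run offline. Assuming the unit marker used above is registered in the project's pytest configuration and pytest-asyncio is available (the tests use @pytest.mark.asyncio), they can be selected with something like:

    uv run pytest tests/test_providers.py -m unit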

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
