@@ -32,24 +32,23 @@ def test_version_callback_not_called(self):
 
     def test_bench_command_success(self, runner, mocker):
         """Test successful benchmark command"""
-        # Test the cli function directly to avoid callback issues
-        mock_asyncio_run = mocker.patch("tacho.cli.asyncio.run")
-
-        # First call to run_pings
-        mock_asyncio_run.side_effect = [
-            [True, True],  # run_pings result
-            [
-                (2.0, 100),
-                (2.1, 102),
-                (2.2, 104),
-                (2.3, 106),
-                (2.4, 108),  # run_benchmarks results
-                (1.5, 95),
-                (1.6, 97),
-                (1.7, 99),
-                (1.8, 101),
-                (1.9, 103),
-            ],
+        mock_run_pings = mocker.patch("tacho.cli.run_pings", new_callable=AsyncMock)
+        mock_run_pings.return_value = [True, True]
+
+        mock_run_benchmarks = mocker.patch(
+            "tacho.cli.run_benchmarks", new_callable=AsyncMock
+        )
+        mock_run_benchmarks.return_value = [
+            (2.0, 100),
+            (2.1, 102),
+            (2.2, 104),
+            (2.3, 106),
+            (2.4, 108),
+            (1.5, 95),
+            (1.6, 97),
+            (1.7, 99),
+            (1.8, 101),
+            (1.9, 103),
         ]
 
         mock_display = mocker.patch("tacho.cli.display_results")
@@ -75,13 +74,13 @@ def test_bench_command_no_valid_models(self, runner, mocker):
 
     def test_bench_command_with_options(self, runner, mocker):
         """Test benchmark command with custom options"""
-        mock_asyncio_run = mocker.patch("tacho.cli.asyncio.run")
+        mock_run_pings = mocker.patch("tacho.cli.run_pings", new_callable=AsyncMock)
+        mock_run_pings.return_value = [True]
 
-        # Configure asyncio.run mock responses
-        mock_asyncio_run.side_effect = [
-            [True],  # run_pings result
-            [(2.0 + i * 0.1, 100 + i) for i in range(10)],  # run_benchmarks results
-        ]
+        mock_run_benchmarks = mocker.patch(
+            "tacho.cli.run_benchmarks", new_callable=AsyncMock
+        )
+        mock_run_benchmarks.return_value = [(2.0 + i * 0.1, 100 + i) for i in range(10)]
 
         mock_display = mocker.patch("tacho.cli.display_results")
 
@@ -96,18 +95,22 @@ def test_bench_command_with_options(self, runner, mocker):
 
     def test_cli_with_partial_valid_models(self, runner, mocker):
         """Test CLI when some models fail validation"""
-        mock_asyncio_run = mocker.patch("tacho.cli.asyncio.run")
-        mock_asyncio_run.side_effect = [
-            [True, False, True],  # run_pings: gpt-4 valid, invalid fails, claude-3 valid
-            [(2.0, 100), (1.8, 95)],  # run_benchmarks for valid models
-        ]
+        mock_run_pings = mocker.patch("tacho.cli.run_pings", new_callable=AsyncMock)
+        mock_run_pings.return_value = [True, False, True]
+
+        mock_run_benchmarks = mocker.patch(
+            "tacho.cli.run_benchmarks", new_callable=AsyncMock
+        )
+        mock_run_benchmarks.return_value = [(2.0, 100), (1.8, 95)]
+
         mock_display = mocker.patch("tacho.cli.display_results")
 
         # Call cli with mixed valid/invalid models
         cli(["gpt-4", "invalid", "claude-3"], runs=1, tokens=250)
 
         # Should not raise Exit since at least one model succeeded
-        assert mock_asyncio_run.call_count == 2
+        mock_run_pings.assert_called_once()
+        mock_run_benchmarks.assert_called_once()
         mock_display.assert_called_once()
 
     def test_cli_all_models_fail(self, runner, mocker):
@@ -147,23 +150,19 @@ def test_help_command(self, runner):
 
     def test_cli_function_partial_valid_models(self, mocker):
         """Test cli function filters out invalid models"""
-        mock_asyncio_run = mocker.patch("tacho.cli.asyncio.run")
-
-        # Configure responses: first ping returns mixed results
-        mock_asyncio_run.side_effect = [
-            [
-                True,
-                False,
-                True,
-            ],  # run_pings: gpt-4 valid, invalid fails, claude-3 valid
-            [
-                (2.0, 100),
-                (2.1, 102),
-                (2.2, 104),  # Results for gpt-4 (3 runs)
-                (1.8, 95),
-                (1.9, 97),
-                (2.0, 99),
-            ],  # Results for claude-3 (3 runs)
+        mock_run_pings = mocker.patch("tacho.cli.run_pings", new_callable=AsyncMock)
+        mock_run_pings.return_value = [True, False, True]
+
+        mock_run_benchmarks = mocker.patch(
+            "tacho.cli.run_benchmarks", new_callable=AsyncMock
+        )
+        mock_run_benchmarks.return_value = [
+            (2.0, 100),
+            (2.1, 102),
+            (2.2, 104),
+            (1.8, 95),
+            (1.9, 97),
+            (2.0, 99),
         ]
 
         mock_display = mocker.patch("tacho.cli.display_results")