@@ -78,9 +78,13 @@ async def test_bench_model_success(self, mock_litellm, mocker):
         mock_time = mocker.patch('tacho.ai.time.time')
         mock_time.side_effect = [100.0, 102.5]  # 2.5 second duration
 
-        # Configure mock response with usage data
+        # Configure mock response with usage data (no reasoning tokens)
         mock_response = MagicMock()
-        mock_response.usage.completion_tokens = 150
+        mock_usage = MagicMock()
+        mock_usage.completion_tokens = 150
+        # Explicitly configure to not have completion_tokens_details
+        mock_usage.completion_tokens_details = None
+        mock_response.usage = mock_usage
         mock_litellm.return_value = mock_response
 
         duration, tokens = await bench_model("gpt-4", 500)
@@ -120,4 +124,55 @@ async def test_bench_model_exception_handling(self, mock_litellm):
         mock_litellm.side_effect = Exception("Network error")
 
         with pytest.raises(Exception, match="Network error"):
-            await bench_model("gpt-4", 500)
+            await bench_model("gpt-4", 500)
+
+    @pytest.mark.asyncio
+    async def test_bench_model_with_reasoning_tokens(self, mock_litellm, mocker):
+        """Test benchmark with reasoning models that have completion_tokens_details"""
+        # Mock time
+        mock_time = mocker.patch('tacho.ai.time.time')
+        mock_time.side_effect = [100.0, 103.0]  # 3 second duration
+
+        # Configure mock response with reasoning tokens
+        mock_response = MagicMock()
+        mock_response.usage.completion_tokens = 50  # Regular completion tokens
+
+        # Mock completion_tokens_details with reasoning_tokens
+        mock_details = MagicMock()
+        mock_details.reasoning_tokens = 200  # Reasoning tokens
+        mock_response.usage.completion_tokens_details = mock_details
+
+        mock_litellm.return_value = mock_response
+
+        duration, tokens = await bench_model("o1-mini", 500)
+
+        # Verify results - should include both completion and reasoning tokens
+        assert duration == 3.0
+        assert tokens == 250  # 50 completion + 200 reasoning
+
+        # Verify LLM was called correctly
+        mock_litellm.assert_called_once_with(
+            "o1-mini",
+            [{"role": "user", "content": BENCHMARK_PROMPT}],
+            max_tokens=500
+        )
+
+    @pytest.mark.asyncio
+    async def test_bench_model_with_empty_completion_details(self, mock_litellm, mocker):
+        """Test benchmark when completion_tokens_details exists but has no reasoning_tokens"""
+        # Mock time
+        mock_time = mocker.patch('tacho.ai.time.time')
+        mock_time.side_effect = [100.0, 102.0]
+
+        # Configure mock response with completion_tokens_details but no reasoning_tokens
+        mock_response = MagicMock()
+        mock_response.usage.completion_tokens = 100
+        mock_response.usage.completion_tokens_details = MagicMock(spec=[])  # No reasoning_tokens attribute
+
+        mock_litellm.return_value = mock_response
+
+        duration, tokens = await bench_model("gpt-4", 500)
+
+        # Should only count regular completion tokens
+        assert duration == 2.0
+        assert tokens == 100
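For reference, a minimal sketch of the behavior these mocks appear to exercise: `bench_model` is assumed to time a single LiteLLM-style completion call and add `usage.completion_tokens_details.reasoning_tokens` to `usage.completion_tokens` whenever the details object exists and exposes that attribute. This is an illustration under those assumptions, not tacho's actual implementation; the `acompletion` entry point, the helper name, and the placeholder prompt value are all guesses.

```python
# Hypothetical sketch only -- mirrors what the mocks above assume, not tacho's real code.
import time

from litellm import acompletion  # assumed entry point; the tests only ever see it mocked

BENCHMARK_PROMPT = "..."  # placeholder; the real prompt is defined in tacho


def _count_output_tokens(usage) -> int:
    """Completion tokens plus reasoning tokens, when the provider reports them."""
    tokens = usage.completion_tokens
    details = getattr(usage, "completion_tokens_details", None)
    if details is not None:
        # MagicMock(spec=[]) in the tests has no reasoning_tokens attribute,
        # so fall back to 0 instead of raising AttributeError.
        tokens += getattr(details, "reasoning_tokens", 0) or 0
    return tokens


async def bench_model(model: str, max_tokens: int) -> tuple[float, int]:
    """Time one completion call and return (duration_seconds, output_tokens)."""
    start = time.time()
    response = await acompletion(
        model,
        [{"role": "user", "content": BENCHMARK_PROMPT}],
        max_tokens=max_tokens,
    )
    duration = time.time() - start
    return duration, _count_output_tokens(response.usage)
```

Reading the reasoning tokens defensively through `getattr` is what lets all three cases in the tests pass: `completion_tokens_details` set to `None`, present but without a `reasoning_tokens` attribute, and present with a real count.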