
Commit 18d16c8

Added finish_reason to LLMResult (#410)
1 parent 3267b50 commit 18d16c8

File tree

3 files changed: +33 −9 lines

packages/lmi/src/lmi/llms.py
packages/lmi/src/lmi/types.py
packages/lmi/tests/test_llms.py

packages/lmi/src/lmi/llms.py

Lines changed: 6 additions & 0 deletions

@@ -1044,6 +1044,7 @@ async def acompletion(self, messages: list[Message], **kwargs) -> list[LLMResult
                     cost=cost,
                     system_fingerprint=completions.system_fingerprint,
                     reasoning_content=reasoning_content,
+                    finish_reason=choice.finish_reason,
                 )
             )
         return results
@@ -1081,6 +1082,7 @@ async def acompletion_iter(
         outputs = []
         logprobs = []
         role = None
+        finish_reason: str | None = None
         reasoning_content = []
         used_model = None
         async for completion in stream_completions:
@@ -1094,6 +1096,9 @@ async def acompletion_iter(
                 logprobs.append(logprob_content[0].logprob or 0)
             outputs.append(delta.content or "")
             role = delta.role or role
+            # The usage-only chunk (when include_usage=True) has finish_reason=None,
+            # so retain the last non-None finish_reason value
+            finish_reason = choice.finish_reason or finish_reason
             if hasattr(delta, "reasoning_content"):
                 reasoning_content.append(delta.reasoning_content or "")
         text = "".join(outputs)
@@ -1123,6 +1128,7 @@ async def acompletion_iter(
             cache_read_tokens=cache_read,
             cache_creation_tokens=cache_creation,
             cost=cost,
+            finish_reason=finish_reason,
         )

         if text:
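For context on the retention logic in the third hunk above, here is a minimal, self-contained sketch (using a hypothetical Choice dataclass, not lmi's or litellm's actual stream types) of why `finish_reason = choice.finish_reason or finish_reason` is needed: when include_usage=True, OpenAI-style streams end with a usage-only chunk whose finish_reason is None, so naively keeping the last chunk's value would discard the real stop reason.

```python
from dataclasses import dataclass


@dataclass
class Choice:
    finish_reason: str | None


# Hypothetical stream: two content chunks, the terminating chunk,
# then the usage-only chunk (include_usage=True) with finish_reason=None
chunks = [Choice(None), Choice(None), Choice("stop"), Choice(None)]

finish_reason: str | None = None
for choice in chunks:
    # Retain the last non-None value, as acompletion_iter does above
    finish_reason = choice.finish_reason or finish_reason

assert finish_reason == "stop"  # "last chunk wins" would have yielded None
```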

packages/lmi/src/lmi/types.py

Lines changed: 7 additions & 0 deletions

@@ -123,6 +123,13 @@ class LLMResult(BaseModel):
     reasoning_content: str | None = Field(
         default=None, description="Reasoning content from LLMs such as DeepSeek-R1."
     )
+    finish_reason: str | None = Field(
+        default=None,
+        description=(
+            "The reason the model stopped generating tokens, or None if not available."
+        ),
+        examples=["stop", "length", "tool_calls", "refusal"],
+    )

     def __str__(self) -> str:
         return self.text or ""
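A quick sketch of how the new field behaves, using a pared-down stand-in model (only the fields visible in this hunk; the real LLMResult defines many more):

```python
from pydantic import BaseModel, Field


class LLMResultSketch(BaseModel):
    """Stand-in carrying only the two fields from the hunk above."""

    reasoning_content: str | None = Field(
        default=None, description="Reasoning content from LLMs such as DeepSeek-R1."
    )
    finish_reason: str | None = Field(
        default=None,
        description=(
            "The reason the model stopped generating tokens, or None if not available."
        ),
        examples=["stop", "length", "tool_calls", "refusal"],
    )


# Defaults to None when a backend doesn't report a stop reason
assert LLMResultSketch().finish_reason is None
# Otherwise it carries the provider's value, e.g. "length" when max_tokens is hit
assert LLMResultSketch(finish_reason="length").finish_reason == "length"
# The examples metadata surfaces in the generated JSON schema
schema = LLMResultSketch.model_json_schema()
assert "stop" in schema["properties"]["finish_reason"]["examples"]
```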

packages/lmi/tests/test_llms.py

Lines changed: 20 additions & 9 deletions

@@ -381,12 +381,14 @@ def accum(x) -> None:
     assert completion.completion_count > 0
     assert str(completion) == "".join(outputs)
     assert completion.cost > 0
+    assert completion.finish_reason == "stop"

     completion = await llm.call_single(
         messages=messages,
     )
     assert completion.seconds_to_last_token > 0
     assert completion.cost > 0
+    assert completion.finish_reason == "stop"

     # check with mixed callbacks
     async def ac(x) -> None:
@@ -507,13 +509,15 @@ def _build_mock_completion(
     delta_content: str = "",
     delta_reasoning_content: str = "hmmm",
     delta_role: str = "assistant",
+    finish_reason: str = "unknown",
     usage: Any = None,
 ) -> Mock:
     return Mock(
         model=model,
         choices=[
             Mock(
                 logprobs=logprobs,
+                finish_reason=finish_reason,
                 delta=Mock(
                     content=delta_content,
                     reasoning_content=delta_reasoning_content,
@@ -545,9 +549,10 @@ def _build_mock_completion(
         logprobs=Mock(content=[Mock(logprob=-0.5)])
     )

-    # Mock completion with usage info
+    # Mock completion with usage info (final chunk has finish_reason)
     mock_completion_usage = _build_mock_completion(
-        usage=Mock(prompt_tokens=10, completion_tokens=5)
+        usage=Mock(prompt_tokens=10, completion_tokens=5),
+        finish_reason="stop",
     )

     # Create async generator that yields mock completions
@@ -576,6 +581,7 @@ async def mock_stream_iter():  # noqa: RUF029
     assert result.logprob == -0.5
     assert result.prompt_count == 10
     assert result.completion_count == 5
+    assert result.finish_reason == "stop"


 class DummyOutputSchema(BaseModel):
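The streaming test above drives the consumer with unittest.mock objects. A condensed sketch of that pattern (build_chunk here is a local stand-in mirroring the shape _build_mock_completion produces, not the test module's actual helper): content chunks default to finish_reason="unknown", the final usage chunk carries "stop", and the or-retention in the consumer keeps "stop".

```python
from unittest.mock import Mock


def build_chunk(
    content: str = "", finish_reason: str | None = "unknown", usage: Mock | None = None
) -> Mock:
    # Mirrors what the streaming loop reads: chunk.choices[0].delta,
    # chunk.choices[0].finish_reason, and chunk.usage
    return Mock(
        model="test-model",
        usage=usage,
        choices=[
            Mock(
                finish_reason=finish_reason,
                delta=Mock(content=content, role="assistant"),
            )
        ],
    )


stream = [
    build_chunk(content="Hello "),
    build_chunk(content="world"),
    # Final chunk with usage info, carrying the definitive stop reason
    build_chunk(usage=Mock(prompt_tokens=10, completion_tokens=5), finish_reason="stop"),
]

finish_reason = None
outputs = []
for chunk in stream:
    choice = chunk.choices[0]
    outputs.append(choice.delta.content or "")
    finish_reason = choice.finish_reason or finish_reason

assert "".join(outputs) == "Hello world"
assert finish_reason == "stop"
```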
@@ -787,6 +793,7 @@ async def test_single_completion(self, model_name: str) -> None:
         assert len(result.messages) == 1
         assert result.messages[0].content
         assert not hasattr(result.messages[0], "tool_calls"), "Expected normal message"
+        assert result.finish_reason == "stop"

         model = self.MODEL_CLS(name=model_name, config={"n": 2})
         result = await model.call_single(messages)
@@ -795,6 +802,7 @@ async def test_single_completion(self, model_name: str) -> None:
         assert len(result.messages) == 1
         assert result.messages[0].content
         assert not hasattr(result.messages[0], "tool_calls"), "Expected normal message"
+        assert result.finish_reason == "stop"

     @pytest.mark.asyncio
     @pytest.mark.vcr
@@ -857,17 +865,17 @@ def double(x: int) -> int:
             messages, tools=tools, tool_choice=LiteLLMModel.MODEL_CHOOSES_TOOL
         )
         assert isinstance(results, list)
-        assert isinstance(results[0].messages, list)
-
-        tool_message = results[0].messages[0]
-
+        (result,) = results
+        assert isinstance(result.messages, list)
+        tool_message = result.messages[0]
         assert isinstance(tool_message, ToolRequestMessage), (
             "It should have selected a tool"
         )
         assert not tool_message.content
         assert tool_message.tool_calls[0].function.arguments["x"] == 8, (
             "LLM failed in select the correct tool or arguments"
         )
+        assert result.finish_reason == "tool_calls"

         # Simulate the observation
         observation = ToolResponseMessage(
@@ -882,9 +890,11 @@ def double(x: int) -> int:
             messages, tools=tools, tool_choice=LiteLLMModel.MODEL_CHOOSES_TOOL
         )
         assert isinstance(results, list)
-        assert isinstance(results[0].messages, list)
-        assert results[0].messages[0].content
-        assert "16" in results[0].messages[0].content
+        (result,) = results
+        assert isinstance(result.messages, list)
+        assert result.messages[0].content
+        assert "16" in result.messages[0].content
+        assert result.finish_reason == "stop"

     @pytest.mark.asyncio
     @pytest.mark.parametrize(
@@ -1201,6 +1211,7 @@ def mock_router_method(_self, _override_config=None):

     assert results.text == "I'm sorry, but I can't assist with that request."
     assert results.model == CommonLLMNames.GPT_41.value
+    assert results.finish_reason == "stop"
     assert "the llm request was refused" in caplog.text.lower()
     assert "attempting to fallback" in caplog.text.lower()