29 changes: 22 additions & 7 deletions litellm/integrations/arize/_utils.py
@@ -236,13 +236,28 @@ def _set_usage_outputs(span: "Span", response_obj, span_attrs):
     prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens")
     if prompt_tokens:
         safe_set_attribute(span, span_attrs.LLM_TOKEN_COUNT_PROMPT, prompt_tokens)
-    reasoning_tokens = usage.get("output_tokens_details", {}).get("reasoning_tokens")
-    if reasoning_tokens:
-        safe_set_attribute(
-            span,
-            span_attrs.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
-            reasoning_tokens,
-        )
+    completion_tokens_details = usage.get("completion_tokens_details") or usage.get(
+        "output_tokens_details"
+    )
+    if completion_tokens_details is not None:
+        reasoning_tokens = getattr(completion_tokens_details, "reasoning_tokens", None)
+        if reasoning_tokens:
+            safe_set_attribute(
+                span,
+                span_attrs.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                reasoning_tokens,
+            )
+    prompt_tokens_details = usage.get("prompt_tokens_details") or usage.get(
+        "input_tokens_details"
+    )
+    if prompt_tokens_details is not None:
+        cached_tokens = getattr(prompt_tokens_details, "cached_tokens", None)
+        if cached_tokens:
+            safe_set_attribute(
+                span,
+                span_attrs.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ,
+                cached_tokens,
+            )


def _infer_open_inference_span_kind(call_type: Optional[str]) -> str:
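Aside: the fallback chain above has to handle two usage shapes. Chat Completions usage carries completion_tokens_details and prompt_tokens_details, while Responses API usage carries output_tokens_details and input_tokens_details. Here is a minimal sketch of the mixed access pattern (not part of the PR; the token counts are invented), assuming litellm's Usage supports dict-style .get(), which the patched code already relies on:

from litellm.types.utils import (
    CompletionTokensDetailsWrapper,
    PromptTokensDetailsWrapper,
    Usage,
)

# Chat Completions shape; the counts here are for illustration only.
usage = Usage(
    prompt_tokens=80,
    completion_tokens=120,
    total_tokens=200,
    completion_tokens_details=CompletionTokensDetailsWrapper(reasoning_tokens=45),
    prompt_tokens_details=PromptTokensDetailsWrapper(cached_tokens=30),
)

# Top-level fields are read dict-style, with the Responses API key as fallback...
details = usage.get("completion_tokens_details") or usage.get("output_tokens_details")

# ...but the details wrappers are objects, so their fields are read with getattr().
assert getattr(details, "reasoning_tokens", None) == 45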
124 changes: 124 additions & 0 deletions tests/test_litellm/integrations/arize/test_arize_utils.py
@@ -374,3 +374,127 @@ def test_construct_dynamic_arize_headers():
"arize-space-id": "test_space_key",
"api_key": "test_api_key"
}


+def test_set_usage_outputs_chat_completion_tokens_details():
+    """
+    Test that _set_usage_outputs correctly extracts reasoning_tokens from
+    completion_tokens_details (Chat Completions API) and cached_tokens from
+    prompt_tokens_details.
+    """
+    from unittest.mock import MagicMock
+
+    from litellm.integrations.arize._utils import _set_usage_outputs
+    from litellm.types.utils import (
+        CompletionTokensDetailsWrapper,
+        ModelResponse,
+        PromptTokensDetailsWrapper,
+        Usage,
+    )
+
+    span = MagicMock()
+
+    response_obj = ModelResponse(
+        usage=Usage(
+            total_tokens=200,
+            completion_tokens=120,
+            prompt_tokens=80,
+            completion_tokens_details=CompletionTokensDetailsWrapper(
+                reasoning_tokens=45
+            ),
+            prompt_tokens_details=PromptTokensDetailsWrapper(cached_tokens=30),
+        ),
+        choices=[
+            Choices(
+                message={"role": "assistant", "content": "test"}, finish_reason="stop"
+            )
+        ],
+        model="gpt-4o",
+    )
+
+    _set_usage_outputs(span, response_obj, SpanAttributes)
+
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_TOTAL, 200)
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, 120)
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_PROMPT, 80)
+    span.set_attribute.assert_any_call(
+        SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, 45
+    )
+    span.set_attribute.assert_any_call(
+        SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, 30
+    )


+def test_set_usage_outputs_responses_api_output_tokens_details():
+    """
+    Test that _set_usage_outputs falls back to output_tokens_details (Responses API)
+    when completion_tokens_details is not present.
+    """
+    from unittest.mock import MagicMock
+
+    from litellm.integrations.arize._utils import _set_usage_outputs
+    from litellm.types.llms.openai import (
+        OutputTokensDetails,
+        ResponseAPIUsage,
+        ResponsesAPIResponse,
+    )
+
+    span = MagicMock()
+
+    response_obj = ResponsesAPIResponse(
+        id="response-456",
+        created_at=1625247600,
+        output=[],
+        usage=ResponseAPIUsage(
+            input_tokens=100,
+            output_tokens=200,
+            total_tokens=300,
+            output_tokens_details=OutputTokensDetails(reasoning_tokens=150),
+        ),
+    )
+
+    _set_usage_outputs(span, response_obj, SpanAttributes)
+
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_TOTAL, 300)
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, 200)
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_PROMPT, 100)
+    span.set_attribute.assert_any_call(
+        SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, 150
+    )
Comment on lines +428 to +463

P2 Missing coverage for input_tokens_details.cached_tokens (Responses API path)

The new code in _set_usage_outputs falls back to usage.get("input_tokens_details") for cached_tokens when prompt_tokens_details is absent. ResponseAPIUsage has input_tokens_details: Optional[InputTokensDetails], and InputTokensDetails carries cached_tokens: int = 0.

test_set_usage_outputs_responses_api_output_tokens_details only verifies the output_tokens_details branch; input_tokens_details is never set in the test, so the new fallback to usage.get("input_tokens_details") is never exercised. A future rename of that key or a copy-paste mistake in the fallback chain would pass all tests silently.

Consider extending this test (or adding a dedicated one) that populates input_tokens_details with a non-zero cached_tokens and asserts that LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ is set to the expected value.
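
One possible shape for that test, sketched here rather than taken from the PR. It mirrors test_set_usage_outputs_responses_api_output_tokens_details above and assumes InputTokensDetails accepts cached_tokens as the comment describes; the id and token counts are made up:

def test_set_usage_outputs_responses_api_input_tokens_details():
    """
    Sketch of the requested coverage: populate input_tokens_details with a
    non-zero cached_tokens and assert the cache-read attribute is set.
    """
    from unittest.mock import MagicMock

    from litellm.integrations.arize._utils import _set_usage_outputs
    from litellm.types.llms.openai import (
        InputTokensDetails,
        OutputTokensDetails,
        ResponseAPIUsage,
        ResponsesAPIResponse,
    )

    span = MagicMock()

    response_obj = ResponsesAPIResponse(
        id="response-789",
        created_at=1625247600,
        output=[],
        usage=ResponseAPIUsage(
            input_tokens=100,
            output_tokens=200,
            total_tokens=300,
            input_tokens_details=InputTokensDetails(cached_tokens=25),
            output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        ),
    )

    _set_usage_outputs(span, response_obj, SpanAttributes)

    span.set_attribute.assert_any_call(
        SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, 25
    )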



+def test_set_usage_outputs_no_token_details():
+    """
+    Test that _set_usage_outputs works when neither completion_tokens_details
+    nor prompt_tokens_details are present (basic usage without details).
+    """
+    from unittest.mock import MagicMock
+
+    from litellm.integrations.arize._utils import _set_usage_outputs
+    from litellm.types.utils import ModelResponse, Usage
+
+    span = MagicMock()
+
+    response_obj = ModelResponse(
+        usage=Usage(
+            total_tokens=100,
+            completion_tokens=60,
+            prompt_tokens=40,
+        ),
+        choices=[
+            Choices(
+                message={"role": "assistant", "content": "test"}, finish_reason="stop"
+            )
+        ],
+        model="gpt-4o",
+    )
+
+    _set_usage_outputs(span, response_obj, SpanAttributes)
+
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_TOTAL, 100)
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, 60)
+    span.set_attribute.assert_any_call(SpanAttributes.LLM_TOKEN_COUNT_PROMPT, 40)
+    # reasoning and cached should NOT be set
+    for call in span.set_attribute.call_args_list:
+        assert call[0][0] != SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING
+        assert call[0][0] != SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ