@@ -492,7 +492,7 @@ async def arun_stream(
        model_name = model.name
        span_name = f"{model_name}.ainvoke_stream"

-       with self._tracer.start_as_current_span(
+       span = self._tracer.start_span(
            span_name,
            attributes={
                OPENINFERENCE_SPAN_KIND: LLM,
@@ -501,11 +501,15 @@
                **dict(_llm_input_messages(arguments)),
                **dict(get_attributes_from_context()),
            },
-       ) as span:
+       )
+
+       # Manually attach context (instead of using use_span context manager)
+       token = context_api.attach(trace_api.set_span_in_context(span))
+
        try:
Bug: Span never ended if context attachment fails

The span is created on line 495, but the try block doesn't start until line 509. The context_api.attach() call on line 507 sits between span creation and the try block. If context_api.attach() or trace_api.set_span_in_context() raises an exception, the span is never ended, because the finally block containing span.end() never executes. The try/finally should also wrap the context attachment so that span.end() is always called.
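A minimal sketch of the suggested shape, reusing the names from this diff (the inline attributes dict and the streaming body are elided here, so `attributes` is a stand-in):

    span = self._tracer.start_span(span_name, attributes=attributes)
    token = None
    try:
        # Attach inside the try: if set_span_in_context or attach raises,
        # control still reaches finally and the span is ended.
        token = context_api.attach(trace_api.set_span_in_context(span))
        ...  # streaming body unchanged
    finally:
        if token is not None:
            try:
                context_api.detach(token)
            except Exception:
                pass
        span.end()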

            span.set_status(trace_api.StatusCode.OK)
            span.set_attribute(LLM_MODEL_NAME, model.id)
            span.set_attribute(LLM_PROVIDER, model.provider)
            # Token usage will be set after streaming completes based on final response

            responses = []
            async for chunk in wrapped(*args, **kwargs):  # type: ignore[attr-defined]
@@ -528,14 +532,12 @@
            if final_response_with_metrics and final_response_with_metrics.response_usage:
                metrics = final_response_with_metrics.response_usage

-               # Set token usage attributes
                if hasattr(metrics, "input_tokens") and metrics.input_tokens:
                    span.set_attribute(LLM_TOKEN_COUNT_PROMPT, metrics.input_tokens)

                if hasattr(metrics, "output_tokens") and metrics.output_tokens:
                    span.set_attribute(LLM_TOKEN_COUNT_COMPLETION, metrics.output_tokens)

-               # Set cache-related tokens if available
                if hasattr(metrics, "cache_read_tokens") and metrics.cache_read_tokens:
                    span.set_attribute(
                        LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, metrics.cache_read_tokens
@@ -546,6 +548,19 @@
                        LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, metrics.cache_write_tokens
                    )

+       except Exception as e:
+           span.set_status(trace_api.StatusCode.ERROR, str(e))
+           span.record_exception(e)
+           raise
Bug: CancelledError not caught, span status remains OK

The exception handler catches only Exception, but asyncio.CancelledError inherits directly from BaseException in Python 3.8+. When the async streaming operation is cancelled, CancelledError is not caught, the span status remains OK (set on line 510), and no exception is recorded. Other instrumentations in this repository (such as autogen-agentchat) explicitly catch BaseException and handle GeneratorExit separately, recording cancellation errors while still allowing graceful generator cleanup.
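A sketch of that broader handler, following the autogen-agentchat pattern described above (the exact clauses are an assumption, not code from this PR):

    except GeneratorExit:
        # Let async-generator cleanup proceed without marking the span failed.
        raise
    except BaseException as e:  # also catches asyncio.CancelledError
        span.set_status(trace_api.StatusCode.ERROR, str(e))
        span.record_exception(e)
        raise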


+       finally:
+           # Wrap detach in try/except to handle context mismatch on cancellation
+           try:
+               context_api.detach(token)
+           except Exception:
+               pass
+           span.end()


# span attributes
INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE