Skip to content

Commit 0ad4b60

Browse files
.
1 parent a7103de commit 0ad4b60

2 files changed

Lines changed: 107 additions & 5 deletions

File tree

deepeval/integrations/pydantic_ai/agent.py

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import inspect
22
from typing import Optional, List
33
from contextvars import ContextVar
4+
from contextlib import asynccontextmanager
45

56
from deepeval.prompt import Prompt
67
from deepeval.tracing.types import AgentSpan
@@ -96,10 +97,12 @@ async def run(
9697
bound = sig.bind_partial(*args, **kwargs)
9798
bound.apply_defaults()
9899
input = bound.arguments.get("user_prompt", None)
100+
101+
agent_name = super().name if super().name is not None else "Agent"
99102

100103
with Observer(
101104
span_type="agent" if not _IS_RUN_SYNC.get() else "custom",
102-
func_name="Agent" if not _IS_RUN_SYNC.get() else "run",
105+
func_name=agent_name if not _IS_RUN_SYNC.get() else "run",
103106
function_kwargs={"input": input},
104107
metrics=self.agent_metrics if not _IS_RUN_SYNC.get() else None,
105108
metric_collection=self.agent_metric_collection if not _IS_RUN_SYNC.get() else None,
@@ -149,9 +152,11 @@ def run_sync(
149152

150153
token = _IS_RUN_SYNC.set(True)
151154

155+
agent_name = super().name if super().name is not None else "Agent"
156+
152157
with Observer(
153158
span_type="agent",
154-
func_name="Agent",
159+
func_name=agent_name,
155160
function_kwargs={"input": input},
156161
metrics=self.agent_metrics,
157162
metric_collection=self.agent_metric_collection,
@@ -187,7 +192,64 @@ def run_sync(
187192

188193
return result
189194

190-
195+
@asynccontextmanager
196+
async def run_stream(
197+
self,
198+
*args,
199+
name: Optional[str] = None,
200+
tags: Optional[List[str]] = None,
201+
metadata: Optional[dict] = None,
202+
thread_id: Optional[str] = None,
203+
user_id: Optional[str] = None,
204+
metric_collection: Optional[str] = None,
205+
metrics: Optional[List[BaseMetric]] = None,
206+
**kwargs
207+
):
208+
sig = inspect.signature(super().run_stream)
209+
super_params = sig.parameters
210+
super_kwargs = {k: v for k, v in kwargs.items() if k in super_params}
211+
bound = sig.bind_partial(*args, **super_kwargs)
212+
bound.apply_defaults()
213+
input = bound.arguments.get("user_prompt", None)
214+
215+
agent_name = super().name if super().name is not None else "Agent"
216+
217+
with Observer(
218+
span_type="agent",
219+
func_name=agent_name,
220+
function_kwargs={"input": input},
221+
metrics=self.agent_metrics,
222+
metric_collection=self.agent_metric_collection,
223+
) as observer:
224+
final_result = None
225+
async with super().run_stream(*args, **super_kwargs) as result:
226+
try:
227+
yield result
228+
finally:
229+
try:
230+
final_result = await result.get_output()
231+
observer.result = final_result
232+
except Exception:
233+
pass
234+
235+
update_trace_context(
236+
trace_name=name if name is not None else self.trace_name,
237+
trace_tags=tags if tags is not None else self.trace_tags,
238+
trace_metadata=metadata if metadata is not None else self.trace_metadata,
239+
trace_thread_id=thread_id if thread_id is not None else self.trace_thread_id,
240+
trace_user_id=user_id if user_id is not None else self.trace_user_id,
241+
trace_metric_collection=metric_collection if metric_collection is not None else self.trace_metric_collection,
242+
trace_metrics=metrics if metrics is not None else self.trace_metrics,
243+
trace_input=input,
244+
trace_output=(final_result if final_result is not None else None),
245+
)
246+
agent_span: AgentSpan = current_span_context.get()
247+
try:
248+
if final_result is not None:
249+
agent_span.tools_called = extract_tools_called(final_result)
250+
except:
251+
pass
252+
191253
def tool(
192254
self,
193255
*args,

deepeval/integrations/pydantic_ai/utils.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from time import perf_counter
2+
from contextlib import asynccontextmanager
13
import inspect
24
import functools
35
from typing import Any, Callable, List, Optional
@@ -11,7 +13,7 @@
1113
from deepeval.tracing.tracing import Observer
1214
from deepeval.metrics.base_metric import BaseMetric
1315
from deepeval.test_case.llm_test_case import ToolCall
14-
from deepeval.tracing.context import current_trace_context
16+
from deepeval.tracing.context import current_trace_context, current_span_context
1517
from deepeval.tracing.types import AgentSpan, LlmOutput, LlmSpan, LlmToolCall
1618

1719
# llm tools called
@@ -127,9 +129,47 @@ async def wrapper(*args, **kwargs):
127129
)
128130
observer.result = result
129131
return result
130-
132+
131133
model.request = wrapper
132134

135+
stream_original_func = model.request_stream
136+
stream_sig = inspect.signature(stream_original_func)
137+
138+
@asynccontextmanager
139+
async def stream_wrapper(*args, **kwargs):
140+
bound = stream_sig.bind_partial(*args, **kwargs)
141+
bound.apply_defaults()
142+
request = bound.arguments.get("messages", [])
143+
144+
with Observer(
145+
span_type="llm",
146+
func_name="LLM",
147+
observe_kwargs={"model": model_name},
148+
metrics=llm_metrics,
149+
metric_collection=llm_metric_collection,
150+
) as observer:
151+
llm_span: LlmSpan = current_span_context.get()
152+
async with stream_original_func(*args, **kwargs) as streamed_response:
153+
try:
154+
yield streamed_response
155+
print("streamed_response >>>>>")
156+
if not llm_span.token_intervals:
157+
llm_span.token_intervals = {perf_counter(): "NA"}
158+
else:
159+
llm_span.token_intervals[perf_counter()] = "NA"
160+
finally:
161+
try:
162+
result = streamed_response.get()
163+
observer.update_span_properties = (
164+
lambda llm_span: set_llm_span_attributes(
165+
llm_span, request, result, llm_prompt
166+
)
167+
)
168+
observer.result = result
169+
except Exception:
170+
pass
171+
172+
model.request_stream = stream_wrapper
133173

134174
def create_patched_tool(
135175
func: Callable,

0 commit comments

Comments (0)