Skip to content

Commit d450e89

Browse files
authored
feat: trace provider interface (#140)
* feat: add TraceProvider interface and trace data types Introduce an abstract TraceProvider base class for retrieving agent trace data from observability backends for evaluation. This includes: - TraceProvider ABC with get_session, list_sessions, and get_session_by_trace_id methods - SessionFilter dataclass for filtering session discovery - Custom error hierarchy (TraceProviderError, SessionNotFoundError, TraceNotFoundError, ProviderError) - Session and Trace data types with span tree construction and convenience accessors (input/output messages, token usage, duration) - New providers module exposed at package level - Comprehensive unit tests for providers and trace types * feat(providers): Add TraceProvider interface for observability backends Add abstract TraceProvider that retrieves agent trace data from observability backends and returns Session/Trace types the evals system already consumes. - TraceProvider ABC with get_session() (required), list_sessions() and get_session_by_trace_id() (optional, raise NotImplementedError) - SessionFilter dataclass for time-range and limit-based discovery - Exception hierarchy: TraceProviderError base with SessionNotFoundError, TraceNotFoundError, ProviderError - Export providers module from strands_evals package * feat(providers): Add TraceProvider interface for observability backends Add abstract TraceProvider that retrieves agent trace data from observability backends and returns Session/Trace types the evals system already consumes. 
- TraceProvider ABC with get_session() (required), list_sessions() and get_session_by_trace_id() (optional, raise NotImplementedError) - SessionFilter dataclass for time-range and limit-based discovery - Exception hierarchy: TraceProviderError base with SessionNotFoundError, TraceNotFoundError, ProviderError - Export providers module from strands_evals package * refactor: simplify TraceProvider by removing optional methods and SessionFilter Remove SessionFilter dataclass, list_sessions(), and get_evaluation_data_by_trace_id() from TraceProvider to keep the interface minimal. Only the core get_evaluation_data() abstract method remains. Associated tests and public exports are updated accordingly.
1 parent ccf9241 commit d450e89

File tree

7 files changed

+160
-4
lines changed

7 files changed

+160
-4
lines changed

src/strands_evals/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from . import evaluators, extractors, generators, simulation, telemetry, types
1+
from . import evaluators, extractors, generators, providers, simulation, telemetry, types
22
from .case import Case
33
from .experiment import Experiment
44
from .simulation import ActorSimulator, UserSimulator
@@ -9,6 +9,7 @@
99
"Case",
1010
"evaluators",
1111
"extractors",
12+
"providers",
1213
"types",
1314
"generators",
1415
"simulation",
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from .exceptions import (
2+
ProviderError,
3+
SessionNotFoundError,
4+
TraceNotFoundError,
5+
TraceProviderError,
6+
)
7+
from .trace_provider import (
8+
TraceProvider,
9+
)
10+
11+
__all__ = [
12+
"ProviderError",
13+
"SessionNotFoundError",
14+
"TraceNotFoundError",
15+
"TraceProvider",
16+
"TraceProviderError",
17+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Exceptions for trace providers."""
2+
3+
4+
class TraceProviderError(Exception):
    """Base exception for trace provider errors.

    SessionNotFoundError, TraceNotFoundError, and ProviderError all derive
    from this class, so a single ``except TraceProviderError`` clause handles
    any of them.
    """
8+
9+
10+
class SessionNotFoundError(TraceProviderError):
    """No traces found for the given session ID.

    Documented as raised by ``TraceProvider.get_evaluation_data`` when the
    requested session has no traces.
    """
14+
15+
16+
class TraceNotFoundError(TraceProviderError):
    """Trace with the given ID not found."""
20+
21+
22+
class ProviderError(TraceProviderError):
    """Provider is unreachable or returned an error.

    Documented as raised by ``TraceProvider.get_evaluation_data`` when the
    backend cannot be reached or responds with an error.
    """
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""TraceProvider interface for retrieving agent trace data from observability backends."""
2+
3+
from abc import ABC, abstractmethod
4+
5+
from ..types.evaluation import TaskOutput
6+
7+
8+
class TraceProvider(ABC):
    """Abstract interface for pulling agent trace data out of an observability backend.

    Concrete providers are responsible for authentication, pagination, and
    translating the backend's native format into the types consumed by the
    evals system.
    """

    @abstractmethod
    def get_evaluation_data(self, session_id: str) -> TaskOutput:
        """Fetch everything required to evaluate a single session.

        This is the primary access pattern: given a session ID, an
        implementation fetches all of that session's traces, extracts the
        agent output and trajectory, and returns them in a form that is
        ready for evaluation.

        Args:
            session_id: Identifier of the session to load (maps to the
                Strands session_id).

        Returns:
            A TaskOutput whose 'output' is the final agent response and whose
            'trajectory' is the Session containing all traces/spans.

        Raises:
            SessionNotFoundError: When no traces are found for session_id.
            ProviderError: When the provider is unreachable or returns an error.
        """
        ...

src/strands_evals/types/evaluation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99

1010
class Interaction(TypedDict, total=False):
11-
"""
12-
Represents a single interaction in a multi-agent or multi-step system.
11+
"""Represents a single interaction in a multi-agent or multi-step system.
12+
1313
1414
Used to capture the communication flow and dependencies between different
1515
components (agents, tools, or processing nodes) during task execution.
@@ -56,7 +56,7 @@ class TaskOutput(TypedDict, total=False):
5656
"""
5757

5858
output: Any
59-
trajectory: list[Any]
59+
trajectory: Union[list[Any], Session, None]
6060
interactions: list[Interaction]
6161
input: Any
6262

tests/strands_evals/providers/__init__.py

Whitespace-only changes.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""Tests for TraceProvider ABC and exception hierarchy."""
2+
3+
import pytest
4+
5+
from strands_evals.providers.exceptions import (
6+
ProviderError,
7+
SessionNotFoundError,
8+
TraceNotFoundError,
9+
TraceProviderError,
10+
)
11+
from strands_evals.providers.trace_provider import (
12+
TraceProvider,
13+
)
14+
from strands_evals.types.evaluation import TaskOutput
15+
from strands_evals.types.trace import Session
16+
17+
18+
class ConcreteProvider(TraceProvider):
    """Smallest possible TraceProvider subclass, used to exercise the ABC in tests."""

    def __init__(self, session: Session | None = None):
        # None means "no session available": get_evaluation_data then raises.
        self._session = session

    def get_evaluation_data(self, session_id: str) -> TaskOutput:
        """Return a canned TaskOutput wrapping the stored session.

        Raises:
            SessionNotFoundError: If this provider was built without a session.
        """
        stored = self._session
        if stored is not None:
            return TaskOutput(output="test response", trajectory=stored)
        raise SessionNotFoundError(f"No session found: {session_id}")
31+
32+
33+
34+
35+
class TestExceptionHierarchy:
    """Checks the inheritance relationships among the provider exceptions."""

    def test_trace_provider_error_is_exception(self):
        """The hierarchy root is an ordinary Exception subclass."""
        assert issubclass(TraceProviderError, Exception)

    def test_session_not_found_is_trace_provider_error(self):
        assert issubclass(SessionNotFoundError, TraceProviderError)

    def test_trace_not_found_is_trace_provider_error(self):
        assert issubclass(TraceNotFoundError, TraceProviderError)

    def test_provider_error_is_trace_provider_error(self):
        assert issubclass(ProviderError, TraceProviderError)

    def test_exceptions_carry_message(self):
        """The message passed to the constructor shows up in str()."""
        message = "session-123 not found"
        assert message in str(SessionNotFoundError(message))

    def test_catching_base_catches_all(self):
        """All provider exceptions can be caught with TraceProviderError."""
        derived_types = (SessionNotFoundError, TraceNotFoundError, ProviderError)
        for error_type in derived_types:
            with pytest.raises(TraceProviderError):
                raise error_type("test")
57+
58+
59+
60+
class TestTraceProviderABC:
    """Exercises the abstract-base-class contract of TraceProvider."""

    def test_cannot_instantiate_without_get_evaluation_data(self):
        """Instantiating the ABC directly must fail with TypeError."""
        with pytest.raises(TypeError):
            TraceProvider()  # type: ignore[abstract]

    def test_concrete_provider_instantiates(self):
        assert isinstance(ConcreteProvider(), TraceProvider)

    def test_get_evaluation_data_returns_task_output(self):
        """A provider holding a session returns it as the trajectory."""
        expected_session = Session(session_id="s1", traces=[])
        task_output = ConcreteProvider(session=expected_session).get_evaluation_data("s1")
        assert task_output["output"] == "test response"
        assert task_output["trajectory"] == expected_session

    def test_get_evaluation_data_raises_session_not_found(self):
        """A provider with no session reports the lookup as not found."""
        empty_provider = ConcreteProvider(session=None)
        with pytest.raises(SessionNotFoundError, match="No session found"):
            empty_provider.get_evaluation_data("missing")

0 commit comments

Comments
 (0)