Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/strands_evals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import evaluators, extractors, generators, simulation, telemetry, types
from . import evaluators, extractors, generators, providers, simulation, telemetry, types
from .case import Case
from .experiment import Experiment
from .simulation import ActorSimulator, UserSimulator
Expand All @@ -9,6 +9,7 @@
"Case",
"evaluators",
"extractors",
"providers",
"types",
"generators",
"simulation",
Expand Down
17 changes: 17 additions & 0 deletions src/strands_evals/providers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from .exceptions import (
ProviderError,
SessionNotFoundError,
TraceNotFoundError,
TraceProviderError,
)
from .trace_provider import (
TraceProvider,
)

# Public names of this package: the TraceProvider interface plus the
# exception types imported above.
__all__ = [
"ProviderError",
"SessionNotFoundError",
"TraceNotFoundError",
"TraceProvider",
"TraceProviderError",
]
25 changes: 25 additions & 0 deletions src/strands_evals/providers/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Exceptions for trace providers."""


class TraceProviderError(Exception):
    """Base exception for trace provider errors.

    The more specific provider exceptions in this module derive from this
    class, so a single ``except TraceProviderError`` handles any of them.
    """


class SessionNotFoundError(TraceProviderError):
    """Raised when no traces are found for the given session ID."""


class TraceNotFoundError(TraceProviderError):
    """Raised when a trace with the given ID is not found."""


class ProviderError(TraceProviderError):
    """Raised when the provider is unreachable or returned an error."""
34 changes: 34 additions & 0 deletions src/strands_evals/providers/trace_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""TraceProvider interface for retrieving agent trace data from observability backends."""

from abc import ABC, abstractmethod

from ..types.evaluation import TaskOutput


class TraceProvider(ABC):
    """Abstract interface for retrieving agent trace data for evaluation.

    Concrete implementations talk to an observability backend and are
    responsible for authentication, pagination, and converting the
    provider-native format into the types the evals system consumes.
    """

    @abstractmethod
    def get_evaluation_data(self, session_id: str) -> TaskOutput:
        """Retrieve all data needed to evaluate a session.

        This is the primary access pattern: given a session ID, fetch all
        of its traces, extract the agent output and trajectory, and return
        them in a form that is ready for evaluation.

        Args:
            session_id: The session identifier (maps to Strands session_id).

        Returns:
            TaskOutput with 'output' (final agent response) and
            'trajectory' (Session containing all traces/spans).

        Raises:
            SessionNotFoundError: If no traces are found for session_id.
            ProviderError: If the provider is unreachable or returns an error.
        """
6 changes: 3 additions & 3 deletions src/strands_evals/types/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@


class Interaction(TypedDict, total=False):
"""
Represents a single interaction in a multi-agent or multi-step system.
"""Represents a single interaction in a multi-agent or multi-step system.


Used to capture the communication flow and dependencies between different
components (agents, tools, or processing nodes) during task execution.
Expand Down Expand Up @@ -56,7 +56,7 @@ class TaskOutput(TypedDict, total=False):
"""

output: Any
trajectory: list[Any]
trajectory: Union[list[Any], Session, None]
interactions: list[Interaction]
input: Any

Expand Down
Empty file.
79 changes: 79 additions & 0 deletions tests/strands_evals/providers/test_trace_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Tests for TraceProvider ABC and exception hierarchy."""

import pytest

from strands_evals.providers.exceptions import (
ProviderError,
SessionNotFoundError,
TraceNotFoundError,
TraceProviderError,
)
from strands_evals.providers.trace_provider import (
TraceProvider,
)
from strands_evals.types.evaluation import TaskOutput
from strands_evals.types.trace import Session


class ConcreteProvider(TraceProvider):
    """Smallest usable TraceProvider subclass, used to exercise the ABC in tests."""

    def __init__(self, session: Session | None = None):
        # A value of None makes get_evaluation_data raise SessionNotFoundError.
        self._session = session

    def get_evaluation_data(self, session_id: str) -> TaskOutput:
        """Return a canned TaskOutput wrapping the stored session, or raise if there is none."""
        stored = self._session
        if stored is not None:
            return TaskOutput(output="test response", trajectory=stored)
        raise SessionNotFoundError(f"No session found: {session_id}")




class TestExceptionHierarchy:
    """Verifies the inheritance relationships of the provider exceptions."""

    def test_trace_provider_error_is_exception(self):
        assert issubclass(TraceProviderError, Exception)

    def test_session_not_found_is_trace_provider_error(self):
        assert issubclass(SessionNotFoundError, TraceProviderError)

    def test_trace_not_found_is_trace_provider_error(self):
        assert issubclass(TraceNotFoundError, TraceProviderError)

    def test_provider_error_is_trace_provider_error(self):
        assert issubclass(ProviderError, TraceProviderError)

    def test_exceptions_carry_message(self):
        err = SessionNotFoundError("session-123 not found")
        assert "session-123 not found" in str(err)

    # Parametrized rather than looped so that a failure names the offending
    # class and the remaining classes are still checked.
    @pytest.mark.parametrize(
        "exc_class",
        [SessionNotFoundError, TraceNotFoundError, ProviderError],
    )
    def test_catching_base_catches_all(self, exc_class):
        """All provider exceptions can be caught with TraceProviderError."""
        with pytest.raises(TraceProviderError):
            raise exc_class("test")



class TestTraceProviderABC:
    """Exercises the TraceProvider abstract-class contract through ConcreteProvider."""

    def test_cannot_instantiate_without_get_evaluation_data(self):
        # The base class leaves get_evaluation_data abstract, so building it must fail.
        with pytest.raises(TypeError):
            TraceProvider()  # type: ignore[abstract]

    def test_concrete_provider_instantiates(self):
        assert isinstance(ConcreteProvider(), TraceProvider)

    def test_get_evaluation_data_returns_task_output(self):
        expected_session = Session(session_id="s1", traces=[])
        task_output = ConcreteProvider(session=expected_session).get_evaluation_data("s1")
        assert task_output["output"] == "test response"
        assert task_output["trajectory"] == expected_session

    def test_get_evaluation_data_raises_session_not_found(self):
        empty_provider = ConcreteProvider(session=None)
        with pytest.raises(SessionNotFoundError, match="No session found"):
            empty_provider.get_evaluation_data("missing")