Skip to content

Commit 25f1357

Browse files
akmarou authored and Jonathan Buck committed
updated README
1 parent 09cce3e commit 25f1357

File tree

9 files changed

+122
-410
lines changed

9 files changed

+122
-410
lines changed

src/examples/actor_simulator_basic.py

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,60 @@
11
import asyncio
22

3+
from opentelemetry import trace
4+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
5+
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
36
from strands import Agent
47

58
from strands_evals import ActorSimulator, Case, Dataset
6-
from strands_evals.evaluators import OutputEvaluator
9+
from strands_evals.evaluators import HelpfulnessEvaluator
10+
from strands_evals.mappers import StrandsInMemorySessionMapper
11+
from strands_evals.telemetry import StrandsEvalsTelemetry
712

13+
# ======================================
14+
# SETUP TELEMETRY
15+
# ======================================
16+
# Target agent telemetry
17+
target_telemetry = StrandsEvalsTelemetry()
18+
target_exporter = InMemorySpanExporter()
19+
target_telemetry.tracer_provider.add_span_processor(BatchSpanProcessor(target_exporter))
820

9-
async def simulate_conversation(case: Case) -> dict:
21+
# Simulator telemetry (no exporter)
22+
simulator_telemetry = StrandsEvalsTelemetry()
23+
24+
25+
async def task_function(case: Case) -> dict:
1026
"""Simulate a multi-turn conversation between user and agent."""
1127
# Create agent under test
28+
trace.set_tracer_provider(target_telemetry.tracer_provider)
1229
agent = Agent(system_prompt="You are a helpful travel assistant.", callback_handler=None)
1330

1431
# Create user simulator from case
32+
trace.set_tracer_provider(simulator_telemetry.tracer_provider)
1533
user_sim = ActorSimulator.from_case_for_user_simulator(case=case)
1634

17-
# Multi-turn conversation
18-
conversation = []
19-
max_turns = 10
20-
21-
# Initial user message
35+
# Multi-turn conversation (max 10 turns as failsafe)
2236
user_message = case.input
23-
conversation.append({"role": "user", "content": user_message})
2437

25-
for turn_num in range(max_turns):
26-
print(f"Starting turn number {turn_num}")
27-
# Check for completion
38+
for _ in range(10):
2839
if "<stop/>" in user_message:
2940
break
3041

3142
# Agent responds
43+
trace.set_tracer_provider(target_telemetry.tracer_provider)
3244
agent_response = agent(user_message)
3345
agent_message = str(agent_response)
34-
conversation.append({"role": "assistant", "content": agent_message})
3546

3647
# User acts
48+
trace.set_tracer_provider(simulator_telemetry.tracer_provider)
3749
user_result = user_sim.act(agent_message)
3850
user_message = str(user_result.structured_output.message)
39-
conversation.append({"role": "user", "content": user_message})
4051

41-
return {
42-
"output": conversation[-1]["content"] if conversation else "",
43-
"conversation": conversation,
44-
"turns": len(conversation) // 2,
45-
}
52+
# Collect traces
53+
finished_spans = target_exporter.get_finished_spans()
54+
mapper = StrandsInMemorySessionMapper()
55+
session = mapper.map_to_session(finished_spans, session_id="test-session")
56+
57+
return {"output": str(agent_response), "trajectory": session}
4658

4759

4860
# Create test cases
@@ -62,15 +74,15 @@ async def simulate_conversation(case: Case) -> dict:
6274
]
6375

6476
# Create evaluator
65-
evaluator = OutputEvaluator(rubric="Evaluate if the agent successfully helped achieve the user's travel goal.")
77+
evaluator = HelpfulnessEvaluator()
6678

6779
# Create dataset
6880
dataset = Dataset[str, str](cases=test_cases, evaluator=evaluator)
6981

7082

7183
# Run evaluations
7284
async def main():
73-
report = await dataset.run_evaluations_async(simulate_conversation)
85+
report = await dataset.run_evaluations_async(task_function)
7486
report.run_display()
7587

7688

src/examples/actor_simulator_with_state.py

Lines changed: 0 additions & 65 deletions
This file was deleted.

src/examples/multi_turn_user_sim.py

Lines changed: 0 additions & 66 deletions
This file was deleted.

0 commit comments

Comments
 (0)