Skip to content

Commit e66caa4

Browse files
committed
feat(telemetry): enabled trace setup for evaluations
1 parent b26e613 commit e66caa4

28 files changed

+1013
-60
lines changed

.github/workflows/integration-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,4 @@ jobs:
6262
python-version: '3.10'
6363
- name: Install dependencies
6464
run: |
65-
pip install --no-cache-dir hatch
65+
pip install --no-cache-dir hatch

pyproject.toml

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ dependencies = [
1919
"strands-agents>=1.0.0",
2020
"strands-agents-tools>=0.1.0,<1.0.0",
2121
"typing-extensions>=4.0",
22+
"opentelemetry-api>=1.20.0",
23+
"opentelemetry-sdk>=1.20.0",
24+
"opentelemetry-instrumentation-threading>=0.51b0,<1.00b0",
2225
]
2326

2427
[tool.hatch.build.targets.wheel]
@@ -34,18 +37,21 @@ test = [
3437

3538
dev = [
3639
"hatch>=1.0.0,<2.0.0",
37-
"mypy>=1.0",
40+
"mypy>=1.15.0,<2.0.0",
3841
"pre-commit>=3.2.0,<4.2.0",
39-
"ruff>=0.4.4,<1.0.0",
42+
"ruff>=0.13.0,<0.14.0",
4043
]
4144

45+
otel = ["opentelemetry-exporter-otlp-proto-http>=1.30.0,<2.0.0"]
46+
4247
[tool.ruff]
4348
line-length = 120
4449
include = ["src/**/*.py", "tests/**/*.py"]
4550

4651
[tool.hatch.envs.hatch-test]
4752
installer = "uv"
4853
extra-args = ["-n", "auto", "-vv"]
54+
features = ["otel"]
4955
dependencies = [
5056
"pytest>=8.0.0,<9.0.0",
5157
"pytest-cov>=7.0.0,<8.0.0",
@@ -79,6 +85,17 @@ prepare = [
7985
]
8086

8187

88+
[tool.hatch.envs.hatch-static-analysis]
89+
installer = "uv"
90+
features = ["otel"]
91+
dependencies = [
92+
"mypy>=1.15.0,<2.0.0",
93+
"ruff>=0.13.0,<0.14.0",
94+
# Include required package dependencies for mypy
95+
"strands-agents-evals @ {root:uri}",
96+
]
97+
98+
8299
[tool.hatch.envs.hatch-static-analysis.scripts]
83100
format-check = [
84101
"ruff format --check"
@@ -137,9 +154,9 @@ dependencies = [
137154
]
138155
extra-dependencies = [
139156
"hatch>=1.0.0,<2.0.0",
140-
"mypy>=1.0",
157+
"mypy>=1.15.0,<2.0.0",
141158
"pre-commit>=3.2.0,<4.2.0",
142-
"ruff>=0.4.4,<1.0.0",
159+
"ruff>=0.13.0,<0.14.0",
143160
]
144161

145162
[tool.coverage.run]

src/examples/agents_as_tools.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime
33

44
from strands import Agent, tool
5+
56
from strands_evals import Case, Dataset
67
from strands_evals.evaluators import InteractionsEvaluator, TrajectoryEvaluator
78
from strands_evals.extractors import tools_use_extractor

src/examples/bank_tools_trajectory.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime
33

44
from strands import Agent, tool
5+
56
from strands_evals import Case, Dataset
67
from strands_evals.evaluators import TrajectoryEvaluator
78
from strands_evals.extractors import tools_use_extractor

src/examples/dataset_generator/simple_dataset.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asyncio
22

33
from strands import Agent
4+
45
from strands_evals.evaluators.output_evaluator import OutputEvaluator
56
from strands_evals.generators.dataset_generator import DatasetGenerator
67

src/examples/dataset_generator/trajectory_dataset.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asyncio
22

33
from strands import Agent, tool
4+
45
from strands_evals.evaluators import TrajectoryEvaluator
56
from strands_evals.extractors import tools_use_extractor
67
from strands_evals.generators import DatasetGenerator

src/examples/evaluate_graph.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from strands import Agent
55
from strands.multiagent import GraphBuilder
6+
67
from strands_evals import Case, Dataset
78
from strands_evals.evaluators import InteractionsEvaluator, TrajectoryEvaluator
89
from strands_evals.extractors import graph_extractor

src/examples/evaluate_swarm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from strands import Agent
55
from strands.multiagent import Swarm
6+
67
from strands_evals import Case, Dataset
78
from strands_evals.evaluators import InteractionsEvaluator, TrajectoryEvaluator
89
from strands_evals.extractors import swarm_extractor

src/examples/multi_shots.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime
33

44
from strands import Agent
5+
56
from strands_evals import Case, Dataset
67
from strands_evals.evaluators import InteractionsEvaluator
78
from strands_evals.types import Interaction, TaskOutput

src/examples/safety_judge_output.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime
33

44
from strands import Agent
5+
56
from strands_evals import Case, Dataset
67
from strands_evals.evaluators import OutputEvaluator
78

0 commit comments

Comments
 (0)