Skip to content

Commit e46f2f7

Browse files
Added samples for traces smart filtering (#47217)
* Added samples for traces smart filtering * updated * updated * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent 9f5cc1b commit e46f2f7

4 files changed

Lines changed: 518 additions & 1 deletion

File tree

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# pylint: disable=line-too-long,useless-suppression
2+
# ------------------------------------
3+
# Copyright (c) Microsoft Corporation.
4+
# Licensed under the MIT License.
5+
# ------------------------------------
6+
7+
"""
8+
DESCRIPTION:
9+
Given an AIProjectClient, this sample demonstrates how to evaluate an
10+
agent from its traces by filtering traces from Application Insights using an
11+
agent name/version or agent ID, with smart filtering.
12+
13+
The agent filter supports the following options:
14+
- agent_name + agent_version: Specify the agent by name and version separately.
15+
- agent_id: Specify the agent as a single "name:version" string.
16+
- smart_filtering: Use filter_strategy="smart_filtering" to bias trace
17+
selection toward more interesting conversations.
18+
19+
USAGE:
20+
python sample_agent_trace_evaluation_smart_filter.py
21+
python sample_agent_trace_evaluation_smart_filter.py --agent-id "my-agent:1"
22+
23+
Before running the sample:
24+
25+
pip install "azure-ai-projects>=2.2.0" python-dotenv
26+
27+
Set these environment variables with your own values:
28+
1) FOUNDRY_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint.
29+
2) FOUNDRY_MODEL_NAME - Required. The model deployment name for AI-assisted evaluators.
30+
3) FOUNDRY_AGENT_NAME - Required. The name of the agent whose traces to evaluate.
31+
4) FOUNDRY_AGENT_VERSION - Optional. The agent version. If not set, latest is used.
32+
"""
33+
34+
import argparse
35+
import os
36+
import time
37+
from pprint import pprint
38+
from dotenv import load_dotenv
39+
from azure.identity import DefaultAzureCredential
40+
from azure.ai.projects import AIProjectClient
41+
from azure.ai.projects.models import TestingCriterionAzureAIEvaluator
42+
43+
def _positive_int(value: str) -> int:
    """Parse *value* as an integer greater than zero.

    Used as an argparse ``type=`` callable so that invalid input is reported
    as a normal usage error instead of producing an empty or inverted time
    window later on.

    Raises:
        argparse.ArgumentTypeError: If *value* is not an integer or is <= 0.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number <= 0:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


# Parse the command line before touching the environment so that `--help` and
# argument errors work even when the required variables are not configured.
parser = argparse.ArgumentParser(description="Evaluate agent traces using agent filter.")
parser.add_argument("--agent-id", default=None, help='Agent ID in "name:version" format')
parser.add_argument("--max-traces", type=_positive_int, default=5, help="Max traces to evaluate (default: 5)")
parser.add_argument("--lookback-hours", type=_positive_int, default=24, help="Hours to look back (default: 24)")
args = parser.parse_args()

# Load variables from a local .env file (if any) before reading os.environ.
load_dotenv()

# Required settings: a missing variable raises KeyError immediately.
endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"]
model_deployment_name = os.environ["FOUNDRY_MODEL_NAME"]
agent_name = os.environ["FOUNDRY_AGENT_NAME"]
# Optional: an empty string means no version is sent with the agent filter.
agent_version = os.environ.get("FOUNDRY_AGENT_VERSION", "")
55+
56+
with (
    DefaultAzureCredential() as credential,
    AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
    project_client.get_openai_client() as client,
):
    # Eval group for trace-based evaluations
    data_source_config = {
        "type": "azure_ai_source",
        "scenario": "traces",
    }

    # (criterion name, built-in evaluator name) pairs. Every criterion uses the
    # same model deployment and the same query/response mapping.
    evaluators = [
        ("task_completion", "builtin.task_completion"),
        ("conversation_coherence", "builtin.coherence"),
        ("groundedness", "builtin.groundedness"),
        ("violence", "builtin.violence"),
    ]
    testing_criteria = [
        TestingCriterionAzureAIEvaluator(
            type="azure_ai_evaluator",
            name=criterion_name,
            evaluator_name=evaluator_name,
            initialization_parameters={"model": model_deployment_name},
            data_mapping={
                "query": "{{item.query}}",
                "response": "{{item.response}}",
            },
        )
        for criterion_name, evaluator_name in evaluators
    ]

    print("Creating trace-based evaluation group")
    eval_object = client.evals.create(
        name="Trace Evaluation (Agent Smart Filter)",
        data_source_config=data_source_config,  # type: ignore
        testing_criteria=testing_criteria,
    )
    print(f"Evaluation created (id: {eval_object.id})")

    # From here on the eval group exists remotely; the `finally` below deletes
    # it even if creating, polling or reporting on the run raises.
    try:
        # Compute time window in unix seconds
        # Pad end_time by +600s (10 min) to avoid ingestion-delay edge exclusion
        now_unix = int(time.time())
        end_time = now_unix + 600
        start_time = now_unix - (args.lookback_hours * 3600)

        # Build trace_source based on mode
        trace_source: dict = {
            "type": "agent_filter",
            "start_time": start_time,
            "end_time": end_time,
            "max_traces": args.max_traces,
            "filter_strategy": "smart_filtering",
        }

        if args.agent_id:
            # A single "name:version" string identifies the agent.
            trace_source["agent_id"] = args.agent_id
            print(f"Using agent_id filter: {args.agent_id}")
        else:
            trace_source["agent_name"] = agent_name
            # Only send a version when one was configured.
            if agent_version:
                trace_source["agent_version"] = agent_version
            print(f"Using agent filter: {agent_name} v{agent_version or '(latest)'}")

        data_source = {
            "type": "azure_ai_trace_data_source_preview",
            "trace_source": trace_source,
        }

        eval_run = client.evals.runs.create(
            eval_id=eval_object.id,
            name="trace-evaluation-agent-smart-filter-run",
            data_source=data_source,  # type: ignore
        )
        print(f"Evaluation run created (id: {eval_run.id})")

        # Poll until the run reaches a terminal state. "canceled" is included
        # so that a canceled run does not leave this loop spinning forever.
        terminal_statuses = ("completed", "failed", "canceled")
        while True:
            run = client.evals.runs.retrieve(run_id=eval_run.id, eval_id=eval_object.id)
            if run.status in terminal_statuses:
                break
            print(f"Waiting for eval run to complete... current status: {run.status}")
            time.sleep(5)

        if run.status == "completed":
            print("\n✓ Evaluation run completed successfully!")
            print(f"Result Counts: {run.result_counts}")

            output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
            print(f"\nOUTPUT ITEMS (Total: {len(output_items)})")
            print(f"{'-'*60}")
            pprint(output_items)
            print(f"{'-'*60}")

            print(f"\nEval Run Report URL: {run.report_url}")
        else:
            # Report the actual terminal status ("failed" or "canceled").
            print(f"\n✗ Evaluation run {run.status}: {run.error}")
    finally:
        client.evals.delete(eval_id=eval_object.id)
        print("Evaluation deleted")

0 commit comments

Comments
 (0)