Skip to content

Commit 15437e7

Browse files
committed
fix: linting errors and integration tests
1 parent 8211ed3 commit 15437e7

7 files changed

Lines changed: 303 additions & 12 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[project]
2+
name = "eval-spans-testcase"
3+
version = "0.1.0"
4+
description = "E2E test for verifying eval spans (Evaluation Set Run, Evaluation, Evaluator)"
5+
requires-python = ">=3.11"
6+
dependencies = [
7+
"uipath",
8+
]
9+
10+
[tool.uv.sources]
11+
uipath = { path = "../../", editable = true }
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
# E2E driver for the eval-spans testcase: sync dependencies, authenticate,
# then run the calculator sample's default eval set while writing spans to a
# trace file.
# Abort on the first failing command.
set -e

# Inputs for the eval run, relative to this testcase directory.
EVAL_SET="../../samples/calculator/evaluations/eval-sets/default.json"
TRACE_FILE="__uipath/traces.jsonl"

echo "=== E2E Test: Eval Spans Verification ==="

echo "Syncing dependencies..."
uv sync

echo "Authenticating with UiPath..."
# Credentials and base URL are taken from the environment.
uv run uipath auth \
    --client-id="$CLIENT_ID" \
    --client-secret="$CLIENT_SECRET" \
    --base-url="$BASE_URL"

echo "Running evaluations with trace capture..."
# Run eval with a trace file so the emitted spans are captured on disk.
uv run uipath eval main "$EVAL_SET" \
    --no-report \
    --trace-file "$TRACE_FILE"

echo "Test completed successfully!"
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
"""E2E assertions for eval spans testcase.
2+
3+
This script validates that the new eval spans are created correctly:
4+
1. "Evaluation Set Run" span with span_type: "eval_set_run"
5+
2. "Evaluation" spans with span_type: "evaluation"
6+
3. "Evaluator: {name}" spans with span_type: "evaluator"
7+
"""
8+
9+
import json
10+
import os
11+
import sys
12+
from typing import Any
13+
14+
15+
def load_traces(traces_file: str) -> list[dict[str, Any]]:
    """Parse a JSONL trace file into a list of decoded JSON values.

    Args:
        traces_file: Path of the JSONL file to read (decoded as UTF-8).

    Returns:
        One decoded value per non-blank line, in file order.
    """
    with open(traces_file, "r", encoding="utf-8") as handle:
        # Whitespace-only lines are skipped instead of being fed to the parser.
        return [json.loads(raw_line) for raw_line in handle if raw_line.strip()]
23+
24+
25+
def get_attributes(span: dict[str, Any]) -> dict[str, Any]:
    """Return the span's ``attributes`` mapping, or an empty dict.

    Args:
        span: A decoded span object.

    Returns:
        The value stored under ``"attributes"``. An empty dict is returned
        when the key is missing *or* holds a falsy value such as JSON
        ``null``, so callers can always chain ``.get(...)`` on the result.
    """
    # ``dict.get(key, default)`` only applies the default to a missing key;
    # ``or {}`` also covers an explicit ``"attributes": null``.
    return span.get("attributes") or {}
28+
29+
30+
def find_spans_by_type(
    traces: list[dict[str, Any]], span_type: str
) -> list[dict[str, Any]]:
    """Select the spans whose ``span_type`` attribute equals ``span_type``.

    Args:
        traces: Decoded span objects to search.
        span_type: Value the ``span_type`` attribute must equal.

    Returns:
        The matching spans, in their original order.
    """
    matching: list[dict[str, Any]] = []
    for candidate in traces:
        if get_attributes(candidate).get("span_type") == span_type:
            matching.append(candidate)
    return matching
37+
38+
39+
def find_spans_by_name(traces: list[dict[str, Any]], name: str) -> list[dict[str, Any]]:
    """Select the spans whose ``name`` field is exactly ``name``.

    Args:
        traces: Decoded span objects to search.
        name: Span name to match.

    Returns:
        The matching spans, in their original order.
    """

    def has_name(span: dict[str, Any]) -> bool:
        return span.get("name") == name

    return list(filter(has_name, traces))
42+
43+
44+
def find_spans_by_name_prefix(
    traces: list[dict[str, Any]], prefix: str
) -> list[dict[str, Any]]:
    """Find all spans whose name starts with the given prefix.

    Args:
        traces: Decoded span objects to search.
        prefix: Prefix the span's ``name`` must start with.

    Returns:
        The matching spans, in their original order. Spans whose ``name`` is
        missing or not a string (for example JSON ``null``) never match.
    """
    matches: list[dict[str, Any]] = []
    for trace in traces:
        name = trace.get("name")
        # A ``.get("name", "")`` default would not cover an explicit null
        # name, so check the type before calling ``startswith``.
        if isinstance(name, str) and name.startswith(prefix):
            matches.append(trace)
    return matches
49+
50+
51+
def assert_eval_set_run_span(traces: list[dict[str, Any]]) -> None:
    """Check that at least one well-formed 'Evaluation Set Run' span exists.

    Every span with ``span_type == "eval_set_run"`` must be named
    "Evaluation Set Run". The ``eval_set_run_id`` attribute is reported when
    present but is not required.

    Args:
        traces: Decoded span objects to inspect.

    Raises:
        AssertionError: If no such span exists or one has an unexpected name
            or ``span_type``.
    """
    print("\n--- Checking 'Evaluation Set Run' span ---")

    # Locate candidates through their span_type attribute.
    run_spans = find_spans_by_type(traces, "eval_set_run")

    # The failure message lists every span_type that was seen, to ease debugging.
    assert run_spans, (
        "Expected at least 1 'eval_set_run' span, found 0. "
        "Spans with span_type attribute: "
        f"{[kind for t in traces if (kind := get_attributes(t).get('span_type'))]}"
    )

    print(f" Found {len(run_spans)} eval_set_run span(s)")

    for run_span in run_spans:
        span_name = run_span.get("name")
        attributes = get_attributes(run_span)
        span_type = attributes.get("span_type")

        # Span name must match exactly.
        assert span_name == "Evaluation Set Run", (
            f"Expected span name 'Evaluation Set Run', got '{span_name}'"
        )
        print(f" Name: {span_name}")

        # span_type attribute must match exactly.
        assert span_type == "eval_set_run", (
            f"Expected span_type 'eval_set_run', got '{span_type}'"
        )
        print(f" span_type: {span_type}")

        # eval_set_run_id is optional here: report it only when it is set.
        if "eval_set_run_id" in attributes:
            print(f" eval_set_run_id: {attributes.get('eval_set_run_id')}")

    print("Evaluation Set Run span assertion passed")
87+
88+
89+
def assert_evaluation_spans(traces: list[dict[str, Any]]) -> None:
    """Check that 'Evaluation' spans exist and carry their required attributes.

    Every span with ``span_type == "evaluation"`` must be named "Evaluation"
    and expose ``execution.id``, ``eval_item_id`` and ``eval_item_name``.

    Args:
        traces: Decoded span objects to inspect.

    Raises:
        AssertionError: If no such span exists or one fails a check.
    """
    print("\n--- Checking 'Evaluation' spans ---")

    # Locate candidates through their span_type attribute.
    found = find_spans_by_type(traces, "evaluation")

    assert found, "Expected at least 1 'evaluation' span, found 0"

    print(f" Found {len(found)} evaluation span(s)")

    # Attributes that must be present, checked and reported in this order.
    required_keys = ("execution.id", "eval_item_id", "eval_item_name")

    for position, evaluation_span in enumerate(found, start=1):
        span_name = evaluation_span.get("name")
        attributes = get_attributes(evaluation_span)
        span_type = attributes.get("span_type")

        print(f"\n Evaluation span {position}:")

        # Span name must match exactly.
        assert span_name == "Evaluation", (
            f"Expected span name 'Evaluation', got '{span_name}'"
        )
        print(f" Name: {span_name}")

        # span_type attribute must match exactly.
        assert span_type == "evaluation", (
            f"Expected span_type 'evaluation', got '{span_type}'"
        )
        print(f" span_type: {span_type}")

        for key in required_keys:
            assert key in attributes, f"Expected '{key}' attribute in Evaluation span"
            print(f" {key}: {attributes.get(key)}")

    print("\nEvaluation spans assertion passed")
133+
134+
135+
def assert_evaluator_spans(traces: list[dict[str, Any]]) -> None:
    """Check that 'Evaluator' spans exist and carry their required attributes.

    Every span with ``span_type == "evaluator"`` must have a name starting
    with "Evaluator: " and expose ``evaluator_id``, ``evaluator_name`` and
    ``eval_item_id``.

    Args:
        traces: Decoded span objects to inspect.

    Raises:
        AssertionError: If no such span exists or one fails a check.
    """
    print("\n--- Checking 'Evaluator' spans ---")

    # Locate candidates through their span_type attribute.
    found = find_spans_by_type(traces, "evaluator")

    assert found, "Expected at least 1 'evaluator' span, found 0"

    print(f" Found {len(found)} evaluator span(s)")

    # Attributes that must be present, checked and reported in this order.
    required_keys = ("evaluator_id", "evaluator_name", "eval_item_id")

    for position, evaluator_span in enumerate(found, start=1):
        span_name = evaluator_span.get("name")
        attributes = get_attributes(evaluator_span)
        span_type = attributes.get("span_type")

        print(f"\n Evaluator span {position}:")

        # The name carries the evaluator's own name after a fixed prefix.
        assert span_name and span_name.startswith("Evaluator: "), (
            f"Expected span name to start with 'Evaluator: ', got '{span_name}'"
        )
        print(f" Name: {span_name}")

        # span_type attribute must match exactly.
        assert span_type == "evaluator", (
            f"Expected span_type 'evaluator', got '{span_type}'"
        )
        print(f" span_type: {span_type}")

        for key in required_keys:
            assert key in attributes, f"Expected '{key}' attribute in Evaluator span"
            print(f" {key}: {attributes.get(key)}")

    print("\nEvaluator spans assertion passed")
181+
182+
183+
def assert_span_hierarchy(traces: list[dict[str, Any]]) -> None:
    """Report the spans that make up the eval hierarchy.

    Prints the ``span_id`` of the first ``eval_set_run`` span (when one
    exists) and the number of ``evaluation`` and ``evaluator`` spans.

    NOTE(review): despite its name, this function does not verify any
    parent/child relationship and never raises AssertionError. Intermediate
    spans may sit between the levels, so a strict check would have to walk
    the parent chain; confirm the parent field of the trace format before
    adding one.

    Args:
        traces: Decoded span objects to inspect.
    """
    print("\n--- Checking span hierarchy ---")

    # Get spans by type
    eval_set_run_spans = find_spans_by_type(traces, "eval_set_run")
    evaluation_spans = find_spans_by_type(traces, "evaluation")
    evaluator_spans = find_spans_by_type(traces, "evaluator")

    # Report the root of the hierarchy when there is one.
    if eval_set_run_spans:
        # ``or {}`` tolerates a span whose context is present but null.
        context = eval_set_run_spans[0].get("context") or {}
        print(f" EvalSetRun span_id: {context.get('span_id')}")

    print(f" Found {len(evaluation_spans)} Evaluation spans")
    print(f" Found {len(evaluator_spans)} Evaluator spans")

    print("\nSpan hierarchy check passed")
212+
213+
214+
def main() -> None:
    """Load the captured trace file and run every eval-span check.

    Exits the process with status 1 when the trace file is missing or when
    any check raises AssertionError.
    """
    traces_file = "__uipath/traces.jsonl"

    # Without a trace file there is nothing to validate.
    if not os.path.isfile(traces_file):
        print(f"Traces file '{traces_file}' not found")
        sys.exit(1)

    print(f"Loading traces from {traces_file}...")
    traces = load_traces(traces_file)
    print(f"Loaded {len(traces)} trace spans")

    # Dump every span's name and type up front to make failures easier to debug.
    print("\n--- All spans ---")
    for number, span in enumerate(traces, start=1):
        span_name = span.get("name", "Unknown")
        span_type = get_attributes(span).get("span_type", "N/A")
        print(f" {number}. {span_name} (span_type: {span_type})")

    # Checks run in this order; the first failure aborts the rest.
    checks = (
        assert_eval_set_run_span,
        assert_evaluation_spans,
        assert_evaluator_spans,
        assert_span_hierarchy,
    )
    try:
        for check in checks:
            check(traces)
    except AssertionError as failure:
        print(f"\nAssertion failed: {failure}")
        sys.exit(1)
    else:
        banner = "=" * 60
        print("\n" + banner)
        print("All eval span assertions passed!")
        print(banner)


if __name__ == "__main__":
    main()
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"functions": {
3+
"main": "../../samples/calculator/main.py:main"
4+
}
5+
}

tests/cli/eval/test_eval_runtime_spans.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
class MockSpanContext:
2222
"""Mock span context manager for testing span creation."""
2323

24-
def __init__(self, name: str, attributes: Dict[str, Any]):
24+
def __init__(self, name: str, attributes: dict[str, Any] | None):
2525
self.name = name
2626
self.attributes = attributes or {}
2727
self.span = MagicMock(spec=Span)
@@ -40,7 +40,9 @@ class SpanCapturingTracer:
4040
def __init__(self):
4141
self.created_spans: List[Dict[str, Any]] = []
4242

43-
def start_as_current_span(self, name: str, attributes: Dict[str, Any] = None):
43+
def start_as_current_span(
44+
self, name: str, attributes: dict[str, Any] | None = None
45+
):
4446
"""Capture span creation and return a mock context manager."""
4547
span_info = {"name": name, "attributes": attributes or {}}
4648
self.created_spans.append(span_info)
@@ -415,7 +417,7 @@ def test_span_type_values_match_expected(self):
415417
"Evaluator": "evaluator",
416418
}
417419

418-
for span_name, span_type in expected_span_types.items():
420+
for _, span_type in expected_span_types.items():
419421
assert isinstance(span_type, str)
420422
assert span_type.islower() or "_" in span_type
421423

tests/cli/eval/test_eval_telemetry.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for EvalTelemetrySubscriber functionality."""
22

33
import os
4+
from typing import Any
45
from unittest.mock import patch
56

67
import pytest
@@ -93,7 +94,7 @@ def _create_eval_set_run_created_event(
9394
eval_set_run_id: str | None = "run-456",
9495
entrypoint: str = "agent.py",
9596
no_of_evals: int = 5,
96-
evaluators: list = None,
97+
evaluators: list[Any] | None = None,
9798
) -> EvalSetRunCreatedEvent:
9899
"""Helper to create EvalSetRunCreatedEvent."""
99100
return EvalSetRunCreatedEvent(
@@ -212,7 +213,7 @@ def _create_eval_run_updated_event(
212213
eval_item_name: str = "Test Eval",
213214
success: bool = True,
214215
agent_execution_time: float = 1.5,
215-
eval_results: list = None,
216+
eval_results: list[Any] | None = None,
216217
exception_details: EvalItemExceptionDetails | None = None,
217218
) -> EvalRunUpdatedEvent:
218219
"""Helper to create EvalRunUpdatedEvent."""
@@ -327,7 +328,7 @@ class TestEvalSetRunUpdated:
327328
def _create_eval_set_run_updated_event(
328329
self,
329330
execution_id: str = "exec-123",
330-
evaluator_scores: dict = None,
331+
evaluator_scores: dict[str, Any] | None = None,
331332
success: bool = True,
332333
) -> EvalSetRunUpdatedEvent:
333334
"""Helper to create EvalSetRunUpdatedEvent."""
@@ -409,7 +410,7 @@ class TestEnrichProperties:
409410
def test_enrich_properties_adds_source(self):
410411
"""Test that source and application name are always added."""
411412
subscriber = EvalTelemetrySubscriber()
412-
properties = {}
413+
properties: dict[str, Any] = {}
413414

414415
subscriber._enrich_properties(properties)
415416

@@ -419,7 +420,7 @@ def test_enrich_properties_adds_source(self):
419420
def test_enrich_properties_adds_env_vars(self):
420421
"""Test that environment variables are added when present."""
421422
subscriber = EvalTelemetrySubscriber()
422-
properties = {}
423+
properties: dict[str, Any] = {}
423424

424425
with patch.dict(
425426
os.environ,
@@ -440,7 +441,7 @@ def test_enrich_properties_adds_env_vars(self):
440441
def test_enrich_properties_skips_missing_env_vars(self):
441442
"""Test that missing environment variables are not added."""
442443
subscriber = EvalTelemetrySubscriber()
443-
properties = {}
444+
properties: dict[str, Any] = {}
444445

445446
with patch.dict(os.environ, {}, clear=True):
446447
# Remove env vars if they exist

0 commit comments

Comments
 (0)