Skip to content

Commit 37c6374

Browse files
Merge pull request #27 from agentevals-dev/chore/api-cleanup
API cleanup
2 parents 916c7e3 + e60488d commit 37c6374

13 files changed

Lines changed: 1440 additions & 406 deletions

File tree

src/agentevals/api/debug_routes.py

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
19 19

20 20
from agentevals import __version__
21 21
from ..utils.log_buffer import log_buffer
22+
from .models import StandardResponse, DebugLoadData, WSSessionStartedEvent, WSSessionCompleteEvent, SessionInfo
22 23

23 24
if TYPE_CHECKING:
24 25
from ..streaming.ws_server import StreamingTraceManager
@@ -189,7 +190,7 @@ async def create_debug_bundle(diagnostics: FrontendDiagnostics):
189 190
)
190 191

191 192

192-
@debug_router.post("/load")
193+
@debug_router.post("/load", response_model=StandardResponse[DebugLoadData])
193 194
async def load_debug_bundle(file: UploadFile = FastAPIFile(...)):
194 195
if not _trace_manager:
195 196
raise HTTPException(
@@ -240,27 +241,27 @@ async def load_debug_bundle(file: UploadFile = FastAPIFile(...)):
240 241

241 242
_trace_manager.sessions[session.session_id] = session
242 243

243-
await _trace_manager.broadcast_to_ui({
244-
"type": "session_started",
245-
"session": {
246-
"sessionId": session.session_id,
247-
"traceId": session.trace_id,
248-
"evalSetId": session.eval_set_id,
249-
"metadata": session.metadata,
250-
"startedAt": session.started_at.isoformat(),
251-
},
252-
})
244+
await _trace_manager.broadcast_to_ui(WSSessionStartedEvent(
245+
session=SessionInfo(
246+
session_id=session.session_id,
247+
trace_id=session.trace_id,
248+
eval_set_id=session.eval_set_id,
249+
span_count=len(session.spans),
250+
is_complete=False,
251+
started_at=session.started_at.isoformat(),
252+
metadata=session.metadata,
253+
),
254+
).model_dump(by_alias=True))
253 255

254 256
invocations_data = await _trace_manager._extract_invocations(session)
255 257
await _trace_manager._save_spans_to_temp_file(session)
256 258

257-
await _trace_manager.broadcast_to_ui({
258-
"type": "session_complete",
259-
"sessionId": session.session_id,
260-
"invocations": invocations_data,
261-
})
259+
await _trace_manager.broadcast_to_ui(WSSessionCompleteEvent(
260+
session_id=session.session_id,
261+
invocations=invocations_data,
262+
).model_dump(by_alias=True))
262 263

263 264
loaded.append(session.session_id)
264 265
logger.info("Loaded session from bug report: %s", session.session_id)
265 266

266-
return {"loaded_sessions": loaded, "count": len(loaded)}
267+
return StandardResponse(data=DebugLoadData(loaded_sessions=loaded, count=len(loaded)))

src/agentevals/api/models.py

Lines changed: 204 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,204 @@
1+
"""Pydantic response and event models for the agentevals API.
2+
3+
Provides a StandardResponse[T] envelope, typed REST response models,
4+
SSE evaluation event models, and WebSocket/UI broadcast event models.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from typing import Any, Generic, TypeVar
10+
11+
from pydantic import BaseModel, ConfigDict, Field
12+
from pydantic.alias_generators import to_camel
13+
14+
T = TypeVar("T")
15+
16+
17+
class CamelModel(BaseModel):
18+
model_config = ConfigDict(
19+
alias_generator=to_camel,
20+
populate_by_name=True,
21+
)
22+
23+
24+
class StandardResponse(CamelModel, Generic[T]):
25+
data: T
26+
error: str | None = None
27+
28+
29+
# ---------------------------------------------------------------------------
30+
# REST response data models
31+
# ---------------------------------------------------------------------------
32+
33+
34+
class HealthData(CamelModel):
35+
status: str
36+
version: str
37+
38+
39+
class ApiKeyStatus(CamelModel):
40+
google: bool
41+
anthropic: bool
42+
openai: bool
43+
44+
45+
class ConfigData(CamelModel):
46+
api_keys: ApiKeyStatus
47+
48+
49+
class MetricInfo(CamelModel):
50+
name: str
51+
category: str
52+
requires_eval_set: bool
53+
requires_llm: bool = Field(alias="requiresLLM")
54+
requires_gcp: bool = Field(alias="requiresGCP")
55+
requires_rubrics: bool
56+
description: str
57+
working: bool
58+
59+
60+
class EvalSetValidation(CamelModel):
61+
valid: bool
62+
eval_set_id: str | None = None
63+
num_cases: int | None = None
64+
errors: list[str] = Field(default_factory=list)
65+
66+
67+
class SessionInfo(CamelModel):
68+
session_id: str
69+
trace_id: str
70+
eval_set_id: str | None = None
71+
span_count: int
72+
is_complete: bool
73+
started_at: str
74+
metadata: dict[str, Any] = Field(default_factory=dict)
75+
invocations: list[dict[str, Any]] | None = None
76+
77+
78+
class CreateEvalSetData(CamelModel):
79+
eval_set: dict[str, Any]
80+
num_invocations: int
81+
82+
83+
class SessionEvalResult(CamelModel):
84+
session_id: str
85+
trace_id: str | None = None
86+
num_invocations: int | None = None
87+
metric_results: list[dict[str, Any]] | None = None
88+
error: str | None = None
89+
90+
91+
class EvaluateSessionsData(CamelModel):
92+
golden_session_id: str
93+
eval_set_id: str
94+
results: list[SessionEvalResult]
95+
96+
97+
class PrepareEvaluationData(CamelModel):
98+
eval_set_url: str
99+
trace_urls: list[str]
100+
num_traces: int
101+
102+
103+
class GetTraceData(CamelModel):
104+
session_id: str
105+
trace_content: str
106+
num_spans: int
107+
108+
109+
class DebugLoadData(CamelModel):
110+
loaded_sessions: list[str]
111+
count: int
112+
113+
114+
# ---------------------------------------------------------------------------
115+
# SSE evaluation event models
116+
# ---------------------------------------------------------------------------
117+
118+
119+
class SSEProgressEvent(CamelModel):
120+
message: str
121+
122+
123+
class SSETraceProgress(CamelModel):
124+
trace_id: str
125+
partial_result: dict[str, Any]
126+
127+
128+
class SSETraceProgressEvent(CamelModel):
129+
trace_progress: SSETraceProgress
130+
131+
132+
class SSEPerformanceMetricsEvent(CamelModel):
133+
trace_id: str
134+
performance_metrics: dict[str, Any]
135+
trace_metadata: dict[str, Any] | None = None
136+
137+
138+
class SSEDoneEvent(CamelModel):
139+
done: bool = True
140+
result: dict[str, Any]
141+
142+
143+
class SSEErrorEvent(CamelModel):
144+
error: str
145+
146+
147+
# ---------------------------------------------------------------------------
148+
# WebSocket / UI broadcast event models
149+
# ---------------------------------------------------------------------------
150+
151+
152+
class WSSessionStartedEvent(CamelModel):
153+
type: str = "session_started"
154+
session: SessionInfo
155+
156+
157+
class WSSessionCompleteEvent(CamelModel):
158+
type: str = "session_complete"
159+
session_id: str
160+
invocations: list[dict[str, Any]]
161+
162+
163+
class WSSpanReceivedEvent(CamelModel):
164+
type: str = "span_received"
165+
session_id: str
166+
span: dict[str, Any]
167+
168+
169+
class WSUserInputEvent(CamelModel):
170+
type: str = "user_input"
171+
session_id: str
172+
invocation_id: str
173+
text: str
174+
timestamp: float
175+
176+
177+
class WSAgentResponseEvent(CamelModel):
178+
type: str = "agent_response"
179+
session_id: str
180+
invocation_id: str
181+
text: str
182+
timestamp: float
183+
184+
185+
class WSToolCallEvent(CamelModel):
186+
type: str = "tool_call"
187+
session_id: str
188+
invocation_id: str
189+
tool_call: dict[str, Any]
190+
timestamp: float
191+
192+
193+
class WSTokenUpdateEvent(CamelModel):
194+
type: str = "token_update"
195+
session_id: str
196+
invocation_id: str | None = None
197+
input_tokens: int
198+
output_tokens: int
199+
model: str | None = None
200+
201+
202+
class WSErrorEvent(CamelModel):
203+
type: str = "error"
204+
message: str

src/agentevals/api/otlp_routes.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
18 18
from fastapi import APIRouter, Request, Response
19 19

20 20
from ..extraction import flatten_otlp_attributes
21+
from .models import WSSpanReceivedEvent
21 22
from ..trace_attrs import (
22 23
OTEL_GENAI_INPUT_MESSAGES,
23 24
OTEL_GENAI_OUTPUT_MESSAGES,
@@ -131,11 +132,10 @@ async def _process_traces(body: dict) -> None:
131 132
update["sessionId"] = session.session_id
132 133
await _trace_manager.broadcast_to_ui(update)
133 134

134-
await _trace_manager.broadcast_to_ui({
135-
"type": "span_received",
136-
"sessionId": session.session_id,
137-
"span": span,
138-
})
135+
await _trace_manager.broadcast_to_ui(WSSpanReceivedEvent(
136+
session_id=session.session_id,
137+
span=span,
138+
).model_dump(by_alias=True))
139 139

140 140
_trace_manager.reset_idle_timer(session.session_id)
141 141

0 commit comments

Comments
 (0)