agentevals-dev
diff --git a/‎src/agentevals/api/debug_routes.py‎
Lines changed: 18 additions & 17 deletions b/‎src/agentevals/api/debug_routes.py‎
Lines changed: 18 additions & 17 deletions
diff --git a/‎src/agentevals/api/models.py‎
Lines changed: 204 additions & 0 deletions b/‎src/agentevals/api/models.py‎
Lines changed: 204 additions & 0 deletions
diff --git a/‎src/agentevals/api/otlp_routes.py‎
Lines changed: 5 additions & 5 deletions b/‎src/agentevals/api/otlp_routes.py‎
Lines changed: 5 additions & 5 deletions
@@ -19,6 +19,7 @@
 
 from agentevals import __version__
 from ..utils.log_buffer import log_buffer
+from .models import StandardResponse, DebugLoadData, WSSessionStartedEvent, WSSessionCompleteEvent, SessionInfo
 
 if TYPE_CHECKING:
     from ..streaming.ws_server import StreamingTraceManager
@@ -189,7 +190,7 @@ async def create_debug_bundle(diagnostics: FrontendDiagnostics):
     )
 
 
-@debug_router.post("/load")
+@debug_router.post("/load", response_model=StandardResponse[DebugLoadData])
 async def load_debug_bundle(file: UploadFile = FastAPIFile(...)):
     if not _trace_manager:
         raise HTTPException(
@@ -240,27 +241,27 @@ async def load_debug_bundle(file: UploadFile = FastAPIFile(...)):
 
         _trace_manager.sessions[session.session_id] = session
 
-        await _trace_manager.broadcast_to_ui({
-            "type": "session_started",
-            "session": {
-                "sessionId": session.session_id,
-                "traceId": session.trace_id,
-                "evalSetId": session.eval_set_id,
-                "metadata": session.metadata,
-                "startedAt": session.started_at.isoformat(),
-            },
-        })
+        await _trace_manager.broadcast_to_ui(WSSessionStartedEvent(
+            session=SessionInfo(
+                session_id=session.session_id,
+                trace_id=session.trace_id,
+                eval_set_id=session.eval_set_id,
+                span_count=len(session.spans),
+                is_complete=False,
+                started_at=session.started_at.isoformat(),
+                metadata=session.metadata,
+            ),
+        ).model_dump(by_alias=True))
 
         invocations_data = await _trace_manager._extract_invocations(session)
         await _trace_manager._save_spans_to_temp_file(session)
 
-        await _trace_manager.broadcast_to_ui({
-            "type": "session_complete",
-            "sessionId": session.session_id,
-            "invocations": invocations_data,
-        })
+        await _trace_manager.broadcast_to_ui(WSSessionCompleteEvent(
+            session_id=session.session_id,
+            invocations=invocations_data,
+        ).model_dump(by_alias=True))
 
         loaded.append(session.session_id)
         logger.info("Loaded session from bug report: %s", session.session_id)
 
-    return {"loaded_sessions": loaded, "count": len(loaded)}
+    return StandardResponse(data=DebugLoadData(loaded_sessions=loaded, count=len(loaded)))
@@ -0,0 +1,204 @@
+"""Pydantic response and event models for the agentevals API.
+
+Provides a StandardResponse[T] envelope, typed REST response models,
+SSE evaluation event models, and WebSocket/UI broadcast event models.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Generic, TypeVar
+
+from pydantic import BaseModel, ConfigDict, Field
+from pydantic.alias_generators import to_camel
+
+T = TypeVar("T")
+
+
+class CamelModel(BaseModel):
+    model_config = ConfigDict(
+        alias_generator=to_camel,
+        populate_by_name=True,
+    )
+
+
+class StandardResponse(CamelModel, Generic[T]):
+    data: T
+    error: str | None = None
+
+
+# ---------------------------------------------------------------------------
+# REST response data models
+# ---------------------------------------------------------------------------
+
+
+class HealthData(CamelModel):
+    status: str
+    version: str
+
+
+class ApiKeyStatus(CamelModel):
+    google: bool
+    anthropic: bool
+    openai: bool
+
+
+class ConfigData(CamelModel):
+    api_keys: ApiKeyStatus
+
+
+class MetricInfo(CamelModel):
+    name: str
+    category: str
+    requires_eval_set: bool
+    requires_llm: bool = Field(alias="requiresLLM")
+    requires_gcp: bool = Field(alias="requiresGCP")
+    requires_rubrics: bool
+    description: str
+    working: bool
+
+
+class EvalSetValidation(CamelModel):
+    valid: bool
+    eval_set_id: str | None = None
+    num_cases: int | None = None
+    errors: list[str] = Field(default_factory=list)
+
+
+class SessionInfo(CamelModel):
+    session_id: str
+    trace_id: str
+    eval_set_id: str | None = None
+    span_count: int
+    is_complete: bool
+    started_at: str
+    metadata: dict[str, Any] = Field(default_factory=dict)
+    invocations: list[dict[str, Any]] | None = None
+
+
+class CreateEvalSetData(CamelModel):
+    eval_set: dict[str, Any]
+    num_invocations: int
+
+
+class SessionEvalResult(CamelModel):
+    session_id: str
+    trace_id: str | None = None
+    num_invocations: int | None = None
+    metric_results: list[dict[str, Any]] | None = None
+    error: str | None = None
+
+
+class EvaluateSessionsData(CamelModel):
+    golden_session_id: str
+    eval_set_id: str
+    results: list[SessionEvalResult]
+
+
+class PrepareEvaluationData(CamelModel):
+    eval_set_url: str
+    trace_urls: list[str]
+    num_traces: int
+
+
+class GetTraceData(CamelModel):
+    session_id: str
+    trace_content: str
+    num_spans: int
+
+
+class DebugLoadData(CamelModel):
+    loaded_sessions: list[str]
+    count: int
+
+
+# ---------------------------------------------------------------------------
+# SSE evaluation event models
+# ---------------------------------------------------------------------------
+
+
+class SSEProgressEvent(CamelModel):
+    message: str
+
+
+class SSETraceProgress(CamelModel):
+    trace_id: str
+    partial_result: dict[str, Any]
+
+
+class SSETraceProgressEvent(CamelModel):
+    trace_progress: SSETraceProgress
+
+
+class SSEPerformanceMetricsEvent(CamelModel):
+    trace_id: str
+    performance_metrics: dict[str, Any]
+    trace_metadata: dict[str, Any] | None = None
+
+
+class SSEDoneEvent(CamelModel):
+    done: bool = True
+    result: dict[str, Any]
+
+
+class SSEErrorEvent(CamelModel):
+    error: str
+
+
+# ---------------------------------------------------------------------------
+# WebSocket / UI broadcast event models
+# ---------------------------------------------------------------------------
+
+
+class WSSessionStartedEvent(CamelModel):
+    type: str = "session_started"
+    session: SessionInfo
+
+
+class WSSessionCompleteEvent(CamelModel):
+    type: str = "session_complete"
+    session_id: str
+    invocations: list[dict[str, Any]]
+
+
+class WSSpanReceivedEvent(CamelModel):
+    type: str = "span_received"
+    session_id: str
+    span: dict[str, Any]
+
+
+class WSUserInputEvent(CamelModel):
+    type: str = "user_input"
+    session_id: str
+    invocation_id: str
+    text: str
+    timestamp: float
+
+
+class WSAgentResponseEvent(CamelModel):
+    type: str = "agent_response"
+    session_id: str
+    invocation_id: str
+    text: str
+    timestamp: float
+
+
+class WSToolCallEvent(CamelModel):
+    type: str = "tool_call"
+    session_id: str
+    invocation_id: str
+    tool_call: dict[str, Any]
+    timestamp: float
+
+
+class WSTokenUpdateEvent(CamelModel):
+    type: str = "token_update"
+    session_id: str
+    invocation_id: str | None = None
+    input_tokens: int
+    output_tokens: int
+    model: str | None = None
+
+
+class WSErrorEvent(CamelModel):
+    type: str = "error"
+    message: str
@@ -18,6 +18,7 @@
 from fastapi import APIRouter, Request, Response
 
 from ..extraction import flatten_otlp_attributes
+from .models import WSSpanReceivedEvent
 from ..trace_attrs import (
     OTEL_GENAI_INPUT_MESSAGES,
     OTEL_GENAI_OUTPUT_MESSAGES,
@@ -131,11 +132,10 @@ async def _process_traces(body: dict) -> None:
                         update["sessionId"] = session.session_id
                         await _trace_manager.broadcast_to_ui(update)
 
-                await _trace_manager.broadcast_to_ui({
-                    "type": "span_received",
-                    "sessionId": session.session_id,
-                    "span": span,
-                })
+                await _trace_manager.broadcast_to_ui(WSSpanReceivedEvent(
+                    session_id=session.session_id,
+                    span=span,
+                ).model_dump(by_alias=True))
 
                 _trace_manager.reset_idle_timer(session.session_id)