1- """Synchronous control surface used by ``/api/runs`` HTTP handlers .
1+ """Synchronous control surface used by ``/api/runs`` and ``/api/evaluate`` .
22
33Wraps the :class:`agentevals.storage.repos.RunRepository` with submit
4- idempotency, list pagination, and the 409 spec-mismatch path.
5-
6- Also provides :meth:`RunService.record_completed_eval` for the
7- ``/api/evaluate`` path: that handler executes synchronously (the trace was
8- already supplied as multipart and the result is being streamed back over
9- SSE), so we synthesize a Run row for visibility in run history rather than
10- queueing work for the worker.
4+ idempotency, list pagination, and the 409 spec-mismatch path. Also exposes
5+ :meth:`RunService.record_eval_run` for the ``/api/evaluate`` path, which
6+ executes synchronously and synthesizes a Run row for visibility in run
7+ history rather than queueing work for the worker.
118"""
129
1310from __future__ import annotations
1411
15- import json
1612import logging
1713from datetime import datetime , timezone
14+ from typing import Any
1815from uuid import UUID , uuid4
1916
2017from ..config import EvalParams
2118from ..runner import RunResult
22- from ..storage .models import Run , RunSpec , RunStatus
19+ from ..storage .models import Run , RunSpec , RunStatus , TraceTarget
2320from ..storage .repos import ResultRepository , RunRepository
2421from .result_builder import build_results , summarize_run_result
2522
2623logger = logging .getLogger (__name__ )
2724
2825
29- def _now () -> datetime :
30- return datetime .now (timezone .utc )
31-
32-
3326class RunSubmitConflict (Exception ):
3427 """Raised when a re-submission's spec differs from the persisted one.
3528
@@ -54,7 +47,7 @@ async def submit(self, *, run_id: UUID | None, spec: RunSpec) -> Run:
5447 spec = spec ,
5548 )
5649 persisted = await self ._runs .create (run )
57- if persisted .run_id == run .run_id and not _specs_equal ( persisted .spec , spec ) :
50+ if persisted .run_id == run .run_id and persisted .spec != spec :
5851 raise RunSubmitConflict (persisted )
5952 return persisted
6053
@@ -76,30 +69,46 @@ async def list_results(self, run_id: UUID):
7669 async def cancel (self , run_id : UUID ) -> bool :
7770 return await self ._runs .cancel (run_id )
7871
79- async def record_completed_eval (
72+ async def record_eval_run (
8073 self ,
8174 * ,
82- spec : RunSpec ,
8375 params : EvalParams ,
76+ eval_set_dict : dict [str , Any ] | None ,
77+ trace_format : str | None ,
78+ upload_filenames : list [str ] | None ,
8479 run_result : RunResult ,
8580 ) -> Run :
86- """Persist a synchronously-completed eval as a Run row plus Result rows.
87-
88- The run is created already in ``running`` state (so the row passes the
89- ``run_running_has_worker`` check is sidestepped via a synthetic worker
90- id), then transitioned to a terminal state in the same call. Two
91- writes per eval, but using the public :class:`RunRepository` API
92- avoids leaking an executor-only schema requirement into this layer.
81+ """Persist a synchronously-completed ``/api/evaluate`` call as a Run
82+ row plus Result rows.
83+
84+ Builds an ``uploaded`` :class:`TraceTarget` from the request metadata,
85+ creates a queued run, persists results, then transitions the run to
86+ a terminal status. Two writes (create + update_status), but the
87+ public :class:`RunRepository` API stays clean of executor-only
88+ schema knowledge.
9389 """
90+ filenames = list (upload_filenames or [])
91+ target = TraceTarget (
92+ kind = "uploaded" ,
93+ trace_format = trace_format if trace_format in ("jaeger-json" , "otlp-json" ) else None ,
94+ trace_count = len (filenames ),
95+ trace_files = filenames ,
96+ )
97+ spec = RunSpec (
98+ approach = "trace_replay" ,
99+ target = target ,
100+ eval_config = params .model_dump (by_alias = False ),
101+ eval_set = eval_set_dict ,
102+ )
103+
94104 run_id = uuid4 ()
95- worker_id = "sync:/api/evaluate"
96105 run = Run (
97106 run_id = run_id ,
98107 status = RunStatus .QUEUED ,
99108 spec = spec ,
100109 attempt = 1 ,
101- worker_id = worker_id ,
102- started_at = _now ( ),
110+ worker_id = "sync:/api/evaluate" ,
111+ started_at = datetime . now ( timezone . utc ),
103112 )
104113 await self ._runs .create (run )
105114
@@ -117,11 +126,3 @@ async def record_completed_eval(
117126 run .status = RunStatus .SUCCEEDED
118127 run .summary = summary
119128 return run
120-
121-
122- def _specs_equal (a : RunSpec , b : RunSpec ) -> bool :
123- """Deep equality on the JSON projection. Pydantic equality compares model
124- instances by class identity, which trips up the round-trip from JSONB."""
125- return json .dumps (a .model_dump (by_alias = False ), sort_keys = True ) == json .dumps (
126- b .model_dump (by_alias = False ), sort_keys = True
127- )
0 commit comments