Skip to content

Commit 73a110c

Browse files
author
Symphony Agent
committed
Merge remote-tracking branch 'origin/dev' into symphony/SYM-35
# Conflicts:
#	src/runtime/server/controllers/run-controller.ts
2 parents d333b31 + 71872c9 commit 73a110c

16 files changed

Lines changed: 1094 additions & 95 deletions

File tree

bun.lock

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dashboard/dist/index.html

Lines changed: 10 additions & 10 deletions
Large diffs are not rendered by default.

dashboard/src/App.tsx

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
useCallback,
44
useEffect,
55
useMemo,
6+
useRef,
67
useState,
78
} from "react";
89
import { AveragesTable } from "./components/AveragesTable.tsx";
@@ -694,7 +695,7 @@ function RunsView({ request }: { request: ServerRequest }) {
694695
);
695696
}
696697

697-
function RunDetailView({
698+
export function RunDetailView({
698699
runId,
699700
request,
700701
token,
@@ -707,50 +708,73 @@ function RunDetailView({
707708
const [error, setError] = useState<string | null>(null);
708709
const [selectedOrdinal, setSelectedOrdinal] = useState<number | null>(null);
709710
const [cancelling, setCancelling] = useState(false);
711+
const requestRef = useRef(request);
712+
const activeRunIdRef = useRef(runId);
713+
const mountedRef = useRef(true);
710714

711-
const loadRun = useCallback(() => {
712-
return request<RunResponse>(`/api/runs/${encodeURIComponent(runId)}`)
713-
.then((data) => {
714-
setRun(data.run);
715-
setError(null);
716-
})
717-
.catch((err) => {
718-
setError(err instanceof Error ? err.message : String(err));
719-
});
720-
}, [request, runId]);
715+
requestRef.current = request;
716+
activeRunIdRef.current = runId;
721717

722718
useEffect(() => {
723-
let cancelled = false;
724-
loadRun().then(() => {
725-
if (cancelled) return;
726-
});
719+
mountedRef.current = true;
727720
return () => {
728-
cancelled = true;
721+
mountedRef.current = false;
729722
};
723+
}, []);
724+
725+
const loadRun = useCallback(async () => {
726+
const expectedRunId = runId;
727+
try {
728+
const data = await requestRef.current<RunResponse>(
729+
`/api/runs/${encodeURIComponent(expectedRunId)}`,
730+
);
731+
if (!mountedRef.current || activeRunIdRef.current !== expectedRunId) {
732+
return;
733+
}
734+
setRun(data.run);
735+
setError(null);
736+
} catch (err) {
737+
if (!mountedRef.current || activeRunIdRef.current !== expectedRunId) {
738+
return;
739+
}
740+
setError(err instanceof Error ? err.message : String(err));
741+
}
742+
}, [runId]);
743+
744+
const loadRunRef = useRef(loadRun);
745+
loadRunRef.current = loadRun;
746+
747+
useEffect(() => {
748+
setRun(null);
749+
setError(null);
750+
setSelectedOrdinal(null);
751+
void loadRun();
730752
}, [loadRun]);
731753

732754
useEffect(() => {
733-
if (!run || run.status !== "running") {
734-
return;
735-
}
736755
const tokenQuery = token
737756
? `?access_token=${encodeURIComponent(token)}`
738757
: "";
739758
const events = new EventSource(
740759
`/api/runs/${encodeURIComponent(runId)}/events${tokenQuery}`,
741760
);
742761
const refetch = () => {
743-
void loadRun();
762+
void loadRunRef.current();
763+
};
764+
const refetchAndClose = () => {
765+
refetch();
766+
events.close();
744767
};
768+
events.addEventListener("snapshot", refetch);
745769
events.addEventListener("suite_started", refetch);
746770
events.addEventListener("scenario_started", refetch);
747771
events.addEventListener("scenario_finished", refetch);
748772
events.addEventListener("scenario_error", refetch);
749-
events.addEventListener("run_finished", refetch);
750-
events.addEventListener("run_cancelled", refetch);
751-
events.addEventListener("run_error", refetch);
773+
events.addEventListener("run_finished", refetchAndClose);
774+
events.addEventListener("run_cancelled", refetchAndClose);
775+
events.addEventListener("run_error", refetchAndClose);
752776
return () => events.close();
753-
}, [loadRun, run, runId, token]);
777+
}, [runId, token]);
754778

755779
const cancelRun = async () => {
756780
setCancelling(true);

dashboard/src/components/CompareView.tsx

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ export function CompareView({ token, apiBase = "" }: CompareViewProps) {
155155
const [pickerOpen, setPickerOpen] = useState(runIds.length < 2);
156156
const [availableRuns, setAvailableRuns] = useState<RunOption[]>([]);
157157
const [picker, setPicker] = useState<Set<string>>(new Set(runIds));
158+
const pickerCanApply = picker.size >= 2 && picker.size <= 10;
158159

159160
const fetchRuns = useCallback(async () => {
160161
try {
@@ -316,7 +317,9 @@ export function CompareView({ token, apiBase = "" }: CompareViewProps) {
316317
</li>
317318
))}
318319
</ul>
319-
<button type="submit">Apply</button>
320+
<button type="submit" disabled={!pickerCanApply}>
321+
Apply
322+
</button>
320323
</form>
321324
)}
322325

@@ -401,6 +404,11 @@ export function CompareView({ token, apiBase = "" }: CompareViewProps) {
401404
))}
402405
</tbody>
403406
</table>
407+
{filteredScenarios.length === 0 && (
408+
<p className="compare-hint">
409+
No aligned scenario rows match this comparison.
410+
</p>
411+
)}
404412
</>
405413
)}
406414

docs/generated/workspace-inventory.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Workspace Inventory
22

3-
Generated: 2026-04-17T15:44:17.421Z
3+
Generated: 2026-04-17T16:26:34.861Z
44

55
```text
66
AGENTS.md
@@ -426,13 +426,15 @@ tests/unit/
426426
tests/unit/persistence/
427427
tests/unit/persistence/factory.test.ts
428428
tests/unit/persistence/migrations.test.ts
429+
tests/unit/persistence/postgres-backend.test.ts
429430
tests/unit/persistence/url.test.ts
430431
tests/unit/report.test.ts
431432
tests/unit/runner.test.ts
432433
tests/unit/server/
433434
tests/unit/server/auth.test.ts
434435
tests/unit/server/comparison.test.ts
435436
tests/unit/server/config.test.ts
437+
tests/unit/server/inline-dashboard.test.ts
436438
tests/unit/server/selection.test.ts
437439
tests/unit/server/streams.test.ts
438440
tests/unit/simulator.test.ts

docs/product-specs/current-state.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Last validated against `platform.md`: 2026-04-17
2727
- [x] API CORS allows only same-origin loopback by default
2828
- [x] Read-only HTTP and UI surfaces browse persisted run history
2929
- [x] Live run events stream through Server-Sent Events with replay support
30+
- [x] Run executor failures are logged and persisted
3031
- [x] Run control starts validated ad-hoc or preset-backed runs
3132
- [x] Cancellation cooperatively stops a server-managed run
3233
- [x] Presets save cross-file scenario selections for one-click rerun

docs/product-specs/e2e-checklist.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,11 @@ Derived from `platform.md`. Every scenario should have a coverage owner.
2424
| API CORS allows only same-origin loopback by default | `tests/integration/server/read-only.test.ts` + `tests/unit/server/config.test.ts` | ✅ covered |
2525
| Read-only HTTP and UI surfaces browse persisted run history | `tests/integration/server/read-only.test.ts` + dashboard build/typecheck | ✅ covered |
2626
| Live run events stream through Server-Sent Events with replay support | `tests/integration/server/write-control.test.ts` + `tests/unit/server/streams.test.ts` | ✅ covered |
27+
| Run executor failures are logged and persisted | `tests/integration/server/write-control.test.ts` | ✅ covered |
2728
| Run control starts validated ad-hoc or preset-backed runs | `tests/integration/server/write-control.test.ts` | ✅ covered |
2829
| Cancellation cooperatively stops a server-managed run | `tests/integration/server/write-control.test.ts` | ✅ covered |
2930
| Presets save cross-file scenario selections for one-click rerun | `tests/integration/server/write-control.test.ts` + `tests/unit/server/selection.test.ts` | ✅ covered |
30-
| Comparison workspace diffs 2 to 10 historical runs | `tests/integration/server/comparisons.test.ts` + dashboard compare-view component tests | ⏳ planned |
31+
| Comparison workspace diffs 2 to 10 historical runs | `tests/integration/server/comparisons.test.ts` + `tests/unit/dashboard/compare-view.test.tsx` | ✅ integration/unit covered; ⏳ browser E2E planned |
3132
| Docker image boots safely with SQLite-on-volume persistence | `Dockerfile` + `docker-compose.yml` + `docs/playbooks/agent-probe-server.md` | ✅ covered |
3233
| Database URL credentials stay redacted in operator-visible output | `tests/unit/persistence/url.test.ts` + `tests/unit/server/config.test.ts` | ✅ covered |
3334
| Docker Compose readiness waits for server readiness | `docker-compose.yml` + `docs/playbooks/agent-probe-server.md` + `docker compose config` | ✅ covered |

docs/product-specs/platform.md

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,16 @@ events, supports reconnect via `Last-Event-ID` from an in-memory replay buffer
208208
for the most recent events, falls back to persisted run detail for events older
209209
than the buffer, and always emits a terminal event before closing the stream.
210210

211+
### Run executor failures are logged and persisted
212+
213+
**Given** a run launched through `agentprobe start-server` fails inside the
214+
server-side run executor
215+
**When** no SSE client is connected or the connected client disconnects before
216+
the failure is observed
217+
**Then** the server writes a structured `run_executor` error line to stderr,
218+
persists the failure on the run record for later `/api/runs/:runId` reads, and
219+
publishes a terminal `run_error` event for any active stream subscribers.
220+
211221
### Run control starts validated ad-hoc or preset-backed runs
212222

213223
**Given** an `agentprobe start-server` instance with a resolvable `./data` root
@@ -246,13 +256,15 @@ snapshot.
246256
### Comparison workspace diffs 2 to 10 historical runs
247257

248258
**Given** at least two persisted runs, preferably launched from the same preset
249-
**When** the operator requests `GET /api/comparisons?run_ids=a,b[,...]` or opens
250-
the `/compare` workspace
259+
**When** the operator requests
260+
`GET /api/comparisons?run_ids=<run-uuid>,<run-uuid>[,...]` or opens the
261+
`/compare` workspace
251262
**Then** the server returns a scenario-aligned payload with per-run pass/fail,
252263
score delta, `status_change`, and summary buckets for improved, regressed, and
253264
unchanged scenarios; scenarios missing from one side surface as `present_in`
254265
entries rather than failing the request; and the request rejects any count
255-
below 2 or above 10 run IDs with a structured validation error.
266+
below 2 or above 10 run IDs, malformed run UUIDs, or duplicate run IDs with a
267+
structured validation error.
256268

257269
### Docker image boots safely with SQLite-on-volume persistence
258270

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
"@types/node": "^25.6.0",
3232
"@types/nunjucks": "^3.2.6",
3333
"bun-types": "^1.3.12",
34+
"happy-dom": "^20.9.0",
35+
"react": "^19.2.5",
36+
"react-dom": "^19.2.5",
3437
"typescript": "^6.0.2"
3538
}
3639
}

src/runtime/server/controllers/comparison-controller.ts

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import { HttpInputError } from "../validation.ts";
88

99
export const MIN_COMPARISON_RUNS = 2;
1010
export const MAX_COMPARISON_RUNS = 10;
11+
const RUN_ID_PATTERN =
12+
/^(?:[0-9a-f]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i;
1113

1214
export type ComparisonAlignment =
1315
| "preset_snapshot"
@@ -411,30 +413,41 @@ export function createComparisonController(options: {
411413
const trimmed = runIds
412414
.map((id) => (typeof id === "string" ? id.trim() : ""))
413415
.filter((id) => id.length > 0);
414-
const deduped: string[] = [];
415416
const seen = new Set<string>();
416417
for (const id of trimmed) {
417-
if (!seen.has(id)) {
418-
seen.add(id);
419-
deduped.push(id);
418+
if (!RUN_ID_PATTERN.test(id)) {
419+
throw new HttpInputError(
420+
400,
421+
"bad_request",
422+
"run_ids must contain persisted run UUIDs.",
423+
);
420424
}
425+
const normalized = id.toLowerCase();
426+
if (seen.has(normalized)) {
427+
throw new HttpInputError(
428+
400,
429+
"bad_request",
430+
"run_ids must not contain duplicates.",
431+
);
432+
}
433+
seen.add(normalized);
421434
}
422-
if (deduped.length < MIN_COMPARISON_RUNS) {
435+
if (trimmed.length < MIN_COMPARISON_RUNS) {
423436
throw new HttpInputError(
424437
400,
425438
"bad_request",
426-
`At least ${MIN_COMPARISON_RUNS} unique run_ids are required for comparison.`,
439+
`At least ${MIN_COMPARISON_RUNS} run_ids are required for comparison.`,
427440
);
428441
}
429-
if (deduped.length > MAX_COMPARISON_RUNS) {
442+
if (trimmed.length > MAX_COMPARISON_RUNS) {
430443
throw new HttpInputError(
431444
400,
432445
"bad_request",
433446
`At most ${MAX_COMPARISON_RUNS} run_ids may be compared in a single request.`,
434447
);
435448
}
436449
const runs = await Promise.all(
437-
deduped.map(async (runId) => {
450+
trimmed.map(async (runId) => {
438451
const record = await repository.getRun(runId);
439452
if (!record) {
440453
throw new HttpInputError(

0 commit comments

Comments
 (0)