Skip to content

Commit 347b0bc

Browse files
committed
fix(review): address #175 audit findings
MEDIUM: - agentic_decision.js: do NOT retry our own abort-timeout — only fast-failing connection errors. A timed-out request already consumed a full window; retrying it up to 4x could span most of the ~5min tick. Gate on controller.signal.aborted. +1 test. - persona-agent-eval.ts: drop the redundant post-matrix integrity block — runProfileMatrix already enforces the posture (assert/warn/off) and returns result.integrity, so the duplicate double-warned in 'warn' and was dead in 'assert'. Thread allowMixed:true explicitly; remove now-unused assertRealBackend. LOW cleanup: - Remove dead canCommand prop from PerformanceTab (interface + route caller). - Delete orphaned PerformanceCopilotPanel.tsx (last import went with the fills rail). - Remove stale useOperatorAuth mock from PerformanceTab.test.tsx. - operator-matrix-smoke: guard --max-turns NaN + require auth before driving. Verified: agentic_decision 21 tests, PerformanceTab 31, evals+arena tsc clean.
1 parent 6ded894 commit 347b0bc

8 files changed

Lines changed: 48 additions & 215 deletions

File tree

arena/src/components/bot-detail/PerformanceCopilotPanel.tsx

Lines changed: 0 additions & 172 deletions
This file was deleted.

arena/src/components/bot-detail/PerformanceTab.tsx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,6 @@ function BenchmarkStrip({ summary }: { summary: BotPerformanceSummary }) {
645645
interface PerformanceTabProps {
646646
bot: Bot;
647647
isLive: boolean;
648-
canCommand?: boolean;
649648
}
650649

651650
export function PerformanceTab({ bot, isLive }: PerformanceTabProps) {

arena/src/components/bot-detail/__tests__/PerformanceTab.test.tsx

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,6 @@ const lightweightChartMock = vi.hoisted(() => {
119119
volumeSeries,
120120
};
121121
});
122-
const operatorAuthMock = vi.hoisted(() => ({
123-
isAuthenticated: false,
124-
token: null as string | null,
125-
}));
126122
const useBotMarketCandlesMock = vi.hoisted(() => vi.fn(() => ({
127123
data: [] as typeof mockMarketCandles,
128124
})));
@@ -269,19 +265,6 @@ function makeTrade(overrides: Partial<Trade>): Trade {
269265
};
270266
}
271267

272-
vi.mock('~/lib/hooks/useOperatorAuth', () => ({
273-
useOperatorAuth: () => ({
274-
token: operatorAuthMock.token,
275-
isAuthenticated: operatorAuthMock.isAuthenticated,
276-
isAuthenticating: false,
277-
authenticate: vi.fn(),
278-
clearCachedToken: vi.fn(),
279-
error: null,
280-
getCachedToken: vi.fn(() => operatorAuthMock.token),
281-
getToken: vi.fn(async () => operatorAuthMock.token),
282-
}),
283-
}));
284-
285268
vi.mock('~/lib/hooks/useChartTheme', () => ({
286269
useChartTheme: () => ({
287270
positive: '#0f0',
@@ -354,8 +337,6 @@ describe('PerformanceTab', () => {
354337
data: mockMarketCandles,
355338
}));
356339
useBotMarketCandlesMock.mockClear();
357-
operatorAuthMock.isAuthenticated = false;
358-
operatorAuthMock.token = null;
359340
lightweightChartMock.areaSeries.createPriceLine.mockClear();
360341
lightweightChartMock.areaSeries.applyOptions.mockClear();
361342
lightweightChartMock.areaSeries.removePriceLine.mockClear();

arena/src/routes/arena.bot.$id.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ export default function BotDetailPage() {
463463
const renderWorkspace = () => {
464464
switch (activeSection) {
465465
case 'performance':
466-
return <PerformanceTab bot={bot} isLive={botIsLive} canCommand={canCommandBot} />;
466+
return <PerformanceTab bot={bot} isLive={botIsLive} />;
467467
case 'portfolio':
468468
return (
469469
<PortfolioWorkspace

evals/src/bin/operator-matrix-smoke.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@ if (!operatorUrl) {
2323
}
2424
const privateKey = arg('--private-key') ?? process.env.OPERATOR_PRIVATE_KEY
2525
const token = arg('--token') ?? process.env.OPERATOR_API_TOKEN
26+
if (!privateKey && !token) {
27+
console.error(
28+
'operator-matrix-smoke: --private-key or --token (or OPERATOR_PRIVATE_KEY/OPERATOR_API_TOKEN) required to drive the operator',
29+
)
30+
process.exit(2)
31+
}
32+
33+
const maxTurns = Number(arg('--max-turns') ?? 4)
34+
if (!Number.isFinite(maxTurns) || maxTurns < 1) {
35+
console.error('operator-matrix-smoke: --max-turns must be a positive number')
36+
process.exit(2)
37+
}
2638

2739
// Exactly one cell: one model x one persona x one market, one rep.
2840
const summary = await runTradingPersonaEval({
@@ -34,7 +46,7 @@ const summary = await runTradingPersonaEval({
3446
markets: [defaultScenarios()[0]!],
3547
intents: [STANDARD_USER_INTENTS[0]!],
3648
reps: 1,
37-
maxTurnsPerShot: Number(arg('--max-turns') ?? 4),
49+
maxTurnsPerShot: maxTurns,
3850
maxConcurrency: 1,
3951
// A single live cell: warn (don't hard-throw) so the smoke reports the result
4052
// even if the inner operator LLM spend isn't visible to the integrity guard.

evals/src/trading/persona-agent-eval.ts

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ import {
3636
FileSystemTraceStore,
3737
TraceEmitter,
3838
agentProfileHash,
39-
assertRealBackend,
4039
recordRunsToScorecard,
4140
summarizeBackendIntegrity,
4241
validateRunRecord,
@@ -486,6 +485,7 @@ async function runOperatorMatrix(
486485
maxConcurrency: options.maxConcurrency ?? 1,
487486
...(options.costCeiling !== undefined ? { costCeiling: options.costCeiling } : {}),
488487
integrity: options.integrity ?? 'assert',
488+
allowMixed: true,
489489
personaOf: (s) => s.persona.id,
490490
dispatch: async (profile, scenario, ctx) => {
491491
const groundTruth = groundTruthByMarket.get(scenario.market.id)
@@ -561,21 +561,11 @@ async function runOperatorMatrix(
561561
byPersona[persona] = { meanScore: rollup.meanComposite, n: rollup.n }
562562
}
563563

564-
// 'assert' (default) hard-fails on a stub/unconfigured backend; 'warn' logs the
565-
// same diagnosis but returns the report so the caller still gets a result;
566-
// 'off' is silent. Only 'assert' may throw.
567-
const integrityMode = options.integrity ?? 'assert'
568-
let integrity: BackendIntegrityReport
569-
if (integrityMode === 'assert') {
570-
integrity = assertRealBackend(result.records, { allowMixed: true })
571-
} else {
572-
integrity = summarizeBackendIntegrity(result.records)
573-
if (integrityMode === 'warn' && integrity.verdict !== 'real') {
574-
console.warn(
575-
`[operator-matrix] backend integrity: ${integrity.verdict}${integrity.diagnosis}`,
576-
)
577-
}
578-
}
564+
// runProfileMatrix already enforces the integrity posture passed above
565+
// ('assert' throws on a stub backend, 'warn' logs, 'off' skips) and returns the
566+
// whole-matrix report. Reuse it rather than re-running the guard, which would
567+
// double-warn in 'warn' mode and be dead code in 'assert' mode.
568+
const integrity = result.integrity
579569

580570
const best =
581571
byProfile.length === 0

trading-blueprint-lib/src/prompts/tools/agentic_decision.js

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,7 @@ async function postChatCompletion(opts) {
150150
signal: controller.signal,
151151
});
152152
} catch {
153-
// Connection reset / DNS / TLS / aborted-timeout reaching the provider.
154-
// This was the dominant live failure ("error sending request for url") for
155-
// bots that call the model every tick — and it is transient, so it MUST be
156-
// retried like a 429, not fail-closed to a missed decision.
153+
// Connection reset / DNS / TLS error, OR our own abort-timeout firing.
157154
threw = true;
158155
}
159156
clearTimeout(timer);
@@ -164,8 +161,14 @@ async function postChatCompletion(opts) {
164161
return null;
165162
}
166163
}
164+
// A fast-failing connection error ("error sending request for url" — the
165+
// dominant live failure for bots that call the model every tick) is transient
166+
// and MUST be retried like a 429. But our OWN abort-timeout already consumed a
167+
// full timeoutMs window, so retrying it `retries` times could span most of the
168+
// ~5min tick interval — treat a timeout as non-retryable and fail closed.
169+
const timedOut = threw && controller.signal.aborted;
167170
const status = threw ? 0 : (response && Number(response.status));
168-
const retryable = threw || status === 429 || (status >= 500 && status < 600);
171+
const retryable = (threw && !timedOut) || status === 429 || (status >= 500 && status < 600);
169172
if (attempt < retries && retryable) {
170173
// Escalating backoff spanning a sustained-throttle window (the shared key
171174
// can stay 429 across a full per-minute bucket under fleet concurrency),

trading-blueprint-lib/src/prompts/tools/agentic_decision.test.cjs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,26 @@ test('retries a transient network throw then succeeds', async () => {
110110
assert.equal(d.action, 'buy');
111111
});
112112

113+
test('does NOT retry our own abort-timeout (would blow the tick budget)', async () => {
114+
let calls = 0;
115+
// A request that hangs until the AbortController fires, then rejects — exactly
116+
// how a slow endpoint manifests. Distinct from a fast connection error: a
117+
// timeout already consumed a full window, so retrying it would span the tick.
118+
const hangUntilAbort = (_url, opts) =>
119+
new Promise((_resolve, reject) => {
120+
calls += 1;
121+
opts.signal.addEventListener('abort', () => reject(new Error('aborted')), { once: true });
122+
});
123+
const d = await agenticDecision(SPEC, {
124+
env: ENV,
125+
timeoutMs: 20,
126+
backoffMs: [1, 1, 1, 1],
127+
fetch: hangUntilAbort,
128+
});
129+
assert.equal(d, null);
130+
assert.equal(calls, 1, 'a timeout is not retried');
131+
});
132+
113133
test('disabled when TRADING_AGENTIC_DECISIONS=0', async () => {
114134
assert.equal(agenticDecisionsEnabled({ ZAI_API_KEY: 'k', TRADING_AGENTIC_DECISIONS: '0' }), false);
115135
const d = await agenticDecision(SPEC, {

0 commit comments

Comments
 (0)