Skip to content

Commit a9996c7

Browse files
authored
Fix vacuous drift test assertions and normalize proxy relay status codes (#169)
## Summary

- Remove vacuous `shouldFail` guard from all 21 drift test files (89 instances) — assertions now always run unconditionally, catching broken extraction logic and warning-level drift that previously passed silently with zero assertions
- Normalize upstream HTTP status codes in 5 recorder proxy relay paths — 200 for success, 502 for errors — preventing provider implementation details (429, 503, 401, etc.) from leaking to aimock clients
- Add 5 new status normalization tests with fixture preservation verification; fix test cleanup (awaited server.close in try/finally)

## Test plan

- [x] All 2841 tests pass (79 files, 37 skipped)
- [x] TypeScript typecheck clean
- [x] Prettier + ESLint clean (pre-commit hooks pass)
- [x] 7-agent CR loop converged (R1: 2 bucket-a fixed, R2: 0 bucket-a, Procedure 3: 0 promotions)
2 parents 37d3335 + 1f7cdd4 commit a9996c7

26 files changed

Lines changed: 593 additions & 328 deletions

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# @copilotkit/aimock
22

3+
## [Unreleased]
4+
5+
### Fixed
6+
7+
- **Drift tests passed vacuously with zero assertions** — the `shouldFail` guard silently skipped all `expect` calls when no critical diffs were found, so broken extraction logic or warning-level drift went completely undetected. Replaced every guarded assertion across all 21 drift test files (89 instances) with unconditional `expect(diffs.filter(...)).toEqual([])`
8+
- **Proxy relay leaked raw upstream HTTP status codes** — 5 recorder relay paths in `recorder.ts` and `agui-recorder.ts` forwarded raw upstream codes (429, 503, 401, 201, etc.) to aimock clients, exposing provider implementation details. Normalized to 200 for success and 502 for errors; fixture recording preserves the original status for fidelity
9+
10+
### Added
11+
12+
- **Status code normalization tests** — 5 tests verifying proxy relay normalization (201→200, 429→502, 503→502, 401→502, SSE 429→502) with fixture preservation assertions; 2 existing tests updated to expect normalized 502
13+
314
## [1.19.5] - 2026-05-09
415

516
### Fixed

src/__tests__/drift/anthropic.drift.ts

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,7 @@
77
import http from "node:http";
88
import { describe, it, expect, beforeAll, afterAll } from "vitest";
99
import type { ServerInstance } from "../../server.js";
10-
import {
11-
extractShape,
12-
triangulate,
13-
compareSSESequences,
14-
formatDriftReport,
15-
shouldFail,
16-
} from "./schema.js";
10+
import { extractShape, triangulate, compareSSESequences, formatDriftReport } from "./schema.js";
1711
import {
1812
anthropicMessageShape,
1913
anthropicMessageToolCallShape,
@@ -66,9 +60,10 @@ describe.skipIf(!ANTHROPIC_API_KEY)("Anthropic Claude Messages drift", () => {
6660
const diffs = triangulate(sdkShape, realShape, mockShape);
6761
const report = formatDriftReport("Anthropic Claude (non-streaming text)", diffs);
6862

69-
if (shouldFail(diffs)) {
70-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
71-
}
63+
expect(
64+
diffs.filter((d) => d.severity === "critical"),
65+
report,
66+
).toEqual([]);
7267
});
7368

7469
it("streaming text event sequence and shapes match", async () => {
@@ -97,9 +92,10 @@ describe.skipIf(!ANTHROPIC_API_KEY)("Anthropic Claude Messages drift", () => {
9792
const diffs = compareSSESequences(sdkEvents, realStream.events, mockSSEShapes);
9893
const report = formatDriftReport("Anthropic Claude (streaming text events)", diffs);
9994

100-
if (shouldFail(diffs)) {
101-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
102-
}
95+
expect(
96+
diffs.filter((d) => d.severity === "critical"),
97+
report,
98+
).toEqual([]);
10399
});
104100

105101
it("non-streaming tool call shape matches", async () => {
@@ -134,9 +130,10 @@ describe.skipIf(!ANTHROPIC_API_KEY)("Anthropic Claude Messages drift", () => {
134130
const diffs = triangulate(sdkShape, realShape, mockShape);
135131
const report = formatDriftReport("Anthropic Claude (non-streaming tool call)", diffs);
136132

137-
if (shouldFail(diffs)) {
138-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
139-
}
133+
expect(
134+
diffs.filter((d) => d.severity === "critical"),
135+
report,
136+
).toEqual([]);
140137
});
141138

142139
it("streaming tool call event sequence matches", async () => {
@@ -184,9 +181,10 @@ describe.skipIf(!ANTHROPIC_API_KEY)("Anthropic Claude Messages drift", () => {
184181
const diffs = compareSSESequences(sdkEvents, realStream.events, mockSSEShapes);
185182
const report = formatDriftReport("Anthropic Claude (streaming tool call events)", diffs);
186183

187-
if (shouldFail(diffs)) {
188-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
189-
}
184+
expect(
185+
diffs.filter((d) => d.severity === "critical"),
186+
report,
187+
).toEqual([]);
190188
});
191189
});
192190

@@ -222,9 +220,10 @@ describe("Anthropic Claude extended thinking shapes", () => {
222220
const diffs = triangulate(sdkShape, sdkShape, mockShape);
223221
const report = formatDriftReport("Anthropic Claude (non-streaming thinking)", diffs);
224222

225-
if (shouldFail(diffs)) {
226-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
227-
}
223+
expect(
224+
diffs.filter((d) => d.severity === "critical"),
225+
report,
226+
).toEqual([]);
228227
});
229228

230229
it("streaming thinking event sequence and shapes match", async () => {
@@ -279,9 +278,10 @@ describe("Anthropic Claude extended thinking shapes", () => {
279278
const diffs = compareSSESequences(sdkEvents, sdkEvents, mockSSEShapes);
280279
const report = formatDriftReport("Anthropic Claude (streaming thinking events)", diffs);
281280

282-
if (shouldFail(diffs)) {
283-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
284-
}
281+
expect(
282+
diffs.filter((d) => d.severity === "critical"),
283+
report,
284+
).toEqual([]);
285285
});
286286

287287
it("thinking block index precedes text block index", async () => {

src/__tests__/drift/bedrock-stream.drift.ts

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import http from "node:http";
99
import { describe, it, expect, beforeAll, afterAll } from "vitest";
1010
import { createServer, type ServerInstance } from "../../server.js";
1111
import type { Fixture } from "../../types.js";
12-
import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js";
12+
import { extractShape, triangulate, formatDriftReport } from "./schema.js";
1313
import { httpPost, startDriftServer, stopDriftServer } from "./helpers.js";
1414
import {
1515
bedrockConverseStreamEventShapes,
@@ -203,9 +203,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => {
203203
const diffs = triangulate(sdkShape, sdkShape, mockShape);
204204
const report = formatDriftReport("Bedrock Invoke", diffs);
205205

206-
if (shouldFail(diffs)) {
207-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
208-
}
206+
expect(
207+
diffs.filter((d) => d.severity === "critical"),
208+
report,
209+
).toEqual([]);
209210
}
210211
});
211212

@@ -262,9 +263,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => {
262263
const diffs = triangulate(sdkEvent.dataShape, sdkEvent.dataShape, mockEvent.dataShape);
263264
const report = formatDriftReport(`Bedrock InvokeStream:${sdkEvent.type}`, diffs);
264265

265-
if (shouldFail(diffs)) {
266-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
267-
}
266+
expect(
267+
diffs.filter((d) => d.severity === "critical"),
268+
report,
269+
).toEqual([]);
268270
}
269271
});
270272

@@ -291,9 +293,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => {
291293
const diffs = triangulate(sdkShape, sdkShape, mockShape);
292294
const report = formatDriftReport("Bedrock Converse", diffs);
293295

294-
if (shouldFail(diffs)) {
295-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
296-
}
296+
expect(
297+
diffs.filter((d) => d.severity === "critical"),
298+
report,
299+
).toEqual([]);
297300
}
298301
});
299302

@@ -396,9 +399,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => {
396399
const diffs = triangulate(sdkEvent.dataShape, sdkEvent.dataShape, mockEvent.dataShape);
397400
const report = formatDriftReport(`Bedrock ConverseStream:${sdkEvent.type}`, diffs);
398401

399-
if (shouldFail(diffs)) {
400-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
401-
}
402+
expect(
403+
diffs.filter((d) => d.severity === "critical"),
404+
report,
405+
).toEqual([]);
402406
}
403407
});
404408

@@ -480,9 +484,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => {
480484
const diffs = triangulate(sdkEvent.dataShape, sdkEvent.dataShape, mockEvent.dataShape);
481485
const report = formatDriftReport(`Bedrock ConverseStream Tool:${sdkEvent.type}`, diffs);
482486

483-
if (shouldFail(diffs)) {
484-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
485-
}
487+
expect(
488+
diffs.filter((d) => d.severity === "critical"),
489+
report,
490+
).toEqual([]);
486491
}
487492
});
488493

@@ -594,9 +599,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => {
594599
diffs,
595600
);
596601

597-
if (shouldFail(diffs)) {
598-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
599-
}
602+
expect(
603+
diffs.filter((d) => d.severity === "critical"),
604+
report,
605+
).toEqual([]);
600606
}
601607
} finally {
602608
await new Promise<void>((r) => reasoningInstance.server.close(() => r()));

src/__tests__/drift/cohere.drift.ts

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import { describe, it, expect, beforeAll, afterAll } from "vitest";
1111
import type { ServerInstance } from "../../server.js";
12-
import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js";
12+
import { extractShape, triangulate, formatDriftReport } from "./schema.js";
1313
import {
1414
httpPost,
1515
httpPostRaw,
@@ -158,9 +158,10 @@ describe("Cohere error shapes", () => {
158158
const diffs = triangulate(sdkShape, sdkShape, mockShape);
159159
const report = formatDriftReport("Cohere /v2/chat malformed JSON error", diffs);
160160

161-
if (shouldFail(diffs)) {
162-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
163-
}
161+
expect(
162+
diffs.filter((d) => d.severity === "critical"),
163+
report,
164+
).toEqual([]);
164165
});
165166

166167
it("missing model field returns 400 with error envelope", async () => {
@@ -177,9 +178,10 @@ describe("Cohere error shapes", () => {
177178
const diffs = triangulate(sdkShape, sdkShape, mockShape);
178179
const report = formatDriftReport("Cohere /v2/chat missing model error", diffs);
179180

180-
if (shouldFail(diffs)) {
181-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
182-
}
181+
expect(
182+
diffs.filter((d) => d.severity === "critical"),
183+
report,
184+
).toEqual([]);
183185
});
184186

185187
it("missing messages array returns 400 with error envelope", async () => {
@@ -196,9 +198,10 @@ describe("Cohere error shapes", () => {
196198
const diffs = triangulate(sdkShape, sdkShape, mockShape);
197199
const report = formatDriftReport("Cohere /v2/chat missing messages error", diffs);
198200

199-
if (shouldFail(diffs)) {
200-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
201-
}
201+
expect(
202+
diffs.filter((d) => d.severity === "critical"),
203+
report,
204+
).toEqual([]);
202205
});
203206

204207
it("no fixture match returns 404 with error envelope", async () => {
@@ -216,9 +219,10 @@ describe("Cohere error shapes", () => {
216219
const diffs = triangulate(sdkShape, sdkShape, mockShape);
217220
const report = formatDriftReport("Cohere /v2/chat no fixture match error", diffs);
218221

219-
if (shouldFail(diffs)) {
220-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
221-
}
222+
expect(
223+
diffs.filter((d) => d.severity === "critical"),
224+
report,
225+
).toEqual([]);
222226
});
223227
});
224228

@@ -246,9 +250,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Cohere drift", () => {
246250
const diffs = triangulate(sdkShape, realShape, mockShape);
247251
const report = formatDriftReport("Cohere /v2/chat (non-streaming)", diffs);
248252

249-
if (shouldFail(diffs)) {
250-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
251-
}
253+
expect(
254+
diffs.filter((d) => d.severity === "critical"),
255+
report,
256+
).toEqual([]);
252257
}
253258
});
254259

@@ -281,9 +286,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Cohere drift", () => {
281286
const diffs = triangulate(sdkChunkShape, realChunkShape, mockChunkShape);
282287
const report = formatDriftReport("Cohere /v2/chat (streaming first chunk)", diffs);
283288

284-
if (shouldFail(diffs)) {
285-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
286-
}
289+
expect(
290+
diffs.filter((d) => d.severity === "critical"),
291+
report,
292+
).toEqual([]);
287293

288294
// Also compare the LAST chunk shape (has finish_reason, usage)
289295
const sdkLastChunkShape = extractShape({
@@ -304,9 +310,10 @@ describe.skipIf(!HAS_CREDENTIALS)("Cohere drift", () => {
304310
const lastDiffs = triangulate(sdkLastChunkShape, realLastShape, mockLastShape);
305311
const lastReport = formatDriftReport("Cohere /v2/chat (streaming last chunk)", lastDiffs);
306312

307-
if (shouldFail(lastDiffs)) {
308-
expect.soft([], lastReport).toEqual(lastDiffs.filter((d) => d.severity === "critical"));
309-
}
313+
expect(
314+
lastDiffs.filter((d) => d.severity === "critical"),
315+
lastReport,
316+
).toEqual([]);
310317
}
311318
}
312319
});

src/__tests__/drift/elevenlabs.drift.ts

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import http from "node:http";
1818
import { describe, it, expect, beforeAll, afterAll } from "vitest";
1919
import { createServer, type ServerInstance } from "../../server.js";
2020
import type { Fixture } from "../../types.js";
21-
import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js";
21+
import { extractShape, triangulate, formatDriftReport } from "./schema.js";
2222

2323
// ---------------------------------------------------------------------------
2424
// Credentials check
@@ -176,9 +176,10 @@ describe("ElevenLabs drift — sound generation", () => {
176176
const diffs = triangulate(sdkShape, sdkShape, mockShape);
177177
const report = formatDriftReport("ElevenLabs /v1/sound-generation 400 error", diffs);
178178

179-
if (shouldFail(diffs)) {
180-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
181-
}
179+
expect(
180+
diffs.filter((d) => d.severity === "critical"),
181+
report,
182+
).toEqual([]);
182183
});
183184

184185
it.skipIf(!HAS_CREDENTIALS)(
@@ -242,9 +243,10 @@ describe("ElevenLabs drift — music endpoints", () => {
242243
const diffs = triangulate(sdkShape, sdkShape, mockShape);
243244
const report = formatDriftReport("ElevenLabs /v1/music/plan", diffs);
244245

245-
if (shouldFail(diffs)) {
246-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
247-
}
246+
expect(
247+
diffs.filter((d) => d.severity === "critical"),
248+
report,
249+
).toEqual([]);
248250
});
249251

250252
it("/v1/music missing prompt returns 400 with error shape", async () => {
@@ -265,9 +267,10 @@ describe("ElevenLabs drift — music endpoints", () => {
265267
const diffs = triangulate(expectedShape, expectedShape, mockShape);
266268
const report = formatDriftReport("ElevenLabs /v1/music 400 error", diffs);
267269

268-
if (shouldFail(diffs)) {
269-
expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical"));
270-
}
270+
expect(
271+
diffs.filter((d) => d.severity === "critical"),
272+
report,
273+
).toEqual([]);
271274
});
272275

273276
it("/v1/music song-id header absent on plan endpoint", async () => {

0 commit comments

Comments
 (0)