test(llm-tracing): add integration test for llm-dominates-request rule

sharon77242 · sharon77242 · commit 2a6cd63efafb · 2026-05-18T11:04:46.000+03:00
Replace the weak no-op anomaly test with a real end-to-end assertion.
The new test emits a 'request' event on the agent with a 1ms HTTP
duration, runs a mocked OpenAI call (10ms delay) inside runWithContext
with the matching traceId, and then asserts that an 'anomaly' event of
type 'llm-dominates-request' fires on the agent.
diff --git a/packages/agent/tests/integration/llm-tracing.test.ts b/packages/agent/tests/integration/llm-tracing.test.ts
@@ -2,6 +2,7 @@ import { describe, it, beforeEach, afterEach } from "node:test";
 import assert from "node:assert/strict";
 import { ArgusAgent } from "../../src/argus-agent.ts";
 import { requireRef } from "../../src/instrumentation/drivers/_require.ts";
+import { runWithContext } from "../../src/instrumentation/correlation.ts";
 import type { LLMEvent } from "../../src/instrumentation/llm/types.ts";
 
 function makeMockOpenAI() {
@@ -127,23 +128,56 @@ describe("ArgusAgent LLM tracing integration", () => {
     assert.strictEqual(events.length, 1, "no new events after stop");
   });
 
-  it("n-llm-calls anomaly fires as 'anomaly' event on agent", async () => {
+  it("llm-dominates-request fires as 'anomaly' when LLM exceeds 80% of HTTP duration", async () => {
+    // Slow mock: 10ms delay → LLM durationMs ≈ 10ms, easily > 80% of the 1ms HTTP duration
+    const slowProto = {
+      create: async (_params: Record<string, unknown>) => {
+        await new Promise<void>((r) => setTimeout(r, 10));
+        return {
+          model: "gpt-4o",
+          choices: [{ message: { content: "ok" } }],
+          usage: { prompt_tokens: 5, completion_tokens: 3 },
+        };
+      },
+    };
+    const SlowMock = { prototype: { chat: { completions: slowProto } } };
+    requireRef.current = Object.assign(
+      (id: string) => (id === "openai" ? { default: SlowMock } : originalRequire(id)),
+      originalRequire,
+    ) as typeof originalRequire;
+
     agent = ArgusAgent.create().withLLMTracing({ providers: ["openai"] });
     await agent.start();
 
-    const anomalies: unknown[] = [];
-    agent.on("anomaly", (a) => anomalies.push(a));
-
-    const mock = requireRef.current("openai") as { default: ReturnType<typeof makeMockOpenAI> };
-    // Three calls with same traceId (context not set, so traceId is undefined — skip traceId rule)
-    // Use costUsd spike instead: need 5 baseline calls first
-    for (let i = 0; i < 5; i++) {
-      await mock.default.prototype.chat.completions.create({
-        model: "gpt-4o",
-        messages: [{ role: "user", content: "hello" }],
-      });
-    }
-    // No anomaly yet from n-llm-calls since no traceId — but no crash either
-    assert.ok(agent.isRunning, "agent still running after multiple LLM calls");
+    const anomalies: Record<string, unknown>[] = [];
+    agent.on("anomaly", (a) => anomalies.push(a as Record<string, unknown>));
+
+    // W3C 128-bit traceId (32 lowercase hex chars)
+    const traceId = "0af7651916cd43dd8448eb211c80319c";
+
+    // Record a 1ms HTTP request — LLM will take ~10ms, far exceeding the 80% threshold
+    agent.emit("request", { traceId, durationMs: 1 });
+
+    // Run the LLM call inside an async context with the matching traceId
+    await runWithContext(
+      {
+        requestId: "req-dom-1",
+        traceId,
+        spanId: "ab12cd34ef56ab12",
+        method: "POST",
+        url: "/api/chat",
+        startedAt: Date.now(),
+      },
+      () =>
+        SlowMock.prototype.chat.completions.create({
+          model: "gpt-4o",
+          messages: [{ role: "user", content: "summarize" }],
+        }),
+    );
+
+    assert.ok(
+      anomalies.some((a) => a.type === "llm-dominates-request"),
+      "should emit llm-dominates-request anomaly when LLM dominates HTTP request time",
+    );
   });
 });