fix(llm-tracing): resolve all post-review gaps

sharon77242 · sharon77242 · commit 09ca4a7263e8 · 2026-05-18T11:04:45.000+03:00
- Add llm-dominates-request rule to LLMAnalyzer; wire HTTP duration
  listener in ArgusAgent.wireLLMTracing() via crossSignalListeners
- Add GenAI semantic conventions to OTLPExporter.formatToOTLP() with
  safe string/number/bool extractors (fixes no-base-to-string)
- Fix dot-notation lint: mod.OpenAI / mod.Anthropic (not bracket form)
- Fix no-unnecessary-type-assertion: type DEFAULT_COSTS as
  Partial<Record<...>> so undefined is natural, no cast needed
- Expand tests: llm-analyzer (llm-dominates-request), openai/anthropic
  (mod.X fallback, unknown-model zero-cost, safe unpatch),
  base-llm-driver (mergeDetections, empty prompt), console-logger
  (LLM PII + injection events)
- Demo app: add "llm" to appType and POST /quotes/summarize route
  with synthetic LLM event emission (no real API key required)
diff --git a/packages/agent/src/analysis/llm-analyzer.ts b/packages/agent/src/analysis/llm-analyzer.ts
@@ -10,6 +10,14 @@ export class LLMAnalyzer {
   // context-window-growth: keyed by endpoint → last 5 promptToken counts
   private tokenHistory = new Map<string, number[]>();
 
+  // llm-dominates-request: keyed by traceId → most recent HTTP request durationMs
+  private httpDurations = new Map<string, number>();
+
+  /** Called by ArgusAgent when an outbound HTTP request completes on a traced request. */
+  recordHTTPDuration(traceId: string, durationMs: number): void {
+    this.httpDurations.set(traceId, durationMs);
+  }
+
   analyze(event: LLMEvent): LLMSuggestion[] {
     const suggestions: LLMSuggestion[] = [];
     const now = Date.now();
@@ -30,6 +38,22 @@ export class LLMAnalyzer {
       }
     }
 
+    // Rule: llm-dominates-request — LLM > 80% of HTTP request duration on same traceId
+    if (event.traceId) {
+      const httpMs = this.httpDurations.get(event.traceId);
+      if (httpMs !== undefined && httpMs > 0) {
+        const pct = Math.round((event.durationMs / httpMs) * 100);
+        if (pct > 80) {
+          suggestions.push({
+            rule: "llm-dominates-request",
+            suggestedFix:
+              `LLM accounts for ${pct}% of ${event.endpoint} latency. ` +
+              `Consider streaming the response or caching for identical prompts.`,
+          });
+        }
+      }
+    }
+
     // Rule: llm-cost-spike — single call cost > 10x rolling average (last 100 calls)
     const modelHistory = this.costHistory.get(event.model) ?? [];
     if (modelHistory.length >= 5 && event.costUsd > 0) {
@@ -76,5 +100,6 @@ export class LLMAnalyzer {
     this.callsByTrace.clear();
     this.costHistory.clear();
     this.tokenHistory.clear();
+    this.httpDurations.clear();
   }
 }
diff --git a/packages/agent/src/argus-agent.ts b/packages/agent/src/argus-agent.ts
@@ -1066,6 +1066,17 @@ export class ArgusAgent extends EventEmitter {
 
     if (providers.includes("openai")) patchOpenAI(onEvent, costMap);
     if (providers.includes("anthropic")) patchAnthropic(onEvent, costMap);
+
+    // Feed HTTP request durations so llm-dominates-request rule can correlate
+    const httpListener = (req: Record<string, unknown>): void => {
+      const traceId = req.traceId as string | undefined;
+      const durationMs = req.durationMs as number | undefined;
+      if (traceId && typeof durationMs === "number") {
+        analyzer.recordHTTPDuration(traceId, durationMs);
+      }
+    };
+    this.on("request", httpListener);
+    this.crossSignalListeners.push(["request", httpListener] as DebugListener);
   }
 
   private wireCrossSignalRules(): void {
diff --git a/packages/agent/src/export/exporter.ts b/packages/agent/src/export/exporter.ts
@@ -127,6 +127,50 @@ export class OTLPExporter {
         typeof event.payload.timestamp === "number" ? event.payload.timestamp : Date.now();
       const durationMs = event.metricName === "memory-leak" ? 0 : event.value;
 
+      const baseAttributes = [
+        { key: "diagnostic.value", value: { doubleValue: event.value } },
+        { key: "diagnostic.payload", value: { stringValue: JSON.stringify(event.payload) } },
+      ];
+
+      const toStr = (v: unknown, fallback = ""): string => (typeof v === "string" ? v : fallback);
+      const toNum = (v: unknown): number => (typeof v === "number" ? v : 0);
+      const toBool = (v: unknown): boolean => v === true;
+
+      // GenAI semantic conventions for LLM spans
+      const llmAttributes =
+        event.metricName === "llm"
+          ? [
+              {
+                key: "gen_ai.system",
+                value: { stringValue: toStr(event.payload["gen_ai.system"]) },
+              },
+              {
+                key: "gen_ai.request.model",
+                value: { stringValue: toStr(event.payload["gen_ai.request.model"]) },
+              },
+              {
+                key: "gen_ai.usage.input_tokens",
+                value: { intValue: toNum(event.payload["gen_ai.usage.input_tokens"]) },
+              },
+              {
+                key: "gen_ai.usage.output_tokens",
+                value: { intValue: toNum(event.payload["gen_ai.usage.output_tokens"]) },
+              },
+              {
+                key: "argus.llm.cost_usd",
+                value: { doubleValue: toNum(event.payload["argus.llm.cost_usd"]) },
+              },
+              {
+                key: "argus.llm.pii_detected",
+                value: { stringValue: toStr(event.payload["argus.llm.pii_detected"], "[]") },
+              },
+              {
+                key: "argus.llm.injection_attempt",
+                value: { boolValue: toBool(event.payload["argus.llm.injection_attempt"]) },
+              },
+            ]
+          : [];
+
       return {
         traceId:
           (event.payload.traceId as string | undefined) ?? crypto.randomBytes(16).toString("hex"),
@@ -135,16 +179,7 @@ export class OTLPExporter {
         kind: 1, // SPAN_KIND_INTERNAL
         startTimeUnixNano: timestamp * 1000000,
         endTimeUnixNano: (timestamp + durationMs) * 1000000,
-        attributes: [
-          {
-            key: "diagnostic.value",
-            value: { doubleValue: event.value },
-          },
-          {
-            key: "diagnostic.payload",
-            value: { stringValue: JSON.stringify(event.payload) },
-          },
-        ],
+        attributes: [...baseAttributes, ...llmAttributes],
       };
     });
 
diff --git a/packages/agent/src/instrumentation/llm/anthropic.ts b/packages/agent/src/instrumentation/llm/anthropic.ts
@@ -35,7 +35,7 @@ function asRecord(v: unknown): Record<string, unknown> {
 export function patchAnthropic(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean {
   try {
     const mod = nodeRequire("@anthropic-ai/sdk") as Record<string, unknown>;
-    const AnthropicClass = (mod?.default ?? mod?.Anthropic ?? mod) as
+    const AnthropicClass = (mod.default ?? mod.Anthropic ?? mod) as
       | { prototype?: { messages?: ProtoWithCreate } }
       | undefined;
     const proto = AnthropicClass?.prototype?.messages;
@@ -81,7 +81,7 @@ export function patchAnthropic(onEvent: OnLLMEvent, costMap: ModelCostMap): bool
               provider: "anthropic",
               model,
               rawPrompt: extractAnthropicPrompt(
-                (params.messages as { role: string; content: unknown }[]) ?? [],
+                (params.messages as { role: string; content: unknown }[] | undefined) ?? [],
               ),
               rawCompletion: completion,
               promptTokens,
@@ -101,7 +101,7 @@ export function patchAnthropic(onEvent: OnLLMEvent, costMap: ModelCostMap): bool
           provider: "anthropic",
           model: result.model ?? (typeof params.model === "string" ? params.model : "unknown"),
           rawPrompt: extractAnthropicPrompt(
-            (params.messages as { role: string; content: unknown }[]) ?? [],
+            (params.messages as { role: string; content: unknown }[] | undefined) ?? [],
           ),
           rawCompletion: text,
           promptTokens: result.usage?.input_tokens ?? 0,
diff --git a/packages/agent/src/instrumentation/llm/base-llm-driver.ts b/packages/agent/src/instrumentation/llm/base-llm-driver.ts
@@ -14,7 +14,7 @@ export interface RawLLMCall {
   durationMs: number;
 }
 
-const DEFAULT_COSTS: Record<string, { input: number; output: number }> = {
+const DEFAULT_COSTS: Partial<Record<string, { input: number; output: number }>> = {
   "gpt-4o": { input: 0.000005, output: 0.000015 },
   "gpt-4-turbo": { input: 0.00001, output: 0.00003 },
   "gpt-3.5-turbo": { input: 0.0000005, output: 0.0000015 },
@@ -43,7 +43,9 @@ export function processLLMCall(
 
   const allDetections = mergeDetections([...promptDetections, ...completionDetections]);
 
-  const pricing = costMap[raw.model] ?? DEFAULT_COSTS[raw.model];
+  const pricing =
+    (costMap[raw.model] as { input: number; output: number } | undefined) ??
+    DEFAULT_COSTS[raw.model];
   const costUsd = pricing
     ? raw.promptTokens * pricing.input + raw.completionTokens * pricing.output
     : 0;
diff --git a/packages/agent/src/instrumentation/llm/openai.ts b/packages/agent/src/instrumentation/llm/openai.ts
@@ -34,7 +34,7 @@ function asRecord(v: unknown): Record<string, unknown> {
 export function patchOpenAI(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean {
   try {
     const mod = nodeRequire("openai") as Record<string, unknown>;
-    const OpenAIClass = (mod?.default ?? mod?.OpenAI ?? mod) as
+    const OpenAIClass = (mod.default ?? mod.OpenAI ?? mod) as
       | { prototype?: { chat?: { completions?: ProtoWithCreate } } }
       | undefined;
     const proto = OpenAIClass?.prototype?.chat?.completions;
@@ -76,7 +76,7 @@ export function patchOpenAI(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean
             {
               provider: "openai",
               model,
-              rawPrompt: extractPromptText((params.messages as OpenAIMessage[]) ?? []),
+              rawPrompt: extractPromptText((params.messages as OpenAIMessage[] | undefined) ?? []),
               rawCompletion: completion,
               promptTokens,
               completionTokens,
@@ -93,7 +93,7 @@ export function patchOpenAI(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean
         {
           provider: "openai",
           model: result.model ?? (typeof params.model === "string" ? params.model : "unknown"),
-          rawPrompt: extractPromptText((params.messages as OpenAIMessage[]) ?? []),
+          rawPrompt: extractPromptText((params.messages as OpenAIMessage[] | undefined) ?? []),
           rawCompletion: result.choices?.[0]?.message?.content ?? "",
           promptTokens: result.usage?.prompt_tokens ?? 0,
           completionTokens: result.usage?.completion_tokens ?? 0,
diff --git a/packages/agent/tests/analysis/llm-analyzer.test.ts b/packages/agent/tests/analysis/llm-analyzer.test.ts
@@ -156,6 +156,41 @@ describe("LLMAnalyzer", () => {
     });
   });
 
+  // ── llm-dominates-request ─────────────────────────────────────────────────
+  describe("llm-dominates-request rule", () => {
+    it("fires when LLM > 80% of HTTP request duration on same traceId", () => {
+      analyzer.recordHTTPDuration("trace-abc", 1000);
+      // LLM takes 900ms = 90% of 1000ms
+      const result = analyzer.analyze(makeEvent({ durationMs: 900, traceId: "trace-abc" }));
+      assert.ok(result.some((s) => s.rule === "llm-dominates-request"));
+    });
+
+    it("does not fire when LLM is ≤80% of HTTP duration", () => {
+      analyzer.recordHTTPDuration("trace-abc", 1000);
+      // LLM takes 700ms = 70% — below threshold
+      const result = analyzer.analyze(makeEvent({ durationMs: 700, traceId: "trace-abc" }));
+      assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
+    });
+
+    it("does not fire when no HTTP duration recorded for traceId", () => {
+      const result = analyzer.analyze(makeEvent({ durationMs: 900, traceId: "trace-no-http" }));
+      assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
+    });
+
+    it("does not fire when traceId is absent", () => {
+      analyzer.recordHTTPDuration("trace-abc", 100);
+      const result = analyzer.analyze(makeEvent({ durationMs: 95, traceId: undefined }));
+      assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
+    });
+
+    it("reset() clears HTTP durations", () => {
+      analyzer.recordHTTPDuration("trace-abc", 1000);
+      analyzer.reset();
+      const result = analyzer.analyze(makeEvent({ durationMs: 900, traceId: "trace-abc" }));
+      assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
+    });
+  });
+
   // ── reset ─────────────────────────────────────────────────────────────────
   describe("reset()", () => {
     it("clears all state — cost spike does not fire after reset with fresh baseline", () => {
diff --git a/packages/agent/tests/instrumentation/llm/anthropic.test.ts b/packages/agent/tests/instrumentation/llm/anthropic.test.ts
@@ -130,4 +130,46 @@ describe("patchAnthropic", () => {
     assert.strictEqual(events[0].completionTokens, 7);
     assert.ok(events[0].sanitizedCompletion.includes("Hello"));
   });
+
+  it("falls back to mod.Anthropic when mod.default is absent", async () => {
+    const { MockAnthropic } = makeMockAnthropic();
+    requireRef.current = Object.assign(
+      (id: string) =>
+        id === "@anthropic-ai/sdk"
+          ? { Anthropic: MockAnthropic, default: undefined }
+          : originalRequire(id),
+      originalRequire,
+    ) as typeof originalRequire;
+
+    const result = patchAnthropic((e) => events.push(e), {});
+    assert.strictEqual(result, true, "should patch via mod.Anthropic fallback");
+  });
+
+  it("unpatchAnthropic is safe to call when nothing was patched", () => {
+    assert.doesNotThrow(() => unpatchAnthropic());
+  });
+
+  it("uses costUsd 0 for unknown model", async () => {
+    // Mock that echoes back whatever model was requested (not hardcoded claude-3-5-sonnet)
+    const unknownProto = {
+      create: async (params: Record<string, unknown>) => ({
+        model: params.model as string,
+        content: [{ type: "text", text: "ok" }],
+        usage: { input_tokens: 5, output_tokens: 3 },
+      }),
+    };
+    const UnknownMock = { prototype: { messages: unknownProto } };
+    requireRef.current = Object.assign(
+      (id: string) => (id === "@anthropic-ai/sdk" ? { default: UnknownMock } : originalRequire(id)),
+      originalRequire,
+    ) as typeof originalRequire;
+
+    patchAnthropic((e) => events.push(e), {});
+    await UnknownMock.prototype.messages.create({
+      model: "claude-unknown-xyz",
+      messages: [{ role: "user", content: "hi" }],
+      max_tokens: 10,
+    });
+    assert.strictEqual(events[0].costUsd, 0);
+  });
 });
diff --git a/packages/agent/tests/instrumentation/llm/base-llm-driver.test.ts b/packages/agent/tests/instrumentation/llm/base-llm-driver.test.ts
@@ -85,4 +85,20 @@ describe("processLLMCall", () => {
     const ev = runCall({ provider: "anthropic", model: "claude-3-5-sonnet" });
     assert.deepStrictEqual(ev.suggestions, []);
   });
+
+  it("merges duplicate PII detection types from prompt and completion", () => {
+    // Both prompt and completion have emails — mergeDetections should sum counts
+    const ev = runCall({
+      rawPrompt: "contact a@test.com and b@test.com",
+      rawCompletion: "reply to c@test.com",
+    });
+    const emailDet = ev.piiDetected.find((d) => d.type === "EMAIL");
+    assert.ok(emailDet && emailDet.count >= 3, "should merge 3 email detections");
+  });
+
+  it("handles missing messages field gracefully (no throw)", () => {
+    assert.doesNotThrow(() => {
+      runCall({ rawPrompt: "", rawCompletion: "" });
+    });
+  });
 });
diff --git a/packages/agent/tests/instrumentation/llm/openai.test.ts b/packages/agent/tests/instrumentation/llm/openai.test.ts
@@ -145,4 +145,43 @@ describe("patchOpenAI", () => {
     assert.ok(!events[0].sanitizedPrompt.includes("secret@test.com"));
     assert.ok(events[0].piiDetected.some((d) => d.type === "EMAIL"));
   });
+
+  it("falls back to mod.OpenAI when mod.default is absent", async () => {
+    const { MockOpenAI } = makeMockOpenAI();
+    requireRef.current = Object.assign(
+      (id: string) =>
+        id === "openai" ? { OpenAI: MockOpenAI, default: undefined } : originalRequire(id),
+      originalRequire,
+    ) as typeof originalRequire;
+
+    const result = patchOpenAI((e) => events.push(e), {});
+    assert.strictEqual(result, true, "should patch via mod.OpenAI fallback");
+  });
+
+  it("unpatchOpenAI is safe to call when nothing was patched", () => {
+    assert.doesNotThrow(() => unpatchOpenAI());
+  });
+
+  it("uses costUsd 0 for unknown model", async () => {
+    // Mock that echoes back whatever model was requested (not hardcoded gpt-4o)
+    const unknownProto = {
+      create: async (params: Record<string, unknown>) => ({
+        model: params.model as string,
+        choices: [{ message: { content: "ok" } }],
+        usage: { prompt_tokens: 5, completion_tokens: 3 },
+      }),
+    };
+    const UnknownMock = { prototype: { chat: { completions: unknownProto } } };
+    requireRef.current = Object.assign(
+      (id: string) => (id === "openai" ? { default: UnknownMock } : originalRequire(id)),
+      originalRequire,
+    ) as typeof originalRequire;
+
+    patchOpenAI((e) => events.push(e), {});
+    await UnknownMock.prototype.chat.completions.create({
+      model: "gpt-unknown-xyz",
+      messages: [{ role: "user", content: "hi" }],
+    });
+    assert.strictEqual(events[0].costUsd, 0);
+  });
 });
diff --git a/packages/agent/tests/integration/llm-tracing.test.ts b/packages/agent/tests/integration/llm-tracing.test.ts
@@ -28,7 +28,7 @@ function makeMockAnthropic() {
 
 describe("ArgusAgent LLM tracing integration", () => {
   const originalRequire = requireRef.current;
-  let agent: ArgusAgent;
+  let agent: ArgusAgent | undefined;
 
   beforeEach(() => {
     const MockOpenAI = makeMockOpenAI();
diff --git a/packages/agent/tests/internal/console-logger.test.ts b/packages/agent/tests/internal/console-logger.test.ts
diff --git a/quotes-demo-app/diagnostic.js b/quotes-demo-app/diagnostic.js
diff --git a/quotes-demo-app/routes/quotes.js b/quotes-demo-app/routes/quotes.js