Skip to content

Commit 09ca4a7

Browse files
committed
fix(llm-tracing): resolve all post-review gaps
- Add llm-dominates-request rule to LLMAnalyzer; wire HTTP duration listener in ArgusAgent.wireLLMTracing() via crossSignalListeners
- Add GenAI semantic conventions to OTLPExporter.formatToOTLP() with safe string/number/bool extractors (fixes no-base-to-string)
- Fix dot-notation lint: mod.OpenAI / mod.Anthropic (not bracket form)
- Fix no-unnecessary-type-assertion: type DEFAULT_COSTS as Partial<Record<...>> so undefined is natural, no cast needed
- Expand tests: llm-analyzer (llm-dominates-request), openai/anthropic (mod.X fallback, unknown-model zero-cost, safe unpatch), base-llm-driver (mergeDetections, empty prompt), console-logger (LLM PII + injection events)
- Demo app: add "llm" to appType and POST /quotes/summarize route with synthetic LLM event emission (no real API key required)
1 parent 9b71a4d commit 09ca4a7

14 files changed

Lines changed: 328 additions & 20 deletions

File tree

packages/agent/src/analysis/llm-analyzer.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ export class LLMAnalyzer {
1010
// context-window-growth: keyed by endpoint → last 5 promptToken counts
1111
private tokenHistory = new Map<string, number[]>();
1212

13+
// llm-dominates-request: keyed by traceId → most recent HTTP request durationMs
14+
private httpDurations = new Map<string, number>();
15+
16+
/** Called by ArgusAgent when an outbound HTTP request completes on a traced request. */
17+
recordHTTPDuration(traceId: string, durationMs: number): void {
18+
this.httpDurations.set(traceId, durationMs);
19+
}
20+
1321
analyze(event: LLMEvent): LLMSuggestion[] {
1422
const suggestions: LLMSuggestion[] = [];
1523
const now = Date.now();
@@ -30,6 +38,22 @@ export class LLMAnalyzer {
3038
}
3139
}
3240

41+
// Rule: llm-dominates-request — LLM > 80% of HTTP request duration on same traceId
42+
if (event.traceId) {
43+
const httpMs = this.httpDurations.get(event.traceId);
44+
if (httpMs !== undefined && httpMs > 0) {
45+
const pct = Math.round((event.durationMs / httpMs) * 100);
46+
if (pct > 80) {
47+
suggestions.push({
48+
rule: "llm-dominates-request",
49+
suggestedFix:
50+
`LLM accounts for ${pct}% of ${event.endpoint} latency. ` +
51+
`Consider streaming the response or caching for identical prompts.`,
52+
});
53+
}
54+
}
55+
}
56+
3357
// Rule: llm-cost-spike — single call cost > 10x rolling average (last 100 calls)
3458
const modelHistory = this.costHistory.get(event.model) ?? [];
3559
if (modelHistory.length >= 5 && event.costUsd > 0) {
@@ -76,5 +100,6 @@ export class LLMAnalyzer {
76100
this.callsByTrace.clear();
77101
this.costHistory.clear();
78102
this.tokenHistory.clear();
103+
this.httpDurations.clear();
79104
}
80105
}

packages/agent/src/argus-agent.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,17 @@ export class ArgusAgent extends EventEmitter {
10661066

10671067
if (providers.includes("openai")) patchOpenAI(onEvent, costMap);
10681068
if (providers.includes("anthropic")) patchAnthropic(onEvent, costMap);
1069+
1070+
// Feed HTTP request durations so llm-dominates-request rule can correlate
1071+
const httpListener = (req: Record<string, unknown>): void => {
1072+
const traceId = req.traceId as string | undefined;
1073+
const durationMs = req.durationMs as number | undefined;
1074+
if (traceId && typeof durationMs === "number") {
1075+
analyzer.recordHTTPDuration(traceId, durationMs);
1076+
}
1077+
};
1078+
this.on("request", httpListener);
1079+
this.crossSignalListeners.push(["request", httpListener] as DebugListener);
10691080
}
10701081

10711082
private wireCrossSignalRules(): void {

packages/agent/src/export/exporter.ts

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,50 @@ export class OTLPExporter {
127127
typeof event.payload.timestamp === "number" ? event.payload.timestamp : Date.now();
128128
const durationMs = event.metricName === "memory-leak" ? 0 : event.value;
129129

130+
const baseAttributes = [
131+
{ key: "diagnostic.value", value: { doubleValue: event.value } },
132+
{ key: "diagnostic.payload", value: { stringValue: JSON.stringify(event.payload) } },
133+
];
134+
135+
const toStr = (v: unknown, fallback = ""): string => (typeof v === "string" ? v : fallback);
136+
const toNum = (v: unknown): number => (typeof v === "number" ? v : 0);
137+
const toBool = (v: unknown): boolean => v === true;
138+
139+
// GenAI semantic conventions for LLM spans
140+
const llmAttributes =
141+
event.metricName === "llm"
142+
? [
143+
{
144+
key: "gen_ai.system",
145+
value: { stringValue: toStr(event.payload["gen_ai.system"]) },
146+
},
147+
{
148+
key: "gen_ai.request.model",
149+
value: { stringValue: toStr(event.payload["gen_ai.request.model"]) },
150+
},
151+
{
152+
key: "gen_ai.usage.input_tokens",
153+
value: { intValue: toNum(event.payload["gen_ai.usage.input_tokens"]) },
154+
},
155+
{
156+
key: "gen_ai.usage.output_tokens",
157+
value: { intValue: toNum(event.payload["gen_ai.usage.output_tokens"]) },
158+
},
159+
{
160+
key: "argus.llm.cost_usd",
161+
value: { doubleValue: toNum(event.payload["argus.llm.cost_usd"]) },
162+
},
163+
{
164+
key: "argus.llm.pii_detected",
165+
value: { stringValue: toStr(event.payload["argus.llm.pii_detected"], "[]") },
166+
},
167+
{
168+
key: "argus.llm.injection_attempt",
169+
value: { boolValue: toBool(event.payload["argus.llm.injection_attempt"]) },
170+
},
171+
]
172+
: [];
173+
130174
return {
131175
traceId:
132176
(event.payload.traceId as string | undefined) ?? crypto.randomBytes(16).toString("hex"),
@@ -135,16 +179,7 @@ export class OTLPExporter {
135179
kind: 1, // SPAN_KIND_INTERNAL
136180
startTimeUnixNano: timestamp * 1000000,
137181
endTimeUnixNano: (timestamp + durationMs) * 1000000,
138-
attributes: [
139-
{
140-
key: "diagnostic.value",
141-
value: { doubleValue: event.value },
142-
},
143-
{
144-
key: "diagnostic.payload",
145-
value: { stringValue: JSON.stringify(event.payload) },
146-
},
147-
],
182+
attributes: [...baseAttributes, ...llmAttributes],
148183
};
149184
});
150185

packages/agent/src/instrumentation/llm/anthropic.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ function asRecord(v: unknown): Record<string, unknown> {
3535
export function patchAnthropic(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean {
3636
try {
3737
const mod = nodeRequire("@anthropic-ai/sdk") as Record<string, unknown>;
38-
const AnthropicClass = (mod?.default ?? mod?.Anthropic ?? mod) as
38+
const AnthropicClass = (mod.default ?? mod.Anthropic ?? mod) as
3939
| { prototype?: { messages?: ProtoWithCreate } }
4040
| undefined;
4141
const proto = AnthropicClass?.prototype?.messages;
@@ -81,7 +81,7 @@ export function patchAnthropic(onEvent: OnLLMEvent, costMap: ModelCostMap): bool
8181
provider: "anthropic",
8282
model,
8383
rawPrompt: extractAnthropicPrompt(
84-
(params.messages as { role: string; content: unknown }[]) ?? [],
84+
(params.messages as { role: string; content: unknown }[] | undefined) ?? [],
8585
),
8686
rawCompletion: completion,
8787
promptTokens,
@@ -101,7 +101,7 @@ export function patchAnthropic(onEvent: OnLLMEvent, costMap: ModelCostMap): bool
101101
provider: "anthropic",
102102
model: result.model ?? (typeof params.model === "string" ? params.model : "unknown"),
103103
rawPrompt: extractAnthropicPrompt(
104-
(params.messages as { role: string; content: unknown }[]) ?? [],
104+
(params.messages as { role: string; content: unknown }[] | undefined) ?? [],
105105
),
106106
rawCompletion: text,
107107
promptTokens: result.usage?.input_tokens ?? 0,

packages/agent/src/instrumentation/llm/base-llm-driver.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export interface RawLLMCall {
1414
durationMs: number;
1515
}
1616

17-
const DEFAULT_COSTS: Record<string, { input: number; output: number }> = {
17+
const DEFAULT_COSTS: Partial<Record<string, { input: number; output: number }>> = {
1818
"gpt-4o": { input: 0.000005, output: 0.000015 },
1919
"gpt-4-turbo": { input: 0.00001, output: 0.00003 },
2020
"gpt-3.5-turbo": { input: 0.0000005, output: 0.0000015 },
@@ -43,7 +43,9 @@ export function processLLMCall(
4343

4444
const allDetections = mergeDetections([...promptDetections, ...completionDetections]);
4545

46-
const pricing = costMap[raw.model] ?? DEFAULT_COSTS[raw.model];
46+
const pricing =
47+
(costMap[raw.model] as { input: number; output: number } | undefined) ??
48+
DEFAULT_COSTS[raw.model];
4749
const costUsd = pricing
4850
? raw.promptTokens * pricing.input + raw.completionTokens * pricing.output
4951
: 0;

packages/agent/src/instrumentation/llm/openai.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ function asRecord(v: unknown): Record<string, unknown> {
3434
export function patchOpenAI(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean {
3535
try {
3636
const mod = nodeRequire("openai") as Record<string, unknown>;
37-
const OpenAIClass = (mod?.default ?? mod?.OpenAI ?? mod) as
37+
const OpenAIClass = (mod.default ?? mod.OpenAI ?? mod) as
3838
| { prototype?: { chat?: { completions?: ProtoWithCreate } } }
3939
| undefined;
4040
const proto = OpenAIClass?.prototype?.chat?.completions;
@@ -76,7 +76,7 @@ export function patchOpenAI(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean
7676
{
7777
provider: "openai",
7878
model,
79-
rawPrompt: extractPromptText((params.messages as OpenAIMessage[]) ?? []),
79+
rawPrompt: extractPromptText((params.messages as OpenAIMessage[] | undefined) ?? []),
8080
rawCompletion: completion,
8181
promptTokens,
8282
completionTokens,
@@ -93,7 +93,7 @@ export function patchOpenAI(onEvent: OnLLMEvent, costMap: ModelCostMap): boolean
9393
{
9494
provider: "openai",
9595
model: result.model ?? (typeof params.model === "string" ? params.model : "unknown"),
96-
rawPrompt: extractPromptText((params.messages as OpenAIMessage[]) ?? []),
96+
rawPrompt: extractPromptText((params.messages as OpenAIMessage[] | undefined) ?? []),
9797
rawCompletion: result.choices?.[0]?.message?.content ?? "",
9898
promptTokens: result.usage?.prompt_tokens ?? 0,
9999
completionTokens: result.usage?.completion_tokens ?? 0,

packages/agent/tests/analysis/llm-analyzer.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,41 @@ describe("LLMAnalyzer", () => {
156156
});
157157
});
158158

159+
// ── llm-dominates-request ─────────────────────────────────────────────────
160+
describe("llm-dominates-request rule", () => {
161+
it("fires when LLM > 80% of HTTP request duration on same traceId", () => {
162+
analyzer.recordHTTPDuration("trace-abc", 1000);
163+
// LLM takes 900ms = 90% of 1000ms
164+
const result = analyzer.analyze(makeEvent({ durationMs: 900, traceId: "trace-abc" }));
165+
assert.ok(result.some((s) => s.rule === "llm-dominates-request"));
166+
});
167+
168+
it("does not fire when LLM is ≤80% of HTTP duration", () => {
169+
analyzer.recordHTTPDuration("trace-abc", 1000);
170+
// LLM takes 700ms = 70% — below threshold
171+
const result = analyzer.analyze(makeEvent({ durationMs: 700, traceId: "trace-abc" }));
172+
assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
173+
});
174+
175+
it("does not fire when no HTTP duration recorded for traceId", () => {
176+
const result = analyzer.analyze(makeEvent({ durationMs: 900, traceId: "trace-no-http" }));
177+
assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
178+
});
179+
180+
it("does not fire when traceId is absent", () => {
181+
analyzer.recordHTTPDuration("trace-abc", 100);
182+
const result = analyzer.analyze(makeEvent({ durationMs: 95, traceId: undefined }));
183+
assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
184+
});
185+
186+
it("reset() clears HTTP durations", () => {
187+
analyzer.recordHTTPDuration("trace-abc", 1000);
188+
analyzer.reset();
189+
const result = analyzer.analyze(makeEvent({ durationMs: 900, traceId: "trace-abc" }));
190+
assert.ok(!result.some((s) => s.rule === "llm-dominates-request"));
191+
});
192+
});
193+
159194
// ── reset ─────────────────────────────────────────────────────────────────
160195
describe("reset()", () => {
161196
it("clears all state — cost spike does not fire after reset with fresh baseline", () => {

packages/agent/tests/instrumentation/llm/anthropic.test.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,46 @@ describe("patchAnthropic", () => {
130130
assert.strictEqual(events[0].completionTokens, 7);
131131
assert.ok(events[0].sanitizedCompletion.includes("Hello"));
132132
});
133+
134+
it("falls back to mod.Anthropic when mod.default is absent", async () => {
135+
const { MockAnthropic } = makeMockAnthropic();
136+
requireRef.current = Object.assign(
137+
(id: string) =>
138+
id === "@anthropic-ai/sdk"
139+
? { Anthropic: MockAnthropic, default: undefined }
140+
: originalRequire(id),
141+
originalRequire,
142+
) as typeof originalRequire;
143+
144+
const result = patchAnthropic((e) => events.push(e), {});
145+
assert.strictEqual(result, true, "should patch via mod.Anthropic fallback");
146+
});
147+
148+
it("unpatchAnthropic is safe to call when nothing was patched", () => {
149+
assert.doesNotThrow(() => unpatchAnthropic());
150+
});
151+
152+
it("uses costUsd 0 for unknown model", async () => {
153+
// Mock that echoes back whatever model was requested (not hardcoded claude-3-5-sonnet)
154+
const unknownProto = {
155+
create: async (params: Record<string, unknown>) => ({
156+
model: params.model as string,
157+
content: [{ type: "text", text: "ok" }],
158+
usage: { input_tokens: 5, output_tokens: 3 },
159+
}),
160+
};
161+
const UnknownMock = { prototype: { messages: unknownProto } };
162+
requireRef.current = Object.assign(
163+
(id: string) => (id === "@anthropic-ai/sdk" ? { default: UnknownMock } : originalRequire(id)),
164+
originalRequire,
165+
) as typeof originalRequire;
166+
167+
patchAnthropic((e) => events.push(e), {});
168+
await UnknownMock.prototype.messages.create({
169+
model: "claude-unknown-xyz",
170+
messages: [{ role: "user", content: "hi" }],
171+
max_tokens: 10,
172+
});
173+
assert.strictEqual(events[0].costUsd, 0);
174+
});
133175
});

packages/agent/tests/instrumentation/llm/base-llm-driver.test.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,20 @@ describe("processLLMCall", () => {
8585
const ev = runCall({ provider: "anthropic", model: "claude-3-5-sonnet" });
8686
assert.deepStrictEqual(ev.suggestions, []);
8787
});
88+
89+
it("merges duplicate PII detection types from prompt and completion", () => {
90+
// Both prompt and completion have emails — mergeDetections should sum counts
91+
const ev = runCall({
92+
rawPrompt: "contact a@test.com and b@test.com",
93+
rawCompletion: "reply to c@test.com",
94+
});
95+
const emailDet = ev.piiDetected.find((d) => d.type === "EMAIL");
96+
assert.ok(emailDet && emailDet.count >= 3, "should merge 3 email detections");
97+
});
98+
99+
it("handles missing messages field gracefully (no throw)", () => {
100+
assert.doesNotThrow(() => {
101+
runCall({ rawPrompt: "", rawCompletion: "" });
102+
});
103+
});
88104
});

packages/agent/tests/instrumentation/llm/openai.test.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,4 +145,43 @@ describe("patchOpenAI", () => {
145145
assert.ok(!events[0].sanitizedPrompt.includes("secret@test.com"));
146146
assert.ok(events[0].piiDetected.some((d) => d.type === "EMAIL"));
147147
});
148+
149+
it("falls back to mod.OpenAI when mod.default is absent", async () => {
150+
const { MockOpenAI } = makeMockOpenAI();
151+
requireRef.current = Object.assign(
152+
(id: string) =>
153+
id === "openai" ? { OpenAI: MockOpenAI, default: undefined } : originalRequire(id),
154+
originalRequire,
155+
) as typeof originalRequire;
156+
157+
const result = patchOpenAI((e) => events.push(e), {});
158+
assert.strictEqual(result, true, "should patch via mod.OpenAI fallback");
159+
});
160+
161+
it("unpatchOpenAI is safe to call when nothing was patched", () => {
162+
assert.doesNotThrow(() => unpatchOpenAI());
163+
});
164+
165+
it("uses costUsd 0 for unknown model", async () => {
166+
// Mock that echoes back whatever model was requested (not hardcoded gpt-4o)
167+
const unknownProto = {
168+
create: async (params: Record<string, unknown>) => ({
169+
model: params.model as string,
170+
choices: [{ message: { content: "ok" } }],
171+
usage: { prompt_tokens: 5, completion_tokens: 3 },
172+
}),
173+
};
174+
const UnknownMock = { prototype: { chat: { completions: unknownProto } } };
175+
requireRef.current = Object.assign(
176+
(id: string) => (id === "openai" ? { default: UnknownMock } : originalRequire(id)),
177+
originalRequire,
178+
) as typeof originalRequire;
179+
180+
patchOpenAI((e) => events.push(e), {});
181+
await UnknownMock.prototype.chat.completions.create({
182+
model: "gpt-unknown-xyz",
183+
messages: [{ role: "user", content: "hi" }],
184+
});
185+
assert.strictEqual(events[0].costUsd, 0);
186+
});
148187
});

0 commit comments

Comments
 (0)