Skip to content

Commit 53fe0fc

Browse files
committed
feat(api): global LLM usage observer hook
Hosts (wilds-ai foundation_usage_events, billing dashboards, BI pipelines) previously had to wrap every agentos LLM callsite in their own emitter to get per-call cost telemetry. wilds-ai had ~50+ callsites and the duplication was already biting. New module observers.ts exposes setGlobalLlmObserver(callback). Hosts register once at boot: setGlobalLlmObserver((event) => { recordFoundationUsageEvent({ meterKey: event.source ?? 'llm.call', providerKey: event.provider, modelKey: event.model, quantity: event.usage.costUSD ?? 0, unit: 'usd', }); }); generateText + streamText now fire fireLlmUsageObserver on every successful return / stream finalize. Payload carries resolved provider, model, full TokenUsage (including costUSD + cache tokens), caller-supplied source label, finishReason, and surface so the host can route by meter / disambiguate streaming vs non-streaming. Fire-and-forget: synchronous + async observer errors are swallowed so a misbehaving observer can never crash the LLM call path. New source?: string option on GenerateTextOptions lets callers tag each call with a meter label (narrator_turn, companion_reply, world_compile_job, etc.) without threading their own context. 10 unit tests on observers.ts pass. generateText + streamText return paths instrumented at every success exit.
1 parent 84b48d3 commit 53fe0fc

182 files changed

Lines changed: 332 additions & 12 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
File renamed without changes.
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/**
2+
* Tests for the global LLM usage observer registration and dispatch.
3+
*/
4+
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
5+
import {
6+
setGlobalLlmObserver,
7+
getGlobalLlmObserver,
8+
fireLlmUsageObserver,
9+
} from '../observers.js';
10+
import type { LlmUsageEvent } from '../observers.js';
11+
12+
function makeEvent(overrides?: Partial<LlmUsageEvent>): LlmUsageEvent {
13+
return {
14+
provider: 'anthropic',
15+
model: 'claude-sonnet-4-6',
16+
usage: {
17+
promptTokens: 100,
18+
completionTokens: 50,
19+
totalTokens: 150,
20+
costUSD: 0.012,
21+
},
22+
source: 'unit-test',
23+
finishReason: 'stop',
24+
surface: 'generateText',
25+
...overrides,
26+
};
27+
}
28+
29+
beforeEach(() => {
30+
setGlobalLlmObserver(null);
31+
});
32+
33+
afterEach(() => {
34+
setGlobalLlmObserver(null);
35+
});
36+
37+
describe('setGlobalLlmObserver / getGlobalLlmObserver', () => {
38+
it('returns null when no observer is registered', () => {
39+
expect(getGlobalLlmObserver()).toBeNull();
40+
});
41+
42+
it('registers and retrieves the observer', () => {
43+
const obs = vi.fn();
44+
setGlobalLlmObserver(obs);
45+
expect(getGlobalLlmObserver()).toBe(obs);
46+
});
47+
48+
it('clears the observer when null is passed', () => {
49+
setGlobalLlmObserver(vi.fn());
50+
setGlobalLlmObserver(null);
51+
expect(getGlobalLlmObserver()).toBeNull();
52+
});
53+
54+
it('replaces an existing observer (last writer wins)', () => {
55+
const first = vi.fn();
56+
const second = vi.fn();
57+
setGlobalLlmObserver(first);
58+
setGlobalLlmObserver(second);
59+
expect(getGlobalLlmObserver()).toBe(second);
60+
});
61+
});
62+
63+
describe('fireLlmUsageObserver', () => {
64+
it('no-ops when no observer is registered (does not throw)', () => {
65+
expect(() => fireLlmUsageObserver(makeEvent())).not.toThrow();
66+
});
67+
68+
it('passes the full event payload to the observer', () => {
69+
const obs = vi.fn();
70+
setGlobalLlmObserver(obs);
71+
const event = makeEvent({ source: 'narrator_turn' });
72+
fireLlmUsageObserver(event);
73+
expect(obs).toHaveBeenCalledTimes(1);
74+
expect(obs).toHaveBeenCalledWith(event);
75+
});
76+
77+
it('swallows synchronous errors from the observer', () => {
78+
setGlobalLlmObserver(() => {
79+
throw new Error('observer crash');
80+
});
81+
expect(() => fireLlmUsageObserver(makeEvent())).not.toThrow();
82+
});
83+
84+
it('swallows promise rejections from async observers', async () => {
85+
setGlobalLlmObserver(async () => {
86+
throw new Error('async observer crash');
87+
});
88+
expect(() => fireLlmUsageObserver(makeEvent())).not.toThrow();
89+
// Yield to the microtask queue so the rejection lands + gets swallowed.
90+
await new Promise((resolve) => setTimeout(resolve, 0));
91+
});
92+
93+
it('forwards source label and surface so hosts can route by meter', () => {
94+
const obs = vi.fn();
95+
setGlobalLlmObserver(obs);
96+
fireLlmUsageObserver(makeEvent({ source: 'companion_reply', surface: 'streamText' }));
97+
expect(obs.mock.calls[0][0].source).toBe('companion_reply');
98+
expect(obs.mock.calls[0][0].surface).toBe('streamText');
99+
});
100+
101+
it('forwards cache-token fields when present on usage', () => {
102+
const obs = vi.fn();
103+
setGlobalLlmObserver(obs);
104+
fireLlmUsageObserver(
105+
makeEvent({
106+
usage: {
107+
promptTokens: 1000,
108+
completionTokens: 50,
109+
totalTokens: 1050,
110+
costUSD: 0.005,
111+
cacheReadTokens: 800,
112+
cacheCreationTokens: 200,
113+
},
114+
}),
115+
);
116+
expect(obs.mock.calls[0][0].usage.cacheReadTokens).toBe(800);
117+
expect(obs.mock.calls[0][0].usage.cacheCreationTokens).toBe(200);
118+
});
119+
});

src/api/generateText.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import { randomUUID } from 'node:crypto';
1515
import { resolveModelOption, resolveProvider, createProviderManager } from './model.js';
1616
import { attachUsageAttributes, toTurnMetricUsage } from './observability.js';
17+
import { fireLlmUsageObserver } from './observers.js';
1718
import {
1819
hostPolicyToRouteParams,
1920
mergeRequiredCapabilities,
@@ -346,6 +347,15 @@ export interface GenerateTextOptions {
346347
* @param fallbackProvider - The provider identifier being tried next.
347348
*/
348349
onFallback?: (error: Error, fallbackProvider: string) => void;
350+
/**
351+
* Optional source label forwarded to the global LLM usage observer
352+
* registered via {@link setGlobalLlmObserver}. Hosts use this to
353+
* tag the emitted telemetry row with a caller-defined meter key
354+
* (e.g. 'narrator_turn', 'companion_reply', 'world_compile_job').
355+
*
356+
* Has no effect when no observer is registered.
357+
*/
358+
source?: string;
349359
/**
350360
* Optional model router for intelligent provider/model selection.
351361
* When provided, the router's `selectModel()` is called before provider
@@ -1304,6 +1314,18 @@ export async function generateText(opts: GenerateTextOptions): Promise<GenerateT
13041314
span?.setAttribute('agentos.api.finish_reason', choice.finishReason ?? 'stop');
13051315
span?.setAttribute('agentos.api.tool_calls', allToolCalls.length);
13061316
attachUsageAttributes(span, totalUsage);
1317+
// 2026-05-29 — fire the global LLM usage observer so hosts
1318+
// (wilds-ai foundation_usage_events, billing dashboards) get
1319+
// the resolved provider + model + cost without wrapping every
1320+
// callsite. No-op when no observer is registered.
1321+
fireLlmUsageObserver({
1322+
provider: resolved.providerId,
1323+
model: resolved.modelId,
1324+
usage: totalUsage,
1325+
source: opts.source,
1326+
finishReason: choice.finishReason ?? 'stop',
1327+
surface: 'generateText',
1328+
});
13071329
return {
13081330
provider: resolved.providerId,
13091331
model: resolved.modelId,
@@ -1417,6 +1439,14 @@ export async function generateText(opts: GenerateTextOptions): Promise<GenerateT
14171439
span?.setAttribute('agentos.api.finish_reason', choice.finishReason ?? 'stop');
14181440
span?.setAttribute('agentos.api.tool_calls', allToolCalls.length);
14191441
attachUsageAttributes(span, totalUsage);
1442+
fireLlmUsageObserver({
1443+
provider: resolved.providerId,
1444+
model: resolved.modelId,
1445+
usage: totalUsage,
1446+
source: opts.source,
1447+
finishReason: choice.finishReason ?? 'stop',
1448+
surface: 'generateText',
1449+
});
14201450
return {
14211451
provider: resolved.providerId,
14221452
model: resolved.modelId,
@@ -1433,6 +1463,14 @@ export async function generateText(opts: GenerateTextOptions): Promise<GenerateT
14331463
span?.setAttribute('agentos.api.finish_reason', 'tool-calls');
14341464
span?.setAttribute('agentos.api.tool_calls', allToolCalls.length);
14351465
attachUsageAttributes(span, totalUsage);
1466+
fireLlmUsageObserver({
1467+
provider: resolved.providerId,
1468+
model: resolved.modelId,
1469+
usage: totalUsage,
1470+
source: opts.source,
1471+
finishReason: 'tool-calls',
1472+
surface: 'generateText',
1473+
});
14361474
return {
14371475
provider: resolved.providerId,
14381476
model: resolved.modelId,

src/api/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ export { embedText } from './embedText.js';
3838
export { generateImage } from './generateImage.js';
3939
export { transferStyle } from './transferStyle.js';
4040

41+
// --- LLM usage observer (global cost / telemetry hook) ---
42+
export {
43+
setGlobalLlmObserver,
44+
type LlmUsageEvent,
45+
type LlmUsageObserver,
46+
} from './observers.js';
47+
4148
// --- Global default provider config ---
4249
export {
4350
setDefaultProvider,

src/api/observers.ts

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/**
2+
* @file observers.ts
3+
* @description Global observer registration for AgentOS LLM usage events.
4+
*
5+
* Hosts (Next.js apps, CLI tools, long-running services) register a
6+
* single callback at boot time. Every {@link generateText} /
7+
* {@link generateObject} / streamText / streamObject completion fires
8+
* the callback with the resolved provider, model, usage metrics, and
9+
* caller-supplied source label.
10+
*
11+
* Rationale: pre-2026-05-29, every host that wanted per-call cost
12+
* telemetry had to wrap each LLM callsite in its own emitter. wilds-ai
13+
* (50+ callsites across narrator / companion / world-compile / asset
14+
* judge / etc.) was the canonical example of this duplication. The
15+
* global observer pattern lets the host register once:
16+
*
17+
* import { setGlobalLlmObserver } from '@framers/agentos';
18+
* setGlobalLlmObserver((event) => {
19+
* recordFoundationUsageEvent({
20+
* meterKey: event.source ?? 'llm.call',
21+
* providerKey: event.provider,
22+
* modelKey: event.model,
23+
* quantity: event.usage.costUSD ?? 0,
24+
* unit: 'usd',
25+
* ...
26+
* });
27+
* });
28+
*
29+
* And every downstream agentos call automatically flows through. No
30+
* per-callsite wrappers required.
31+
*
32+
* Observer call is fire-and-forget: errors thrown by the host callback
33+
* are swallowed so telemetry never crashes the LLM call path.
34+
*
35+
* @module agentos/api/observers
36+
*/
37+
import type { TokenUsage } from './generateText.js';
38+
39+
/**
40+
* Payload delivered to a registered usage observer once an LLM call
41+
* resolves. Mirrors the agentos-side fields that downstream cost /
42+
* billing systems care about.
43+
*/
44+
export interface LlmUsageEvent {
45+
/** Resolved provider id (e.g. 'openai', 'anthropic', 'openrouter'). */
46+
provider: string;
47+
/** Resolved model id (e.g. 'gpt-4o', 'claude-sonnet-4-6'). */
48+
model: string;
49+
/**
50+
* Aggregated token usage for the call — promptTokens, completionTokens,
51+
* totalTokens, costUSD, cacheReadTokens, cacheCreationTokens.
52+
* Mirrors the `usage` field on the GenerateText/Object result.
53+
*/
54+
usage: TokenUsage;
55+
/**
56+
* Opt-in source label set by the caller via the `source` option
57+
* (e.g. 'narrator_turn', 'companion_reply', 'world_compile_job').
58+
* Hosts use this to tag emitted rows with their own meter_key.
59+
*/
60+
source?: string;
61+
/**
62+
* Mirrors the `finishReason` on the GenerateText result so observers
63+
* can distinguish a clean stop from a token-cap truncation.
64+
*/
65+
finishReason?: string;
66+
/**
67+
* Which agentos surface fired the event. Lets a single observer
68+
* route generateText, generateObject, generateImage, embedText, etc.
69+
* into different meters when needed.
70+
*/
71+
surface:
72+
| 'generateText'
73+
| 'generateObject'
74+
| 'streamText'
75+
| 'streamObject'
76+
| 'embedText'
77+
| 'generateImage';
78+
}
79+
80+
/**
81+
* Observer callback signature. May return a promise; agentos waits on
82+
* it only with `void` (no backpressure on the LLM call path).
83+
*/
84+
export type LlmUsageObserver = (event: LlmUsageEvent) => void | Promise<void>;
85+
86+
let globalObserver: LlmUsageObserver | null = null;
87+
88+
/**
89+
* Register (or clear) the process-wide LLM usage observer.
90+
*
91+
* Hosts typically call this once at app boot:
92+
*
93+
* ```ts
94+
* setGlobalLlmObserver((event) => {
95+
* recordFoundationUsageEvent({
96+
* meterKey: event.source ?? 'llm.call',
97+
* providerKey: event.provider,
98+
* modelKey: event.model,
99+
* quantity: event.usage.costUSD ?? 0,
100+
* unit: 'usd',
101+
* });
102+
* });
103+
* ```
104+
*
105+
* Passing `null` clears the observer (useful in tests).
106+
*
107+
* @param observer - The observer callback, or null to clear.
108+
*/
109+
export function setGlobalLlmObserver(observer: LlmUsageObserver | null): void {
110+
globalObserver = observer;
111+
}
112+
113+
/**
114+
* Returns the currently-registered global observer, or null when
115+
* unregistered. Exposed so internal agentos code can short-circuit
116+
* the dispatcher when nothing is wired (saves the allocation +
117+
* try/catch on every LLM call).
118+
*
119+
* @internal
120+
*/
121+
export function getGlobalLlmObserver(): LlmUsageObserver | null {
122+
return globalObserver;
123+
}
124+
125+
/**
126+
* Internal dispatcher called by every agentos LLM-resolving surface
127+
* (generateText/generateObject/etc.) immediately before returning to
128+
* the caller. Fire-and-forget — errors thrown by the host observer
129+
* are caught + logged to stderr so a misbehaving observer can never
130+
* crash the LLM call path.
131+
*
132+
* No-op when no observer is registered.
133+
*
134+
* @internal
135+
*/
136+
export function fireLlmUsageObserver(event: LlmUsageEvent): void {
137+
const cb = globalObserver;
138+
if (!cb) return;
139+
try {
140+
const result = cb(event);
141+
if (result && typeof (result as Promise<void>).then === 'function') {
142+
void (result as Promise<void>).catch((err) => {
143+
// eslint-disable-next-line no-console
144+
console.warn('[agentos.onUsage] observer promise rejected:', err);
145+
});
146+
}
147+
} catch (err) {
148+
// eslint-disable-next-line no-console
149+
console.warn('[agentos.onUsage] observer threw synchronously:', err);
150+
}
151+
}

src/api/runtime/global-default.integration.test.ts renamed to src/api/runtime/__tests__/global-default.integration.test.ts

File renamed without changes.
File renamed without changes.

src/api/runtime/provider-priority.integration.test.ts renamed to src/api/runtime/__tests__/provider-priority.integration.test.ts

File renamed without changes.
File renamed without changes.

src/api/streamText.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import { randomUUID } from 'node:crypto';
1111
import { resolveModelOption, resolveProvider, createProviderManager } from './model.js';
1212
import { attachUsageAttributes, toTurnMetricUsage } from './observability.js';
13+
import { fireLlmUsageObserver } from './observers.js';
1314
import { hostPolicyToRouteParams, mergeRequiredCapabilities } from './runtime/hostPolicy.js';
1415
import { adaptTools } from './runtime/toolAdapter.js';
1516
import {
@@ -832,6 +833,22 @@ export function streamText(opts: GenerateTextOptions): StreamTextResult {
832833
status: metricStatus,
833834
usage: toTurnMetricUsage(usage),
834835
});
836+
// 2026-05-29 — fire the global LLM usage observer with the
837+
// finalized stream usage. Same hook generateText fires; hosts
838+
// (wilds-ai foundation_usage_events, billing dashboards) get
839+
// one consistent stream of events whether the caller used
840+
// generateText or streamText. No-op when no observer is
841+
// registered.
842+
if (metricStatus !== 'error') {
843+
fireLlmUsageObserver({
844+
provider: recordedProviderId ?? '',
845+
model: recordedModelId ?? '',
846+
usage,
847+
source: opts.source,
848+
finishReason: allToolCalls.length > 0 && !finalText ? 'tool-calls' : 'stop',
849+
surface: 'streamText',
850+
});
851+
}
835852
}
836853
}
837854

0 commit comments

Comments
 (0)