
Commit bf5fe49

lpcox and Copilot committed
fix: extract OpenAI/Copilot cached_tokens from prompt_tokens_details
The token tracker only extracted Anthropic-style cache fields (cache_read_input_tokens, cache_creation_input_tokens) but missed the OpenAI/Copilot format, where cache info is nested under usage.prompt_tokens_details.cached_tokens. This caused token-usage.jsonl to report cache_read_tokens: 0 for all Copilot API requests, even when the API was returning significant cache hits (e.g., 43,894 of 43,977 prompt tokens cached).

Fix both extractUsageFromJson() and extractUsageFromSseLine() to read prompt_tokens_details.cached_tokens and map it to the normalized cache_read_input_tokens field.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6536923 commit bf5fe49
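
For context, the two providers report prompt caching in different places. The sketch below shows the shapes involved; the field names come from the diff, the token counts from the commit message, and the variable names are illustrative rather than the tracker's own code:

// OpenAI/Copilot usage: the cache counter is nested one level down.
const copilotUsage = {
  prompt_tokens: 43977,
  completion_tokens: 24,
  total_tokens: 44001,
  prompt_tokens_details: { cached_tokens: 43894 },
};

// Before this fix the tracker only looked for Anthropic-style top-level
// fields (cache_read_input_tokens, cache_creation_input_tokens), so the
// nested counter was silently dropped:
const before = copilotUsage.cache_read_input_tokens ?? 0; // 0

// After the fix, the nested counter is mapped onto the normalized field
// the Anthropic path already populates (the patch itself uses an explicit
// typeof check rather than optional chaining):
const after = copilotUsage.prompt_tokens_details?.cached_tokens ?? 0; // 43894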

2 files changed

Lines changed: 94 additions & 2 deletions


containers/api-proxy/token-tracker.js

Lines changed: 11 additions & 2 deletions
@@ -133,7 +133,7 @@ function createDecompressor(headers) {
  * Extract token usage from a non-streaming JSON response body.
  *
  * Supports:
- * - OpenAI/Copilot: { usage: { prompt_tokens, completion_tokens, total_tokens } }
+ * - OpenAI/Copilot: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } }
  * - Anthropic: { usage: { input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens } }
  *
  * Also extracts the model field if present.
@@ -180,6 +180,11 @@ function extractUsageFromJson(body) {
       usage.total_tokens = json.usage.total_tokens;
       hasField = true;
     }
+    // OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens)
+    if (json.usage.prompt_tokens_details && typeof json.usage.prompt_tokens_details.cached_tokens === 'number') {
+      usage.cache_read_input_tokens = json.usage.prompt_tokens_details.cached_tokens;
+      hasField = true;
+    }
     if (hasField) {
       result.usage = usage;
     }
@@ -201,7 +206,7 @@
  * - message_delta: { type: "message_delta", usage: { output_tokens } }
  *
  * OpenAI/Copilot streaming events with usage:
- * - Final chunk: { usage: { prompt_tokens, completion_tokens, total_tokens } }
+ * - Final chunk: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } }
  *
  * @param {string} line - A single SSE data line (without "data: " prefix)
  * @returns {{ usage: object|null, model: string|null }}
@@ -237,6 +242,10 @@ function extractUsageFromSseLine(line) {
     if (typeof json.usage.prompt_tokens === 'number') result.usage.prompt_tokens = json.usage.prompt_tokens;
     if (typeof json.usage.completion_tokens === 'number') result.usage.completion_tokens = json.usage.completion_tokens;
     if (typeof json.usage.total_tokens === 'number') result.usage.total_tokens = json.usage.total_tokens;
+    // OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens)
+    if (json.usage.prompt_tokens_details && typeof json.usage.prompt_tokens_details.cached_tokens === 'number') {
+      result.usage.cache_read_input_tokens = json.usage.prompt_tokens_details.cached_tokens;
+    }
     return result;
   }
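
As a quick sanity check of the new non-streaming path, here is a hypothetical snippet; the require path and the module's exports are assumed from the test file below, not shown in this diff:

const { extractUsageFromJson } = require('./token-tracker'); // path assumed

const body = Buffer.from(JSON.stringify({
  model: 'gpt-4o',
  usage: {
    prompt_tokens: 43977,
    completion_tokens: 24,
    total_tokens: 44001,
    prompt_tokens_details: { cached_tokens: 43894 },
  },
}));

const { usage, model } = extractUsageFromJson(body);
// model === 'gpt-4o'
// usage.cache_read_input_tokens === 43894 (undefined before this commit)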

containers/api-proxy/token-tracker.test.js

Lines changed: 83 additions & 0 deletions
@@ -110,6 +110,50 @@ describe('extractUsageFromJson', () => {
     const result = extractUsageFromJson(body);
     expect(result.usage).toEqual({ completion_tokens: 50 });
   });
+
+  test('extracts OpenAI prompt_tokens_details.cached_tokens', () => {
+    const body = Buffer.from(JSON.stringify({
+      id: 'chatcmpl-456',
+      model: 'claude-sonnet-4.6',
+      usage: {
+        prompt_tokens: 41344,
+        completion_tokens: 256,
+        total_tokens: 41600,
+        prompt_tokens_details: {
+          cached_tokens: 36500,
+        },
+      },
+    }));
+
+    const result = extractUsageFromJson(body);
+    expect(result.model).toBe('claude-sonnet-4.6');
+    expect(result.usage).toEqual({
+      prompt_tokens: 41344,
+      completion_tokens: 256,
+      total_tokens: 41600,
+      cache_read_input_tokens: 36500,
+    });
+  });
+
+  test('handles OpenAI usage without prompt_tokens_details', () => {
+    const body = Buffer.from(JSON.stringify({
+      model: 'gpt-4o',
+      usage: {
+        prompt_tokens: 100,
+        completion_tokens: 50,
+        total_tokens: 150,
+      },
+    }));
+
+    const result = extractUsageFromJson(body);
+    expect(result.usage).toEqual({
+      prompt_tokens: 100,
+      completion_tokens: 50,
+      total_tokens: 150,
+    });
+    // Should NOT have cache_read_input_tokens
+    expect(result.usage.cache_read_input_tokens).toBeUndefined();
+  });
 });

 // ── extractUsageFromSseLine ───────────────────────────────────────────
@@ -187,6 +231,30 @@ describe('extractUsageFromSseLine', () => {
     const result = extractUsageFromSseLine('invalid json');
     expect(result.usage).toBeNull();
   });
+
+  test('extracts OpenAI prompt_tokens_details.cached_tokens from streaming final chunk', () => {
+    const line = JSON.stringify({
+      model: 'claude-sonnet-4.6',
+      choices: [{ finish_reason: 'stop' }],
+      usage: {
+        prompt_tokens: 43977,
+        completion_tokens: 24,
+        total_tokens: 44001,
+        prompt_tokens_details: {
+          cached_tokens: 43894,
+        },
+      },
+    });
+
+    const result = extractUsageFromSseLine(line);
+    expect(result.model).toBe('claude-sonnet-4.6');
+    expect(result.usage).toEqual({
+      prompt_tokens: 43977,
+      completion_tokens: 24,
+      total_tokens: 44001,
+      cache_read_input_tokens: 43894,
+    });
+  });
 });

 // ── parseSseDataLines ─────────────────────────────────────────────────
@@ -283,6 +351,21 @@ describe('normalizeUsage', () => {
     expect(result.input_tokens).toBe(200);
     expect(result.output_tokens).toBe(80);
   });
+
+  test('normalizes OpenAI cache tokens via cache_read_input_tokens mapping', () => {
+    const result = normalizeUsage({
+      prompt_tokens: 43977,
+      completion_tokens: 24,
+      total_tokens: 44001,
+      cache_read_input_tokens: 43894,
+    });
+    expect(result).toEqual({
+      input_tokens: 43977,
+      output_tokens: 24,
+      cache_read_tokens: 43894,
+      cache_write_tokens: 0,
+    });
+  });
 });

 // ── isStreamingResponse ───────────────────────────────────────────────
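
The last test pins down the mapping that normalizeUsage() must perform. Below is a minimal sketch consistent with those assertions; the real implementation is untouched by this commit and not shown here, so treat it as an assumption about the function's behavior:

// Sketch only: reconstructed from the test expectations above, not the
// actual normalizeUsage() source in token-tracker.js.
function normalizeUsageSketch(usage) {
  return {
    // Prefer Anthropic names; fall back to the OpenAI equivalents.
    input_tokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
    output_tokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
    // Both providers now converge on cache_read_input_tokens upstream,
    // which is exactly why the fix maps cached_tokens onto that field.
    cache_read_tokens: usage.cache_read_input_tokens ?? 0,
    cache_write_tokens: usage.cache_creation_input_tokens ?? 0,
  };
}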
