Skip to content

Commit 1a47934

Browse files
colegottdank and claude committed
fix: extract Anthropic cache_creation_input_tokens without TTL breakdown
The AnthropicUsageProcessor was only extracting cache write tokens from the TTL breakdown (ephemeral_5m_input_tokens, ephemeral_1h_input_tokens) but completely ignoring the overall cache_creation_input_tokens field. When Anthropic's API returns cache_creation_input_tokens without the TTL breakdown (older API versions, certain response types), the cache write tokens were lost, causing: - Incorrect cost calculations (undercharging) - Missing cache write data in the dashboard - "cache read without cache creation" display issues Fixes: 1. AnthropicUsageProcessor now extracts cache_creation_input_tokens and uses it as fallback when TTL breakdown is not available 2. Fixed toOpenai.ts non-streaming transformation that incorrectly fell back to cachedTokens (read) instead of cacheWriteTokens (write) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d7d8b4b commit 1a47934

File tree

3 files changed

+88
-1
lines changed

3 files changed

+88
-1
lines changed

packages/__tests__/cost/usageProcessor.test.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,81 @@ describe("AnthropicUsageProcessor", () => {
298298
});
299299
});
300300

301+
it("should extract cache_creation_input_tokens without TTL breakdown", async () => {
302+
// Test case for older API responses that have cache_creation_input_tokens
303+
// but no cache_creation.ephemeral_5m_input_tokens breakdown
304+
const mockResponse = {
305+
id: "msg_test",
306+
type: "message",
307+
role: "assistant",
308+
content: [{ type: "text", text: "Hello" }],
309+
model: "claude-sonnet-4",
310+
stop_reason: "end_turn",
311+
usage: {
312+
input_tokens: 100,
313+
output_tokens: 50,
314+
cache_creation_input_tokens: 500,
315+
cache_read_input_tokens: 200,
316+
// Note: no cache_creation.ephemeral_5m_input_tokens
317+
},
318+
};
319+
320+
const result = await processor.parse({
321+
responseBody: JSON.stringify(mockResponse),
322+
isStream: false,
323+
model: "claude-sonnet-4",
324+
});
325+
326+
expect(result.error).toBeNull();
327+
expect(result.data).toEqual({
328+
input: 100,
329+
output: 50,
330+
cacheDetails: {
331+
cachedInput: 200,
332+
write5m: 500, // Should fall back to cache_creation_input_tokens
333+
},
334+
});
335+
});
336+
337+
it("should prefer TTL breakdown over total cache_creation_input_tokens", async () => {
338+
// When both total and breakdown are provided, use the breakdown
339+
const mockResponse = {
340+
id: "msg_test",
341+
type: "message",
342+
role: "assistant",
343+
content: [{ type: "text", text: "Hello" }],
344+
model: "claude-sonnet-4",
345+
stop_reason: "end_turn",
346+
usage: {
347+
input_tokens: 100,
348+
output_tokens: 50,
349+
cache_creation_input_tokens: 600, // Total (should be 5m + 1h)
350+
cache_read_input_tokens: 200,
351+
cache_creation: {
352+
ephemeral_5m_input_tokens: 400,
353+
ephemeral_1h_input_tokens: 200,
354+
},
355+
},
356+
};
357+
358+
const result = await processor.parse({
359+
responseBody: JSON.stringify(mockResponse),
360+
isStream: false,
361+
model: "claude-sonnet-4",
362+
});
363+
364+
expect(result.error).toBeNull();
365+
expect(result.data).toEqual({
366+
input: 100,
367+
output: 50,
368+
cacheDetails: {
369+
cachedInput: 200,
370+
write5m: 400, // Use breakdown, not total
371+
write1h: 200,
372+
},
373+
});
374+
});
375+
301376
it("usage processing snapshot", async () => {
302377
const testCases = [
303378
{

packages/cost/usage/anthropicUsageProcessor.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ export class AnthropicUsageProcessor implements IUsageProcessor {
104104
const outputTokens = usage.output_tokens ?? 0;
105105
const cacheReadInputTokens = usage.cache_read_input_tokens ?? 0;
106106

107+
// Total cache creation tokens (always present when caching occurs)
108+
const cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
109+
110+
// TTL breakdown (may not be present in all API versions/responses)
107111
const cacheCreation = usage.cache_creation || {};
108112
const ephemeral5mTokens = cacheCreation.ephemeral_5m_input_tokens ?? 0;
109113
const ephemeral1hTokens = cacheCreation.ephemeral_1h_input_tokens ?? 0;
@@ -119,13 +123,21 @@ export class AnthropicUsageProcessor implements IUsageProcessor {
119123

120124
if (
121125
cacheReadInputTokens > 0 ||
126+
cacheCreationInputTokens > 0 ||
122127
ephemeral5mTokens > 0 ||
123128
ephemeral1hTokens > 0
124129
) {
125130
modelUsage.cacheDetails = { cachedInput: cacheReadInputTokens };
126131

132+
// Use TTL breakdown if available, otherwise fall back to total cache creation tokens
133+
// This handles cases where cache_creation_input_tokens is set but TTL breakdown is not
134+
const ttlBreakdownTotal = ephemeral5mTokens + ephemeral1hTokens;
135+
127136
if (ephemeral5mTokens > 0) {
128137
modelUsage.cacheDetails.write5m = ephemeral5mTokens;
138+
} else if (cacheCreationInputTokens > 0 && ttlBreakdownTotal === 0) {
139+
// No TTL breakdown provided, use total cache creation tokens as 5m (default TTL)
140+
modelUsage.cacheDetails.write5m = cacheCreationInputTokens;
129141
}
130142

131143
if (ephemeral1hTokens > 0) {

packages/llm-mapper/transform/providers/anthropic/response/toOpenai.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ export function toOpenAI(response: AnthropicResponseBody): OpenAIResponseBody {
6969
cache_write_details: {
7070
write_5m_tokens:
7171
anthropicUsage.cache_creation?.ephemeral_5m_input_tokens ??
72-
cachedTokens ??
72+
cacheWriteTokens ??
7373
0,
7474
write_1h_tokens:
7575
anthropicUsage.cache_creation?.ephemeral_1h_input_tokens ?? 0,

0 commit comments

Comments
 (0)