Skip to content

Commit 1a47934

Browse files
colegottdank and claude committed
fix: extract Anthropic cache_creation_input_tokens without TTL breakdown
The AnthropicUsageProcessor was only extracting cache write tokens from the TTL breakdown (ephemeral_5m_input_tokens, ephemeral_1h_input_tokens) but completely ignoring the overall cache_creation_input_tokens field. When Anthropic's API returns cache_creation_input_tokens without the TTL breakdown (older API versions, certain response types), the cache write tokens were lost, causing: - Incorrect cost calculations (undercharging) - Missing cache write data in the dashboard - "cache read without cache creation" display issues Fixes: 1. AnthropicUsageProcessor now extracts cache_creation_input_tokens and uses it as fallback when TTL breakdown is not available 2. Fixed toOpenai.ts non-streaming transformation that incorrectly fell back to cachedTokens (read) instead of cacheWriteTokens (write) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d7d8b4b commit 1a47934

File tree

3 files changed

+88
-1
lines changed

3 files changed

+88
-1
lines changed

packages/__tests__/cost/usageProcessor.test.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,81 @@ describe("AnthropicUsageProcessor", () => {
298298
});
299299
});
300300

301+
it("should extract cache_creation_input_tokens without TTL breakdown", async () => {
302+
// Test case for older API responses that have cache_creation_input_tokens
303+
// but no cache_creation.ephemeral_5m_input_tokens breakdown
304+
const mockResponse = {
305+
id: "msg_test",
306+
type: "message",
307+
role: "assistant",
308+
content: [{ type: "text", text: "Hello" }],
309+
model: "claude-sonnet-4",
310+
stop_reason: "end_turn",
311+
usage: {
312+
input_tokens: 100,
313+
output_tokens: 50,
314+
cache_creation_input_tokens: 500,
315+
cache_read_input_tokens: 200,
316+
// Note: no cache_creation.ephemeral_5m_input_tokens
317+
},
318+
};
319+
320+
const result = await processor.parse({
321+
responseBody: JSON.stringify(mockResponse),
322+
isStream: false,
323+
model: "claude-sonnet-4",
324+
});
325+
326+
expect(result.error).toBeNull();
327+
expect(result.data).toEqual({
328+
input: 100,
329+
output: 50,
330+
cacheDetails: {
331+
cachedInput: 200,
332+
write5m: 500, // Should fall back to cache_creation_input_tokens
333+
},
334+
});
335+
});
336+
337+
it("should prefer TTL breakdown over total cache_creation_input_tokens", async () => {
338+
// When both total and breakdown are provided, use the breakdown
339+
const mockResponse = {
340+
id: "msg_test",
341+
type: "message",
342+
role: "assistant",
343+
content: [{ type: "text", text: "Hello" }],
344+
model: "claude-sonnet-4",
345+
stop_reason: "end_turn",
346+
usage: {
347+
input_tokens: 100,
348+
output_tokens: 50,
349+
cache_creation_input_tokens: 600, // Total (should be 5m + 1h)
350+
cache_read_input_tokens: 200,
351+
cache_creation: {
352+
ephemeral_5m_input_tokens: 400,
353+
ephemeral_1h_input_tokens: 200,
354+
},
355+
},
356+
};
357+
358+
const result = await processor.parse({
359+
responseBody: JSON.stringify(mockResponse),
360+
isStream: false,
361+
model: "claude-sonnet-4",
362+
});
363+
364+
expect(result.error).toBeNull();
365+
expect(result.data).toEqual({
366+
input: 100,
367+
output: 50,
368+
cacheDetails: {
369+
cachedInput: 200,
370+
write5m: 400, // Use breakdown, not total
371+
write1h: 200,
372+
},
373+
});
374+
});
375+
301376
it("usage processing snapshot", async () => {
302377
const testCases = [
303378
{

packages/cost/usage/anthropicUsageProcessor.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ export class AnthropicUsageProcessor implements IUsageProcessor {
104104
const outputTokens = usage.output_tokens ?? 0;
105105
const cacheReadInputTokens = usage.cache_read_input_tokens ?? 0;
106106

107+
// Total cache creation tokens (always present when caching occurs)
108+
const cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
109+
110+
// TTL breakdown (may not be present in all API versions/responses)
107111
const cacheCreation = usage.cache_creation || {};
108112
const ephemeral5mTokens = cacheCreation.ephemeral_5m_input_tokens ?? 0;
109113
const ephemeral1hTokens = cacheCreation.ephemeral_1h_input_tokens ?? 0;
@@ -119,13 +123,21 @@ export class AnthropicUsageProcessor implements IUsageProcessor {
119123

120124
if (
121125
cacheReadInputTokens > 0 ||
126+
cacheCreationInputTokens > 0 ||
122127
ephemeral5mTokens > 0 ||
123128
ephemeral1hTokens > 0
124129
) {
125130
modelUsage.cacheDetails = { cachedInput: cacheReadInputTokens };
126131

132+
// Use TTL breakdown if available, otherwise fall back to total cache creation tokens
133+
// This handles cases where cache_creation_input_tokens is set but TTL breakdown is not
134+
const ttlBreakdownTotal = ephemeral5mTokens + ephemeral1hTokens;
135+
127136
if (ephemeral5mTokens > 0) {
128137
modelUsage.cacheDetails.write5m = ephemeral5mTokens;
138+
} else if (cacheCreationInputTokens > 0 && ttlBreakdownTotal === 0) {
139+
// No TTL breakdown provided, use total cache creation tokens as 5m (default TTL)
140+
modelUsage.cacheDetails.write5m = cacheCreationInputTokens;
129141
}
130142

131143
if (ephemeral1hTokens > 0) {

packages/llm-mapper/transform/providers/anthropic/response/toOpenai.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ export function toOpenAI(response: AnthropicResponseBody): OpenAIResponseBody {
6969
cache_write_details: {
7070
write_5m_tokens:
7171
anthropicUsage.cache_creation?.ephemeral_5m_input_tokens ??
72-
cachedTokens ??
72+
cacheWriteTokens ??
7373
0,
7474
write_1h_tokens:
7575
anthropicUsage.cache_creation?.ephemeral_1h_input_tokens ?? 0,

0 commit comments

Comments
 (0)