Skip to content

Commit b198d26

Browse files
colegottdank and claude committed
fix: extract thoughtsTokenCount for Gemini thinking models
- Add reasoning_tokens extraction from Gemini usageMetadata.thoughtsTokenCount in worker's getDetailedUsage()
- Add reasoningTokens to GoogleBodyProcessor output in Jawn
- Fix promptTokens calculation when no caching is present (was returning undefined instead of the raw promptTokenCount)

This fixes incorrect token counting and cost calculations for Gemini thinking models (2.5 Flash, 3.x Pro), where thoughtsTokenCount was not being stored separately.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d7d8b4b commit b198d26

File tree

3 files changed

+65
-5
lines changed

3 files changed

+65
-5
lines changed

valhalla/jawn/src/lib/shared/bodyProcessors/__tests__/googleBodyProcessor.test.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,4 +118,58 @@ describe("GoogleBodyProcessor", () => {
118118
promptCacheReadTokens: 13644,
119119
});
120120
});
121+
122+
it("handles Gemini thinking model responses with thoughtsTokenCount", async () => {
123+
const body = {
124+
candidates: [
125+
{
126+
content: {
127+
role: "model",
128+
parts: [
129+
{
130+
text: "25 * 47 = 1175",
131+
},
132+
],
133+
},
134+
finishReason: "STOP",
135+
avgLogprobs: -0.24105726576781206,
136+
},
137+
],
138+
usageMetadata: {
139+
promptTokenCount: 15,
140+
candidatesTokenCount: 359,
141+
totalTokenCount: 1035,
142+
thoughtsTokenCount: 661,
143+
trafficType: "ON_DEMAND",
144+
promptTokensDetails: [
145+
{
146+
modality: "TEXT",
147+
tokenCount: 15,
148+
},
149+
],
150+
candidatesTokensDetails: [
151+
{
152+
modality: "TEXT",
153+
tokenCount: 359,
154+
},
155+
],
156+
},
157+
modelVersion: "gemini-2.5-flash",
158+
createTime: "2026-02-24T06:59:17.723195Z",
159+
responseId: "RUydafuRLIuolu8P8YbxgAg",
160+
};
161+
162+
const { usage } = await parse(body);
163+
164+
expect(usage).toEqual({
165+
totalTokens: 1035,
166+
promptTokens: 15,
167+
// thoughts + candidates (661 + 359)
168+
completionTokens: 1020,
169+
// reasoningTokens should be extracted separately
170+
reasoningTokens: 661,
171+
heliconeCalculated: false,
172+
promptCacheReadTokens: undefined,
173+
});
174+
});
121175
});

valhalla/jawn/src/lib/shared/bodyProcessors/googleBodyProcessor.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,19 +70,23 @@ export class GoogleBodyProcessor implements IBodyProcessor {
7070
?.promptTokenCount;
7171
const cachedContentTokens = usageMetadataItem?.usageMetadata
7272
?.cachedContentTokenCount;
73+
// If there's no caching, return raw promptTokenCount
74+
// If there's caching, subtract cached tokens from prompt tokens
7375
const adjustedPromptTokens =
74-
promptTokens !== undefined && cachedContentTokens !== undefined
75-
? promptTokens - cachedContentTokens
76+
promptTokens !== undefined
77+
? promptTokens - (cachedContentTokens ?? 0)
7678
: undefined;
7779

80+
const thoughtsTokenCount = usageMetadataItem?.usageMetadata?.thoughtsTokenCount ?? 0;
81+
const candidatesTokenCount = usageMetadataItem?.usageMetadata?.candidatesTokenCount ?? 0;
82+
7883
return ok({
7984
processedBody: parsedResponseBody,
8085
usage: {
8186
totalTokens: usageMetadataItem?.usageMetadata?.totalTokenCount,
8287
promptTokens: adjustedPromptTokens,
83-
completionTokens:
84-
(usageMetadataItem?.usageMetadata?.thoughtsTokenCount ?? 0) +
85-
(usageMetadataItem?.usageMetadata?.candidatesTokenCount ?? 0),
88+
completionTokens: thoughtsTokenCount + candidatesTokenCount,
89+
reasoningTokens: thoughtsTokenCount > 0 ? thoughtsTokenCount : undefined,
8690
heliconeCalculated: false,
8791
promptCacheReadTokens: cachedContentTokens,
8892
},

worker/src/lib/dbLogger/DBLoggable.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,7 @@ export class DBLoggable {
532532
promptTokenCount?: number;
533533
candidatesTokenCount?: number;
534534
cachedContentTokenCount?: number;
535+
thoughtsTokenCount?: number;
535536
};
536537
};
537538
const usageMetadata = response.usageMetadata;
@@ -540,6 +541,7 @@ export class DBLoggable {
540541
prompt_tokens: usageMetadata?.promptTokenCount,
541542
completion_tokens: usageMetadata?.candidatesTokenCount,
542543
prompt_cache_read_tokens: usageMetadata?.cachedContentTokenCount,
544+
reasoning_tokens: usageMetadata?.thoughtsTokenCount,
543545
};
544546
}
545547

0 commit comments

Comments (0)