Skip to content

Commit 8882351

Browse files
authored
feat: Store and display reasoning tokens separately (#5429)
* feat: Store and display reasoning tokens separately; * rename migration file; * change row to be non-nullable
1 parent 333f379 commit 8882351

File tree

28 files changed

+430
-10
lines changed

28 files changed

+430
-10
lines changed

bifrost/lib/clients/jawnTypes/private.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,6 +2042,8 @@ Json: JsonObject;
20422042
/** Format: double */
20432043
completion_tokens: number | null;
20442044
/** Format: double */
2045+
reasoning_tokens: number | null;
2046+
/** Format: double */
20452047
prompt_audio_tokens: number | null;
20462048
/** Format: double */
20472049
completion_audio_tokens: number | null;

bifrost/lib/clients/jawnTypes/public.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2117,6 +2117,8 @@ Json: JsonObject;
21172117
/** Format: double */
21182118
completion_tokens: number | null;
21192119
/** Format: double */
2120+
reasoning_tokens: number | null;
2121+
/** Format: double */
21202122
prompt_audio_tokens: number | null;
21212123
/** Format: double */
21222124
completion_audio_tokens: number | null;
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ALTER TABLE request_response_rmt
2+
ADD COLUMN reasoning_tokens Int64 DEFAULT 0;

docs/swagger.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5189,6 +5189,11 @@
51895189
"format": "double",
51905190
"nullable": true
51915191
},
5192+
"reasoning_tokens": {
5193+
"type": "number",
5194+
"format": "double",
5195+
"nullable": true
5196+
},
51925197
"prompt_audio_tokens": {
51935198
"type": "number",
51945199
"format": "double",
@@ -5327,6 +5332,7 @@
53275332
"prompt_cache_write_tokens",
53285333
"prompt_cache_read_tokens",
53295334
"completion_tokens",
5335+
"reasoning_tokens",
53305336
"prompt_audio_tokens",
53315337
"completion_audio_tokens",
53325338
"cost",

helicone-mcp/src/types/public.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2117,6 +2117,8 @@ Json: JsonObject;
21172117
/** Format: double */
21182118
completion_tokens: number | null;
21192119
/** Format: double */
2120+
reasoning_tokens: number | null;
2121+
/** Format: double */
21202122
prompt_audio_tokens: number | null;
21212123
/** Format: double */
21222124
completion_audio_tokens: number | null;

packages/llm-mapper/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ type HeliconeMetadata = {
244244
totalTokens: number | null;
245245
promptTokens: number | null;
246246
completionTokens: number | null;
247+
reasoningTokens: number | null;
247248
promptCacheWriteTokens: number | null;
248249
promptCacheReadTokens: number | null;
249250
latency: number | null;
@@ -366,6 +367,7 @@ export interface HeliconeRequest {
366367
prompt_cache_write_tokens: number | null;
367368
prompt_cache_read_tokens: number | null;
368369
completion_tokens: number | null;
370+
reasoning_tokens: number | null;
369371
prompt_audio_tokens: number | null;
370372
completion_audio_tokens: number | null;
371373
cost: number | null;

packages/llm-mapper/utils/getMappedContent.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ const metaDataFromHeliconeRequest = (
8080
createdAt: heliconeRequest.request_created_at,
8181
path: heliconeRequest.request_path,
8282
completionTokens: heliconeRequest.completion_tokens,
83+
reasoningTokens: heliconeRequest.reasoning_tokens,
8384
promptCacheWriteTokens: heliconeRequest.prompt_cache_write_tokens,
8485
promptCacheReadTokens: heliconeRequest.prompt_cache_read_tokens,
8586
promptTokens: heliconeRequest.prompt_tokens,

valhalla/jawn/src/controllers/public/__tests__/requestController.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ function makeHeliconeRequest(overrides: Partial<HeliconeRequest>): HeliconeReque
6060
prompt_cache_write_tokens: 0,
6161
prompt_cache_read_tokens: 0,
6262
completion_tokens: 0,
63+
reasoning_tokens: 0,
6364
prompt_audio_tokens: 0,
6465
completion_audio_tokens: 0,
6566
cost: 0,

valhalla/jawn/src/lib/db/ClickhouseWrapper.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ export interface RequestResponseRMT {
307307
prompt_cache_read_tokens: number;
308308
prompt_audio_tokens: number;
309309
completion_audio_tokens: number;
310+
reasoning_tokens: number;
310311
model: string;
311312
ai_gateway_body_mapping: string;
312313
request_id: string;

valhalla/jawn/src/lib/handlers/HandlerContext.ts

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ export const toHeliconeRequest = (context: HandlerContext): HeliconeRequest => {
194194
);
195195
const promptAudioTokens = getPromptAudioTokens(modelUsage, legacyUsage);
196196
const completionAudioTokens = legacyUsage.completionAudioTokens ?? null;
197+
const reasoningTokens = getReasoningTokens(modelUsage);
197198

198199
return {
199200
cost: context.costBreakdown?.totalCost ?? legacyUsage.cost ?? null,
@@ -230,6 +231,7 @@ export const toHeliconeRequest = (context: HandlerContext): HeliconeRequest => {
230231
prompt_cache_read_tokens: isCacheHit ? 0 : promptCacheReadTokens,
231232
prompt_audio_tokens: isCacheHit ? 0 : promptAudioTokens,
232233
completion_audio_tokens: isCacheHit ? 0 : completionAudioTokens,
234+
reasoning_tokens: isCacheHit ? 0 : reasoningTokens,
233235

234236
/// NOTE: Unfortunately our codebase is running two prompts systems in parallel.
235237
// This used to track the legacy feature, but its now the new one.
@@ -286,9 +288,9 @@ export function getCompletionTokens(
286288
(modelUsage.image?.output ?? 0) +
287289
(modelUsage.video?.output ?? 0) +
288290
(modelUsage.file?.output ?? 0);
289-
const thinking = modelUsage.thinking ?? 0;
290-
if (modelUsage.output > 0 || modalityOutput > 0 || thinking > 0) {
291-
return modelUsage.output + modalityOutput + thinking;
291+
// Note: reasoning/thinking tokens are now tracked separately and NOT included in completion tokens
292+
if (modelUsage.output > 0 || modalityOutput > 0) {
293+
return modelUsage.output + modalityOutput;
292294
}
293295
}
294296
return legacyUsage.completionTokens ?? null;
@@ -300,9 +302,16 @@ function getTotalTokens(
300302
): number | null {
301303
const promptTokens = getPromptTokens(modelUsage, legacyUsage);
302304
const completionTokens = getCompletionTokens(modelUsage, legacyUsage);
305+
const reasoningTokens = getReasoningTokens(modelUsage);
303306

304-
if (promptTokens !== null || completionTokens !== null) {
305-
return (promptTokens ?? 0) + (completionTokens ?? 0);
307+
if (
308+
promptTokens !== null ||
309+
completionTokens !== null ||
310+
reasoningTokens !== null
311+
) {
312+
return (
313+
(promptTokens ?? 0) + (completionTokens ?? 0) + (reasoningTokens ?? 0)
314+
);
306315
}
307316
return legacyUsage.totalTokens ?? null;
308317
}
@@ -357,3 +366,12 @@ export function getCompletionAudioTokens(
357366
}
358367
return legacyUsage.completionAudioTokens ?? null;
359368
}
369+
370+
export function getReasoningTokens(
371+
modelUsage: ModelUsage | undefined
372+
): number | null {
373+
if (modelUsage?.thinking !== undefined && modelUsage.thinking > 0) {
374+
return modelUsage.thinking;
375+
}
376+
return null;
377+
}

0 commit comments

Comments (0)