diff --git a/src/providers/google-vertex-ai/chatComplete.ts b/src/providers/google-vertex-ai/chatComplete.ts
index 2b412a497..6b69825c4 100644
--- a/src/providers/google-vertex-ai/chatComplete.ts
+++ b/src/providers/google-vertex-ai/chatComplete.ts
@@ -434,6 +434,7 @@ export const GoogleChatCompleteResponseTransform: (
       promptTokenCount = 0,
       candidatesTokenCount = 0,
       totalTokenCount = 0,
+      thoughtsTokenCount = 0,
     } = response.usageMetadata;
 
     return {
@@ -499,6 +500,9 @@ export const GoogleChatCompleteResponseTransform: (
         prompt_tokens: promptTokenCount,
         completion_tokens: candidatesTokenCount,
         total_tokens: totalTokenCount,
+        completion_tokens_details: {
+          reasoning_tokens: thoughtsTokenCount,
+        },
       },
     };
   }
@@ -589,6 +593,9 @@ export const GoogleChatCompleteStreamChunkTransform: (
       prompt_tokens: parsedChunk.usageMetadata.promptTokenCount,
       completion_tokens: parsedChunk.usageMetadata.candidatesTokenCount,
       total_tokens: parsedChunk.usageMetadata.totalTokenCount,
+      completion_tokens_details: {
+        reasoning_tokens: parsedChunk.usageMetadata.thoughtsTokenCount ?? 0,
+      },
     };
   }
 
diff --git a/src/providers/google-vertex-ai/transformGenerationConfig.ts b/src/providers/google-vertex-ai/transformGenerationConfig.ts
index 7d15142a1..0abf56abb 100644
--- a/src/providers/google-vertex-ai/transformGenerationConfig.ts
+++ b/src/providers/google-vertex-ai/transformGenerationConfig.ts
@@ -52,7 +52,8 @@ export function transformGenerationConfig(params: Params) {
   if (params?.thinking) {
     const thinkingConfig: Record<string, any> = {};
-    thinkingConfig['include_thoughts'] = true;
+    thinkingConfig['include_thoughts'] =
+      params.thinking.type && params.thinking.type === 'enabled' ? true : false;
     thinkingConfig['thinking_budget'] = params.thinking.budget_tokens;
     generationConfig['thinking_config'] = thinkingConfig;
   }
 
diff --git a/src/providers/google-vertex-ai/types.ts b/src/providers/google-vertex-ai/types.ts
index c0b6d6345..b8ec4fd43 100644
--- a/src/providers/google-vertex-ai/types.ts
+++ b/src/providers/google-vertex-ai/types.ts
@@ -65,6 +65,7 @@ export interface GoogleGenerateContentResponse {
     promptTokenCount: number;
     candidatesTokenCount: number;
     totalTokenCount: number;
+    thoughtsTokenCount?: number;
   };
 }
 
diff --git a/src/providers/google/chatComplete.ts b/src/providers/google/chatComplete.ts
index 1e2b67f8b..fad7f86d7 100644
--- a/src/providers/google/chatComplete.ts
+++ b/src/providers/google/chatComplete.ts
@@ -73,7 +73,8 @@ const transformGenerationConfig = (params: Params) => {
   }
   if (params?.thinking) {
     const thinkingConfig: Record<string, any> = {};
-    thinkingConfig['include_thoughts'] = true;
+    thinkingConfig['include_thoughts'] =
+      params.thinking.type && params.thinking.type === 'enabled' ? true : false;
     thinkingConfig['thinking_budget'] = params.thinking.budget_tokens;
     generationConfig['thinking_config'] = thinkingConfig;
   }
@@ -479,6 +480,7 @@ interface GoogleGenerateContentResponse {
     promptTokenCount: number;
     candidatesTokenCount: number;
     totalTokenCount: number;
+    thoughtsTokenCount?: number;
   };
 }
 
@@ -579,6 +581,9 @@ export const GoogleChatCompleteResponseTransform: (
         prompt_tokens: response.usageMetadata.promptTokenCount,
         completion_tokens: response.usageMetadata.candidatesTokenCount,
         total_tokens: response.usageMetadata.totalTokenCount,
+        completion_tokens_details: {
+          reasoning_tokens: response.usageMetadata.thoughtsTokenCount ?? 0,
+        },
       },
     };
   }
@@ -624,6 +629,9 @@ export const GoogleChatCompleteStreamChunkTransform: (
       prompt_tokens: parsedChunk.usageMetadata.promptTokenCount,
       completion_tokens: parsedChunk.usageMetadata.candidatesTokenCount,
       total_tokens: parsedChunk.usageMetadata.totalTokenCount,
+      completion_tokens_details: {
+        reasoning_tokens: parsedChunk.usageMetadata.thoughtsTokenCount ?? 0,
+      },
     };
   }