
Commit 86ca590

feat: cached + reasoning tokens (#420)

* feat: cached tokens
* fix: prettier + logs
* fix: vercel types
* fix: prettier

1 parent 6498195 commit 86ca590

9 files changed (+194, -21 lines)

posthog-ai/CHANGELOG.md (+6)

```diff
@@ -1,3 +1,9 @@
+# 3.3.0 - 2025-03-08
+
+- feat: add reasoning and cache tokens to openai and anthropic
+- feat: add tool support for vercel
+- feat: add support for other media types vercel
+
 # 3.2.1 - 2025-02-11
 
 - fix: add experimental_wrapLanguageModel to vercel middleware supporting older versions of ai
```

posthog-ai/package.json (+1, -1)

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@posthog/ai",
-  "version": "3.2.1",
+  "version": "3.3.0",
   "description": "PostHog Node.js AI integrations",
   "repository": {
     "type": "git",
```

posthog-ai/src/anthropic/index.ts (+12, -1)

```diff
@@ -70,9 +70,16 @@ export class WrappedMessages extends AnthropicOriginal.Messages {
     if (anthropicParams.stream) {
       return parentPromise.then((value) => {
         let accumulatedContent = ''
-        const usage: { inputTokens: number; outputTokens: number } = {
+        const usage: {
+          inputTokens: number
+          outputTokens: number
+          cacheCreationInputTokens?: number
+          cacheReadInputTokens?: number
+        } = {
           inputTokens: 0,
           outputTokens: 0,
+          cacheCreationInputTokens: 0,
+          cacheReadInputTokens: 0,
         }
         if ('tee' in value) {
           const [stream1, stream2] = value.tee()
@@ -87,6 +94,8 @@ export class WrappedMessages extends AnthropicOriginal.Messages {
               }
               if (chunk.type == 'message_start') {
                 usage.inputTokens = chunk.message.usage.input_tokens ?? 0
+                usage.cacheCreationInputTokens = chunk.message.usage.cache_creation_input_tokens ?? 0
+                usage.cacheReadInputTokens = chunk.message.usage.cache_read_input_tokens ?? 0
               }
               if ('usage' in chunk) {
                 usage.outputTokens = chunk.usage.output_tokens ?? 0
@@ -156,6 +165,8 @@ export class WrappedMessages extends AnthropicOriginal.Messages {
           usage: {
             inputTokens: result.usage.input_tokens ?? 0,
             outputTokens: result.usage.output_tokens ?? 0,
+            cacheCreationInputTokens: result.usage.cache_creation_input_tokens ?? 0,
+            cacheReadInputTokens: result.usage.cache_read_input_tokens ?? 0,
           },
         })
       }
```

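For context, a minimal sketch of how these cache fields get populated end to end. The wrapped client export name (`Anthropic`) and its `posthog` constructor option are assumptions not shown in this diff; the `cache_control` block and the `cache_creation_input_tokens` / `cache_read_input_tokens` fields follow Anthropic's prompt-caching API, and `posthogDistinctId` mirrors the monitoring params used in the test added by this commit.

```ts
// Sketch only: wrapped-client construction is assumed, not confirmed by this diff.
import { PostHog } from 'posthog-node'
import { Anthropic } from '@posthog/ai' // assumed export name

const phClient = new PostHog('<ph_project_api_key>')
const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY, posthog: phClient })

const LONG_SYSTEM_PROMPT = '...' // placeholder: a prompt large enough to be cached

const message = await anthropic.messages.create({
  model: 'claude-3-5-sonnet-latest',
  max_tokens: 1024,
  system: [
    // Marking a block as cacheable makes Anthropic report cache_creation_input_tokens
    // on the first call and cache_read_input_tokens on later calls.
    { type: 'text', text: LONG_SYSTEM_PROMPT, cache_control: { type: 'ephemeral' } },
  ],
  messages: [{ role: 'user', content: 'Summarize the document.' }],
  posthogDistinctId: 'user-123',
})
```

The wrapper above then forwards those counts as `cacheCreationInputTokens` / `cacheReadInputTokens` in the usage it hands to `sendEventToPosthog`.
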
posthog-ai/src/langchain/callbacks.ts (+6)

```diff
@@ -28,6 +28,8 @@ interface GenerationMetadata extends SpanMetadata {
   modelParams?: Record<string, any>
   /** The base URL—for example, the API base used */
   baseUrl?: string
+  /** The tools used in the generation */
+  tools?: Record<string, any>
 }
 
 /** A run may either be a Span or a Generation */
@@ -420,6 +422,10 @@ export class LangChainCallbackHandler extends BaseCallbackHandler {
       $ai_base_url: run.baseUrl,
     }
 
+    if (run.tools) {
+      eventProperties['$ai_tools'] = withPrivacyMode(this.client, this.privacyMode, run.tools)
+    }
+
     if (output instanceof Error) {
       eventProperties['$ai_http_status'] = (output as any).status || 500
       eventProperties['$ai_error'] = output.toString()
```

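The hunk attaches the run's tools via `withPrivacyMode`, whose implementation is not part of this diff. A rough sketch of the gating it implies, matching only the call signature used above (the client-level flag name is an assumption):

```ts
// Assumed behaviour: drop the payload when privacy mode is enabled on the
// handler or the client, otherwise pass it through unchanged.
const withPrivacyMode = (client: any, privacyMode: boolean, value: any): any =>
  client?.privacy_mode || privacyMode ? null : value // `privacy_mode` flag name is assumed

// With privacy mode off, the tool definitions end up on the event as $ai_tools:
const eventProperties: Record<string, any> = {}
const run = { tools: [{ type: 'function', function: { name: 'get_weather' } }] }
eventProperties['$ai_tools'] = withPrivacyMode({ privacy_mode: false }, false, run.tools)
```
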
posthog-ai/src/openai/azure.ts (+10, -1)

```diff
@@ -86,7 +86,12 @@ export class WrappedCompletions extends AzureOpenAI.Chat.Completions {
     if (openAIParams.stream) {
       return parentPromise.then((value) => {
         let accumulatedContent = ''
-        let usage: { inputTokens: number; outputTokens: number } = {
+        let usage: {
+          inputTokens: number
+          outputTokens: number
+          reasoningTokens?: number
+          cacheReadInputTokens?: number
+        } = {
           inputTokens: 0,
           outputTokens: 0,
         }
@@ -105,6 +110,8 @@ export class WrappedCompletions extends AzureOpenAI.Chat.Completions {
               usage = {
                 inputTokens: chunk.usage.prompt_tokens ?? 0,
                 outputTokens: chunk.usage.completion_tokens ?? 0,
+                reasoningTokens: chunk.usage.completion_tokens_details?.reasoning_tokens ?? 0,
+                cacheReadInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0,
               }
             }
           }
@@ -176,6 +183,8 @@ export class WrappedCompletions extends AzureOpenAI.Chat.Completions {
           usage: {
             inputTokens: result.usage?.prompt_tokens ?? 0,
             outputTokens: result.usage?.completion_tokens ?? 0,
+            reasoningTokens: result.usage?.completion_tokens_details?.reasoning_tokens ?? 0,
+            cacheReadInputTokens: result.usage?.prompt_tokens_details?.cached_tokens ?? 0,
           },
         })
       }
```

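Both hunks read the optional detail objects that the Chat Completions API attaches to `usage`. The shape they expect, with illustrative numbers matching the test added in this commit, is:

```ts
// Illustrative response usage: reasoning_tokens only appears for reasoning
// models, and cached_tokens only when part of the prompt was served from cache.
const usageFromApi = {
  prompt_tokens: 20,
  completion_tokens: 10,
  total_tokens: 30,
  completion_tokens_details: { reasoning_tokens: 15 },
  prompt_tokens_details: { cached_tokens: 5 },
}
```
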
posthog-ai/src/openai/index.ts (+13, -2)

```diff
@@ -88,11 +88,18 @@ export class WrappedCompletions extends OpenAIOrignal.Chat.Completions {
       return parentPromise.then((value) => {
         if ('tee' in value) {
           const [stream1, stream2] = value.tee()
-          // Use one stream for tracking
           ;(async () => {
             try {
               let accumulatedContent = ''
-              let usage = { inputTokens: 0, outputTokens: 0 }
+              let usage: {
+                inputTokens?: number
+                outputTokens?: number
+                reasoningTokens?: number
+                cacheReadInputTokens?: number
+              } = {
+                inputTokens: 0,
+                outputTokens: 0,
+              }
 
               for await (const chunk of stream1) {
                 const delta = chunk?.choices?.[0]?.delta?.content ?? ''
@@ -101,6 +108,8 @@ export class WrappedCompletions extends OpenAIOrignal.Chat.Completions {
                   usage = {
                     inputTokens: chunk.usage.prompt_tokens ?? 0,
                     outputTokens: chunk.usage.completion_tokens ?? 0,
+                    reasoningTokens: chunk.usage.completion_tokens_details?.reasoning_tokens ?? 0,
+                    cacheReadInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0,
                   }
                 }
               }
@@ -165,6 +174,8 @@ export class WrappedCompletions extends OpenAIOrignal.Chat.Completions {
           usage: {
             inputTokens: result.usage?.prompt_tokens ?? 0,
             outputTokens: result.usage?.completion_tokens ?? 0,
+            reasoningTokens: result.usage?.completion_tokens_details?.reasoning_tokens ?? 0,
+            cacheReadInputTokens: result.usage?.prompt_tokens_details?.cached_tokens ?? 0,
           },
         })
       }
```

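One caveat for the streaming path: OpenAI only emits the final `usage` chunk (including `completion_tokens_details` and `prompt_tokens_details`) when `stream_options.include_usage` is set, so the tee'd tracking stream above can only pick up reasoning and cached counts for requests made along these lines. The `OpenAI` export and its `posthog` option are assumptions not shown in this diff; `posthogDistinctId` matches the test added in this commit.

```ts
// Sketch only: wrapped-client construction is assumed, not confirmed by this diff.
import { PostHog } from 'posthog-node'
import { OpenAI } from '@posthog/ai' // assumed export name

const phClient = new PostHog('<ph_project_api_key>')
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, posthog: phClient })

const stream = await openai.chat.completions.create({
  model: 'o3-mini',
  messages: [{ role: 'user', content: 'Plan a three-step refactor.' }],
  stream: true,
  stream_options: { include_usage: true }, // the final chunk then carries `usage`
  posthogDistinctId: 'user-123',
})

for await (const chunk of stream) {
  process.stdout.write(chunk.choices[0]?.delta?.content ?? '')
}
```
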
posthog-ai/src/utils.ts (+17, -1)

```diff
@@ -118,10 +118,17 @@ export type SendEventToPosthogParams = {
   latency: number
   baseURL: string
   httpStatus: number
-  usage?: { inputTokens?: number; outputTokens?: number }
+  usage?: {
+    inputTokens?: number
+    outputTokens?: number
+    reasoningTokens?: any
+    cacheReadInputTokens?: any
+    cacheCreationInputTokens?: any
+  }
   params: (ChatCompletionCreateParamsBase | MessageCreateParams) & MonitoringParams
   isError?: boolean
   error?: string
+  tools?: any
 }
 
 export const sendEventToPosthog = ({
@@ -139,6 +146,7 @@
   usage = {},
   isError = false,
   error,
+  tools,
 }: SendEventToPosthogParams): void => {
   if (client.capture) {
     let errorData = {}
@@ -159,6 +167,12 @@
       }
     }
 
+    let additionalTokenValues = {
+      ...(usage.reasoningTokens ? { $ai_reasoning_tokens: usage.reasoningTokens } : {}),
+      ...(usage.cacheReadInputTokens ? { $ai_cache_read_input_tokens: usage.cacheReadInputTokens } : {}),
+      ...(usage.cacheCreationInputTokens ? { $ai_cache_creation_input_tokens: usage.cacheCreationInputTokens } : {}),
+    }
+
     client.capture({
       distinctId: distinctId ?? traceId,
       event: '$ai_generation',
@@ -171,11 +185,13 @@
         $ai_http_status: httpStatus,
         $ai_input_tokens: usage.inputTokens ?? 0,
         $ai_output_tokens: usage.outputTokens ?? 0,
+        ...additionalTokenValues,
         $ai_latency: latency,
         $ai_trace_id: traceId,
         $ai_base_url: baseURL,
         ...params.posthogProperties,
         ...(distinctId ? {} : { $process_person_profile: false }),
+        ...(tools ? { $ai_tools: tools } : {}),
         ...errorData,
         ...costOverrideData,
       },
```

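Because the three new properties are spread in behind truthiness checks, a zero count is simply omitted from the event rather than sent as 0. For a call that hit the prompt cache on a reasoning model, the captured `$ai_generation` might look like this (values illustrative; property names taken from the hunks above):

```ts
const exampleCapture = {
  event: '$ai_generation',
  properties: {
    $ai_input_tokens: 20,
    $ai_output_tokens: 10,
    $ai_reasoning_tokens: 15,       // only present when usage.reasoningTokens is truthy
    $ai_cache_read_input_tokens: 5, // only present when usage.cacheReadInputTokens is truthy
    $ai_tools: [{ type: 'function', function: { name: 'get_weather' } }], // only when tools is passed
    $ai_latency: 1.23,
    $ai_trace_id: 'trace-abc',
  },
}
```
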
posthog-ai/src/vercel/middleware.ts (+93, -15)

```diff
@@ -27,8 +27,13 @@ interface CreateInstrumentationMiddlewareOptions {
 }
 
 interface PostHogInput {
-  content: string
   role: string
+  type?: string
+  content?:
+    | string
+    | {
+        [key: string]: any
+      }
 }
 
 const mapVercelParams = (params: any): Record<string, any> => {
@@ -45,18 +50,60 @@
 
 const mapVercelPrompt = (prompt: LanguageModelV1Prompt): PostHogInput[] => {
   return prompt.map((p) => {
-    let content = ''
+    let content = {}
     if (Array.isArray(p.content)) {
-      content = p.content
-        .map((c) => {
-          if (c.type === 'text') {
-            return c.text
+      content = p.content.map((c) => {
+        if (c.type === 'text') {
+          return {
+            type: 'text',
+            content: c.text,
           }
-          return ''
-        })
-        .join('')
+        } else if (c.type === 'image') {
+          return {
+            type: 'image',
+            content: {
+              // if image is a url use it, or use "none supported"
+              image: c.image instanceof URL ? c.image.toString() : 'raw images not supported',
+              mimeType: c.mimeType,
+            },
+          }
+        } else if (c.type === 'file') {
+          return {
+            type: 'file',
+            content: {
+              file: c.data instanceof URL ? c.data.toString() : 'raw files not supported',
+              mimeType: c.mimeType,
+            },
+          }
+        } else if (c.type === 'tool-call') {
+          return {
+            type: 'tool-call',
+            content: {
+              toolCallId: c.toolCallId,
+              toolName: c.toolName,
+              args: c.args,
+            },
+          }
+        } else if (c.type === 'tool-result') {
+          return {
+            type: 'tool-result',
+            content: {
+              toolCallId: c.toolCallId,
+              toolName: c.toolName,
+              result: c.result,
+              isError: c.isError,
+            },
+          }
+        }
+        return {
+          content: '',
+        }
+      })
     } else {
-      content = p.content
+      content = {
+        type: 'text',
+        text: p.content,
+      }
     }
     return {
       role: p.role,
@@ -91,10 +138,22 @@ export const createInstrumentationMiddleware = (
         options.posthogModelOverride ?? (result.response?.modelId ? result.response.modelId : model.modelId)
       const provider = options.posthogProviderOverride ?? extractProvider(model)
       const baseURL = '' // cannot currently get baseURL from vercel
-      let content = result.text
-      if (!content) {
-        // support generate Object
-        content = result.toolCalls?.[0].args || JSON.stringify(result)
+      let content = result.text || JSON.stringify(result)
+      // let tools = result.toolCalls
+      let providerMetadata = result.providerMetadata
+      let additionalTokenValues = {
+        ...(providerMetadata?.openai?.reasoningTokens
+          ? { reasoningTokens: providerMetadata.openai.reasoningTokens }
+          : {}),
+        ...(providerMetadata?.openai?.cachedPromptToken
+          ? { cacheReadInputTokens: providerMetadata.openai.cachedPromptTokens }
+          : {}),
+        ...(providerMetadata?.anthropic
+          ? {
+              cacheReadInputTokens: providerMetadata.anthropic.cacheReadInputTokens,
+              cacheCreationInputTokens: providerMetadata.anthropic.cacheCreationInputTokens,
+            }
+          : {}),
       }
       sendEventToPosthog({
         client: phClient,
@@ -111,6 +170,7 @@
         usage: {
           inputTokens: result.usage.promptTokens,
           outputTokens: result.usage.completionTokens,
+          ...additionalTokenValues,
         },
       })
 
@@ -143,7 +203,13 @@
     wrapStream: async ({ doStream, params }) => {
       const startTime = Date.now()
       let generatedText = ''
-      let usage: { inputTokens?: number; outputTokens?: number } = {}
+      let usage: {
+        inputTokens?: number
+        outputTokens?: number
+        reasoningTokens?: any
+        cacheReadInputTokens?: any
+        cacheCreationInputTokens?: any
+      } = {}
       const mergedParams = {
         ...options,
         ...mapVercelParams(params),
@@ -164,6 +230,18 @@
             inputTokens: chunk.usage?.promptTokens,
             outputTokens: chunk.usage?.completionTokens,
           }
+          if (chunk.providerMetadata?.openai?.reasoningTokens) {
+            usage.reasoningTokens = chunk.providerMetadata.openai.reasoningTokens
+          }
+          if (chunk.providerMetadata?.openai?.cachedPromptToken) {
+            usage.cacheReadInputTokens = chunk.providerMetadata.openai.cachedPromptToken
+          }
+          if (chunk.providerMetadata?.anthropic?.cacheReadInputTokens) {
+            usage.cacheReadInputTokens = chunk.providerMetadata.anthropic.cacheReadInputTokens
+          }
+          if (chunk.providerMetadata?.anthropic?.cacheCreationInputTokens) {
+            usage.cacheCreationInputTokens = chunk.providerMetadata.anthropic.cacheCreationInputTokens
+          }
         }
         controller.enqueue(chunk)
       },
```

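Restated as a standalone helper for readability, this is the provider-metadata mapping the two hunks above apply (one in the generate path, one in `wrapStream`). Note the OpenAI cached-token key appears once as `cachedPromptToken` and once as `cachedPromptTokens` in the hunks; the sketch below assumes a single key, and the exact field names are whatever the Vercel AI SDK providers put on `providerMetadata`.

```ts
type ExtraUsage = {
  reasoningTokens?: number
  cacheReadInputTokens?: number
  cacheCreationInputTokens?: number
}

// Pull reasoning/cache counts out of providerMetadata when a provider reports them.
const mapProviderMetadataToUsage = (providerMetadata: any): ExtraUsage => ({
  ...(providerMetadata?.openai?.reasoningTokens
    ? { reasoningTokens: providerMetadata.openai.reasoningTokens }
    : {}),
  ...(providerMetadata?.openai?.cachedPromptTokens
    ? { cacheReadInputTokens: providerMetadata.openai.cachedPromptTokens }
    : {}),
  ...(providerMetadata?.anthropic
    ? {
        cacheReadInputTokens: providerMetadata.anthropic.cacheReadInputTokens,
        cacheCreationInputTokens: providerMetadata.anthropic.cacheCreationInputTokens,
      }
    : {}),
})
```
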
posthog-ai/tests/openai.test.ts (+36)

```diff
@@ -225,4 +225,40 @@ describe('PostHogOpenAI - Jest test suite', () => {
     expect(properties['$ai_stream']).toBe(false)
     expect(properties['foo']).toBe('bar')
   })
+
+  conditionalTest('reasoning and cache tokens', async () => {
+    // Set up mock response with standard token usage
+    mockOpenAiChatResponse.usage = {
+      prompt_tokens: 20,
+      completion_tokens: 10,
+      total_tokens: 30,
+      // Add the detailed token usage that OpenAI would return
+      completion_tokens_details: {
+        reasoning_tokens: 15,
+      },
+      prompt_tokens_details: {
+        cached_tokens: 5,
+      },
+    }
+
+    // Create a completion with additional token tracking
+    await client.chat.completions.create({
+      model: 'gpt-4',
+      messages: [{ role: 'user', content: 'Hello' }],
+      posthogDistinctId: 'test-id',
+      posthogProperties: { foo: 'bar' },
+    })
+
+    expect(mockPostHogClient.capture).toHaveBeenCalledTimes(1)
+    const [captureArgs] = (mockPostHogClient.capture as jest.Mock).mock.calls
+    const { properties } = captureArgs[0]
+
+    // Check standard token properties
+    expect(properties['$ai_input_tokens']).toBe(20)
+    expect(properties['$ai_output_tokens']).toBe(10)
+
+    // Check the new token properties
+    expect(properties['$ai_reasoning_tokens']).toBe(15)
+    expect(properties['$ai_cache_read_input_tokens']).toBe(5)
+  })
 })
```
