
Commit 2ef0c13

fix(openai): enrich token metrics on streaming requests (#183)
1 parent 83737ee · commit 2ef0c13

File tree

8 files changed: +138 −4 lines changed

package-lock.json  +7 −1

Generated file; diff not rendered by default.

packages/instrumentation-openai/package.json  +2 −1

@@ -39,7 +39,8 @@
     "@opentelemetry/core": "^1.22.0",
     "@opentelemetry/instrumentation": "^0.49.0",
     "@opentelemetry/semantic-conventions": "^1.22.0",
-    "@traceloop/ai-semantic-conventions": "^0.5.27"
+    "@traceloop/ai-semantic-conventions": "^0.5.27",
+    "tiktoken": "^1.0.13"
   },
   "devDependencies": {
     "@pollyjs/adapter-node-http": "^6.0.6",

packages/instrumentation-openai/src/instrumentation.ts  +64

@@ -45,6 +45,7 @@ import type {
 } from "openai/resources";
 import type { Stream } from "openai/streaming";
 import { version } from "../package.json";
+import { encoding_for_model, TiktokenModel, Tiktoken } from "tiktoken";
 
 export class OpenAIInstrumentation extends InstrumentationBase<any> {
   protected declare _config: OpenAIInstrumentationConfig;
@@ -198,6 +199,7 @@ export class OpenAIInstrumentation extends InstrumentationBase<any> {
           plugin._streamingWrapPromise({
             span,
             type,
+            params: args[0] as any,
             promise: execPromise,
           }),
         );
@@ -296,15 +298,18 @@ export class OpenAIInstrumentation extends InstrumentationBase<any> {
   private async *_streamingWrapPromise({
     span,
     type,
+    params,
     promise,
   }:
     | {
         span: Span;
         type: "chat";
+        params: ChatCompletionCreateParamsStreaming;
         promise: Promise<Stream<ChatCompletionChunk>>;
       }
     | {
         span: Span;
+        params: CompletionCreateParamsStreaming;
        type: "completion";
        promise: Promise<Stream<Completion>>;
      }) {
@@ -356,6 +361,29 @@ export class OpenAIInstrumentation extends InstrumentationBase<any> {
         this._addLogProbsEvent(span, result.choices[0].logprobs);
       }
 
+      if (this._config.enrichTokens) {
+        let promptTokens = 0;
+        for (const message of params.messages) {
+          promptTokens +=
+            this.tokenCountFromString(
+              message.content as string,
+              result.model,
+            ) ?? 0;
+        }
+
+        const completionTokens = this.tokenCountFromString(
+          result.choices[0].message.content ?? "",
+          result.model,
+        );
+        if (completionTokens) {
+          result.usage = {
+            prompt_tokens: promptTokens,
+            completion_tokens: completionTokens,
+            total_tokens: promptTokens + completionTokens,
+          };
+        }
+      }
+
       this._endSpan({ span, type, result });
     } else {
       const result: Completion = {
@@ -394,6 +422,23 @@ export class OpenAIInstrumentation extends InstrumentationBase<any> {
         this._addLogProbsEvent(span, result.choices[0].logprobs);
       }
 
+      if (this._config.enrichTokens) {
+        const promptTokens =
+          this.tokenCountFromString(params.prompt as string, result.model) ?? 0;
+
+        const completionTokens = this.tokenCountFromString(
+          result.choices[0].text ?? "",
+          result.model,
+        );
+        if (completionTokens) {
+          result.usage = {
+            prompt_tokens: promptTokens,
+            completion_tokens: completionTokens,
+            total_tokens: promptTokens + completionTokens,
+          };
+        }
+      }
+
       this._endSpan({ span, type, result });
     }
   }
@@ -588,4 +633,23 @@ export class OpenAIInstrumentation extends InstrumentationBase<any> {
 
     span.addEvent("logprobs", { logprobs: JSON.stringify(result) });
   }
+
+  private _encodingCache = new Map<string, Tiktoken>();
+
+  private tokenCountFromString(text: string, model: string) {
+    if (!this._encodingCache.has(model)) {
+      try {
+        const encoding = encoding_for_model(model as TiktokenModel);
+        this._encodingCache.set(model, encoding);
+      } catch (e) {
+        this._diag.warn(
+          `Failed to get tiktoken encoding for model_name: ${model}, error: ${e}`,
+        );
+        return;
+      }
+    }
+
+    const encoding = this._encodingCache.get(model);
+    return encoding!.encode(text).length;
+  }
 }
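For reference, the new tokenCountFromString helper builds on the tiktoken dependency added above. A minimal standalone sketch of the same counting approach (the model name and text here are illustrative):

import { encoding_for_model, TiktokenModel } from "tiktoken";

// Any model name tiktoken recognizes maps to an encoding.
const model: TiktokenModel = "gpt-3.5-turbo";
const encoding = encoding_for_model(model);

// encode() returns the token ids; the token count is the array length.
const count = encoding.encode("Tell me a joke about OpenTelemetry").length;
console.log(count);

// tiktoken encodings are WASM-backed; free() releases the underlying memory.
encoding.free();

The helper instead caches one Tiktoken instance per model in _encodingCache, so repeated streaming requests against the same model skip encoder initialization; the trade-off is that cached encodings live for the lifetime of the instrumentation.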

packages/instrumentation-openai/src/types.ts  +6

@@ -6,4 +6,10 @@ export interface OpenAIInstrumentationConfig extends InstrumentationConfig {
    * @default true
    */
   traceContent?: boolean;
+
+  /**
+   * Whether to enrich token information if missing from the trace.
+   * @default false
+   */
+  enrichTokens?: boolean;
 }
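Since enrichTokens defaults to false, consumers wiring the instrumentation manually must opt in, exactly as the test setup below does. A minimal sketch, assuming the package is imported under its repo name @traceloop/instrumentation-openai:

import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
import { OpenAIInstrumentation } from "@traceloop/instrumentation-openai";

const provider = new NodeTracerProvider();

// Opt in to computing token usage for streamed responses (off by default).
const instrumentation = new OpenAIInstrumentation({ enrichTokens: true });
instrumentation.setTracerProvider(provider);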

packages/instrumentation-openai/test/instrumentation.test.ts  +45 −1

@@ -58,7 +58,7 @@ describe("Test OpenAI instrumentation", async function () {
     process.env.OPENAI_API_KEY = "test";
   }
   provider.addSpanProcessor(new SimpleSpanProcessor(memoryExporter));
-  instrumentation = new OpenAIInstrumentation();
+  instrumentation = new OpenAIInstrumentation({ enrichTokens: true });
   instrumentation.setTracerProvider(provider);
 
   const openAIModule: typeof OpenAIModule = await import("openai");
@@ -103,6 +103,18 @@ describe("Test OpenAI instrumentation", async function () {
       completionSpan.attributes[`${SpanAttributes.LLM_PROMPTS}.0.content`],
       "Tell me a joke about OpenTelemetry",
     );
+    assert.ok(
+      completionSpan.attributes[`${SpanAttributes.LLM_USAGE_TOTAL_TOKENS}`],
+    );
+    assert.equal(
+      completionSpan.attributes[`${SpanAttributes.LLM_USAGE_PROMPT_TOKENS}`],
+      "15",
+    );
+    assert.ok(
+      +completionSpan.attributes[
+        `${SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}`
+      ]! > 0,
+    );
   });
 
   it("should set attributes in span for streaming chat", async () => {
@@ -136,6 +148,18 @@ describe("Test OpenAI instrumentation", async function () {
       completionSpan.attributes[`${SpanAttributes.LLM_COMPLETIONS}.0.content`],
       result,
     );
+    assert.ok(
+      completionSpan.attributes[`${SpanAttributes.LLM_USAGE_TOTAL_TOKENS}`],
+    );
+    assert.equal(
+      completionSpan.attributes[`${SpanAttributes.LLM_USAGE_PROMPT_TOKENS}`],
+      "8",
+    );
+    assert.ok(
+      +completionSpan.attributes[
+        `${SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}`
+      ]! > 0,
+    );
   });
 
   it("should set attributes in span for streaming chat with new API", async () => {
@@ -169,6 +193,26 @@ describe("Test OpenAI instrumentation", async function () {
       completionSpan.attributes[`${SpanAttributes.LLM_COMPLETIONS}.0.content`],
       result,
     );
+    assert.ok(
+      completionSpan.attributes[`${SpanAttributes.LLM_USAGE_PROMPT_TOKENS}`],
+    );
+    assert.ok(
+      completionSpan.attributes[
+        `${SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}`
+      ],
+    );
+    assert.ok(
+      completionSpan.attributes[`${SpanAttributes.LLM_USAGE_TOTAL_TOKENS}`],
+    );
+    assert.equal(
+      completionSpan.attributes[`${SpanAttributes.LLM_USAGE_PROMPT_TOKENS}`],
+      "8",
+    );
+    assert.ok(
+      +completionSpan.attributes[
+        `${SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}`
+      ]! > 0,
+    );
   });
 
   it("should set attributes in span for completion", async () => {

packages/traceloop-sdk/src/lib/configuration/index.ts  +4

@@ -54,6 +54,10 @@ export const initialize = (options: InitializeOptions) => {
       options.traceloopSyncDevPollingInterval =
         Number(process.env.TRACELOOP_SYNC_DEV_POLLING_INTERVAL) || 5;
     }
+
+    if (options.shouldEnrichMetrics === undefined) {
+      options.shouldEnrichMetrics = true;
+    }
   }
 
   validateConfiguration(options);

packages/traceloop-sdk/src/lib/interfaces/initialize-options.interface.ts  +6

@@ -46,6 +46,12 @@ export interface InitializeOptions {
    */
   logLevel?: "debug" | "info" | "warn" | "error";
 
+  /**
+   * Whether to enrich metrics with additional data like OpenAI token usage for streaming requests. Optional.
+   * Defaults to true.
+   */
+  shouldEnrichMetrics?: boolean;
+
   /**
    * Whether to log prompts, completions and embeddings on traces. Optional.
    * Defaults to true.
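Because the SDK defaults shouldEnrichMetrics to true (see the configuration change above), applications only need to set it to opt out. A sketch, assuming the SDK's published name @traceloop/node-server-sdk and an illustrative appName:

import * as traceloop from "@traceloop/node-server-sdk";

traceloop.initialize({
  appName: "my-app", // illustrative
  // Defaults to true; set false to skip token enrichment on streaming spans.
  shouldEnrichMetrics: false,
});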

packages/traceloop-sdk/src/lib/tracing/index.ts  +4 −1

@@ -76,7 +76,9 @@ export const manuallyInitInstrumentations = (
   instrumentModules: InitializeOptions["instrumentModules"],
 ) => {
   if (instrumentModules?.openAI) {
-    openAIInstrumentation = new OpenAIInstrumentation();
+    openAIInstrumentation = new OpenAIInstrumentation({
+      enrichTokens: _configuration?.shouldEnrichMetrics,
+    });
     instrumentations.push(openAIInstrumentation);
     openAIInstrumentation.manuallyInstrument(instrumentModules.openAI);
   }
@@ -149,6 +151,7 @@ export const startTracing = (options: InitializeOptions) => {
   if (!shouldSendTraces()) {
     openAIInstrumentation?.setConfig({
       traceContent: false,
+      enrichTokens: _configuration?.shouldEnrichMetrics,
     });
     azureOpenAIInstrumentation?.setConfig({
       traceContent: false,
