Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,18 @@ className="bg-background text-foreground border-border"
- Test in both light and dark modes
- When making changes to `/packages`, run the tests (`npx jest __tests__/` in `/packages`) to make sure nothing else is broken

### TypeScript Type Changes

When modifying TypeScript types (interfaces, type aliases, union types):

1. **Run type checking before committing**: `npx tsc --noEmit` in the relevant package to catch type errors (Jest tests alone won't catch all type mismatches)
2. **Search for related types**: When updating union types (e.g., `"low" | "medium" | "high"`), grep for similar patterns to ensure all related types are updated:
```bash
grep -r "thinkingLevel" packages/ # Find all usages of similar type
```
3. **Check cross-package dependencies**: Types in `/packages/` are often used across multiple packages (llm-mapper, prompts, cost). Verify changes don't break consumers.
4. **Run the full build**: For `/packages/` changes, run `cd web && yarn build` or check Vercel preview to catch type errors across the monorepo.

# Helicone Design System Guidelines

## Core Principles
Expand Down
8 changes: 4 additions & 4 deletions packages/__tests__/llm-mapper/google-reasoning.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ describe("Google Reasoning/Thinking Support", () => {
});
});

it("should map reasoning_effort 'medium' to thinkingLevel 'low'", () => {
it("should pass through reasoning_effort 'medium' as thinkingLevel 'medium'", () => {
const openAIRequest: HeliconeChatCreateParams = {
model: "gemini-3-pro",
model: "gemini-3-flash-preview",
messages: [{ role: "user", content: "Test" }],
reasoning_effort: "medium",
};

const googleRequest = toGoogle(openAIRequest);

// Google only supports low/high, so medium maps to low
// Pass through reasoning_effort directly as thinkingLevel
expect(googleRequest.generationConfig?.thinkingConfig).toEqual({
includeThoughts: true,
thinkingLevel: "low",
thinkingLevel: "medium",
});
});

Expand Down
17 changes: 3 additions & 14 deletions packages/llm-mapper/transform/providers/openai/request/toGoogle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,16 +258,6 @@ function supportsThinkingLevel(model: string): boolean {
return false;
}

/**
* Maps OpenAI reasoning_effort to Google thinkingLevel.
*/
function mapReasoningEffortToThinkingLevel(
effort: "low" | "medium" | "high"
): "low" | "high" {
// Google only supports "low" and "high", so map "medium" to "low"
return effort === "high" ? "high" : "low";
}

/**
* Builds the Google thinking configuration from OpenAI reasoning parameters.
*
Expand Down Expand Up @@ -326,10 +316,9 @@ function buildThinkingConfig(

// Handle reasoning_effort
if (modelSupportsThinkingLevel) {
// Gemini 3+ models: use thinkingLevel
thinkingConfig.thinkingLevel = mapReasoningEffortToThinkingLevel(
reasoningEffort as "low" | "medium" | "high"
);
// Gemini 3+ models: pass through reasoning_effort as thinkingLevel
// Google supports "low", "medium", "high" (Flash also supports "minimal" via reasoning_options)
thinkingConfig.thinkingLevel = reasoningEffort as "low" | "medium" | "high";
} else {
// Gemini 2.5 models: use dynamic thinkingBudget (-1)
thinkingConfig.thinkingBudget = -1;
Expand Down
10 changes: 6 additions & 4 deletions packages/llm-mapper/transform/types/google.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export type GeminiTool = {

export type GeminiThinkingConfig = {
includeThoughts?: boolean;
thinkingLevel?: "low" | "high";
thinkingLevel?: "minimal" | "low" | "medium" | "high";
thinkingBudget?: number;
};

Expand Down Expand Up @@ -87,8 +87,8 @@ export type ChatCompletionMessage =
export interface GoogleReasoningOptions {
/** Token budget for thinking (Gemini 2.5 models) */
budget_tokens?: number;
/** Thinking level (Gemini 3+ models) */
thinking_level?: "low" | "high";
/** Thinking level (Gemini 3+ models): minimal (Flash only), low, medium (Flash only), high */
thinking_level?: "minimal" | "low" | "medium" | "high";
}
// === RESPONSE TYPES ===
export interface GoogleFunctionCall {
Expand Down Expand Up @@ -157,10 +157,12 @@ export interface GoogleThinkingConfig {
includeThoughts?: boolean;
/**
* Thinking level for Gemini 3+ models
* - "minimal" for minimal reasoning (Flash only)
* - "low" for faster, less detailed reasoning
* - "medium" for balanced reasoning (Flash only)
* - "high" for more detailed reasoning
*/
thinkingLevel?: "low" | "high";
thinkingLevel?: "minimal" | "low" | "medium" | "high";
/**
* Token budget for thinking (for Gemini 2.5 models)
* - Specific token values (e.g., 1024)
Expand Down
4 changes: 3 additions & 1 deletion packages/prompts/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,12 @@ export interface HeliconeReasoningOptions {

/**
* Thinking level for Google Gemini 3+ models.
* - "minimal" for minimal thinking (Flash only)
* - "low" for faster, less detailed reasoning
* - "medium" for balanced reasoning (Flash only)
* - "high" for more detailed reasoning
*/
thinking_level?: "low" | "high";
thinking_level?: "minimal" | "low" | "medium" | "high";
};
}

Expand Down
20 changes: 19 additions & 1 deletion worker/src/lib/clients/ProviderClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,30 @@ export async function callProviderWithRetry(
): Promise<Response> {
let lastResponse;

// Convert ReadableStream body to string before retry loop to allow reuse across retries.
// ReadableStream bodies can only be consumed once, so we need to read them into a string
// that can be safely reused for each retry attempt.
let retryableBody = callProps.body;
if (retryableBody instanceof ReadableStream) {
const reader = retryableBody.getReader();
const chunks: Uint8Array[] = [];
while (true) {
const { done, value } = await reader.read();
if (done) break;
if (value) chunks.push(value);
}
const decoder = new TextDecoder();
retryableBody = chunks.map((chunk) => decoder.decode(chunk)).join("");
}

const retryableCallProps = { ...callProps, body: retryableBody };

try {
// Use async-retry to call the forwardRequestToOpenAi function with exponential backoff
await retry(
async (bail, attempt) => {
try {
const res = await callProvider(callProps);
const res = await callProvider(retryableCallProps);

lastResponse = res;
// Throw an error if the status code is 429 or 5xx
Expand Down
Loading