Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,18 @@ className="bg-background text-foreground border-border"
- Test in both light and dark modes
- When making changes to `/packages`, run the tests (`npx jest __tests__/` in `/packages`) to make sure nothing else is broken

### TypeScript Type Changes

When modifying TypeScript types (interfaces, type aliases, union types):

1. **Run type checking before committing**: `npx tsc --noEmit` in the relevant package to catch type errors (Jest tests alone won't catch all type mismatches)
2. **Search for related types**: When updating union types (e.g., `"low" | "medium" | "high"`), grep for similar patterns to ensure all related types are updated:
```bash
grep -r "thinkingLevel" packages/ # Find all usages of similar type
```
3. **Check cross-package dependencies**: Types in `/packages/` are often used across multiple packages (llm-mapper, prompts, cost). Verify changes don't break consumers.
4. **Run the full build**: For `/packages/` changes, run `cd web && yarn build` or check Vercel preview to catch type errors across the monorepo.

# Helicone Design System Guidelines

## Core Principles
Expand Down
8 changes: 4 additions & 4 deletions packages/__tests__/llm-mapper/google-reasoning.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ describe("Google Reasoning/Thinking Support", () => {
});
});

it("should map reasoning_effort 'medium' to thinkingLevel 'low'", () => {
it("should pass through reasoning_effort 'medium' as thinkingLevel 'medium'", () => {
const openAIRequest: HeliconeChatCreateParams = {
model: "gemini-3-pro",
model: "gemini-3-flash-preview",
messages: [{ role: "user", content: "Test" }],
reasoning_effort: "medium",
};

const googleRequest = toGoogle(openAIRequest);

// Google only supports low/high, so medium maps to low
// Pass through reasoning_effort directly as thinkingLevel
expect(googleRequest.generationConfig?.thinkingConfig).toEqual({
includeThoughts: true,
thinkingLevel: "low",
thinkingLevel: "medium",
});
});

Expand Down
17 changes: 3 additions & 14 deletions packages/llm-mapper/transform/providers/openai/request/toGoogle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,16 +258,6 @@ function supportsThinkingLevel(model: string): boolean {
return false;
}

/**
* Maps OpenAI reasoning_effort to Google thinkingLevel.
*/
function mapReasoningEffortToThinkingLevel(
effort: "low" | "medium" | "high"
): "low" | "high" {
// Google only supports "low" and "high", so map "medium" to "low"
return effort === "high" ? "high" : "low";
}

/**
* Builds the Google thinking configuration from OpenAI reasoning parameters.
*
Expand Down Expand Up @@ -326,10 +316,9 @@ function buildThinkingConfig(

// Handle reasoning_effort
if (modelSupportsThinkingLevel) {
// Gemini 3+ models: use thinkingLevel
thinkingConfig.thinkingLevel = mapReasoningEffortToThinkingLevel(
reasoningEffort as "low" | "medium" | "high"
);
// Gemini 3+ models: pass through reasoning_effort as thinkingLevel
// Google supports "low", "medium", "high" (Flash also supports "minimal" via reasoning_options)
thinkingConfig.thinkingLevel = reasoningEffort as "low" | "medium" | "high";
} else {
// Gemini 2.5 models: use dynamic thinkingBudget (-1)
thinkingConfig.thinkingBudget = -1;
Expand Down
10 changes: 6 additions & 4 deletions packages/llm-mapper/transform/types/google.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export type GeminiTool = {

export type GeminiThinkingConfig = {
includeThoughts?: boolean;
thinkingLevel?: "low" | "high";
thinkingLevel?: "minimal" | "low" | "medium" | "high";
thinkingBudget?: number;
};

Expand Down Expand Up @@ -87,8 +87,8 @@ export type ChatCompletionMessage =
export interface GoogleReasoningOptions {
/** Token budget for thinking (Gemini 2.5 models) */
budget_tokens?: number;
/** Thinking level (Gemini 3+ models) */
thinking_level?: "low" | "high";
/** Thinking level (Gemini 3+ models): minimal (Flash only), low, medium (Flash only), high */
thinking_level?: "minimal" | "low" | "medium" | "high";
}
// === RESPONSE TYPES ===
export interface GoogleFunctionCall {
Expand Down Expand Up @@ -157,10 +157,12 @@ export interface GoogleThinkingConfig {
includeThoughts?: boolean;
/**
* Thinking level for Gemini 3+ models
* - "minimal" for minimal reasoning (Flash only)
* - "low" for faster, less detailed reasoning
* - "medium" for balanced reasoning (Flash only)
* - "high" for more detailed reasoning
*/
thinkingLevel?: "low" | "high";
thinkingLevel?: "minimal" | "low" | "medium" | "high";
/**
* Token budget for thinking (for Gemini 2.5 models)
* - Specific token values (e.g., 1024)
Expand Down
4 changes: 3 additions & 1 deletion packages/prompts/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,12 @@ export interface HeliconeReasoningOptions {

/**
* Thinking level for Google Gemini 3+ models.
* - "minimal" for minimal thinking (Flash only)
* - "low" for faster, less detailed reasoning
* - "medium" for balanced reasoning (Flash only)
* - "high" for more detailed reasoning
*/
thinking_level?: "low" | "high";
thinking_level?: "minimal" | "low" | "medium" | "high";
};
}

Expand Down
20 changes: 19 additions & 1 deletion worker/src/lib/clients/ProviderClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,30 @@ export async function callProviderWithRetry(
): Promise<Response> {
let lastResponse;

// Convert ReadableStream body to string before retry loop to allow reuse across retries.
// ReadableStream bodies can only be consumed once, so we need to read them into a string
// that can be safely reused for each retry attempt.
let retryableBody = callProps.body;
if (retryableBody instanceof ReadableStream) {
const reader = retryableBody.getReader();
const chunks: Uint8Array[] = [];
while (true) {
const { done, value } = await reader.read();
if (done) break;
if (value) chunks.push(value);
}
const decoder = new TextDecoder();
retryableBody = chunks.map((chunk) => decoder.decode(chunk)).join("");
}

const retryableCallProps = { ...callProps, body: retryableBody };

try {
// Use async-retry to call the forwardRequestToOpenAi function with exponential backoff
await retry(
async (bail, attempt) => {
try {
const res = await callProvider(callProps);
const res = await callProvider(retryableCallProps);

lastResponse = res;
// Throw an error if the status code is 429 or 5xx
Expand Down
Loading