Skip to content

Commit a4fed27

Browse files
authored
feat: [ENG-3365] implicit caching for anthropic endpoints on AI Gateway (#5127)
1 parent 1e2195d commit a4fed27

File tree

6 files changed

+141
-34
lines changed

6 files changed

+141
-34
lines changed

packages/cost/models/providers/bedrock.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,11 @@ export class BedrockProvider extends BaseProvider {
113113
if (endpoint.providerModelId.includes("claude-")) {
114114
const anthropicBody =
115115
context.bodyMapping === "OPENAI"
116-
? context.toAnthropic(context.parsedBody, endpoint.providerModelId)
116+
? context.toAnthropic(
117+
context.parsedBody,
118+
endpoint.providerModelId,
119+
{ includeCacheBreakpoints: false }
120+
)
117121
: context.parsedBody;
118122

119123
const updatedBody = {

packages/cost/models/providers/vertex.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,11 @@ export class VertexProvider extends BaseProvider {
9797
if (endpoint.providerModelId.includes("claude-")) {
9898
const anthropicBody =
9999
context.bodyMapping === "OPENAI"
100-
? context.toAnthropic(context.parsedBody, endpoint.providerModelId)
100+
? context.toAnthropic(
101+
context.parsedBody,
102+
endpoint.providerModelId,
103+
{ includeCacheBreakpoints: false }
104+
)
101105
: context.parsedBody;
102106
const updatedBody = {
103107
...anthropicBody,

packages/cost/models/types.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,5 +265,9 @@ export interface AuthResult {
265265
export interface RequestBodyContext {
266266
parsedBody: any;
267267
bodyMapping: BodyMappingType;
268-
toAnthropic: (body: any, providerModelId?: string) => any;
268+
toAnthropic: (
269+
body: any,
270+
providerModelId?: string,
271+
options?: { includeCacheBreakpoints?: boolean }
272+
) => any;
269273
}

packages/llm-mapper/transform/providers/openai/request/toAnthropic.ts

Lines changed: 115 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@ import { Plugin } from "@helicone-package/cost/models/types";
1414
export function toAnthropic(
1515
openAIBody: HeliconeChatCreateParams,
1616
providerModelId?: string,
17-
plugins?: Plugin[]
17+
plugins?: Plugin[],
18+
options?: {
19+
includeCacheBreakpoints?: boolean;
20+
}
1821
): AnthropicRequestBody {
22+
const includeCache = options?.includeCacheBreakpoints !== false;
1923
const antBody: AnthropicRequestBody = {
2024
model: providerModelId || openAIBody.model,
2125
messages: [],
@@ -35,8 +39,11 @@ export function toAnthropic(
3539
: [];
3640
}
3741

38-
const { messages, system } = extractSystemMessage(openAIBody.messages);
39-
antBody.messages = mapMessages(messages);
42+
const { messages, system } = extractSystemMessage(
43+
openAIBody.messages,
44+
includeCache
45+
);
46+
antBody.messages = mapMessages(messages, includeCache);
4047
if (system) {
4148
antBody.system = system;
4249
}
@@ -87,11 +94,18 @@ export function toAnthropic(
8794
throw new Error("Logit bias is not supported");
8895
}
8996

97+
// if and only if the user did not provide any cache control breakpoints,
98+
// add a default ephemeral 5m breakpoint at the end of the content.
99+
if (includeCache && !hasAnyCacheControl(openAIBody)) {
100+
appendDefaultCacheBreakpoint(antBody);
101+
}
102+
90103
return antBody;
91104
}
92105

93106
function openAIContentToAnthropicContent(
94-
content: string | HeliconeChatCompletionContentPart[] | null
107+
content: string | HeliconeChatCompletionContentPart[] | null,
108+
includeCache: boolean
95109
): string | AnthropicContentBlock[] {
96110
if (content === null) {
97111
return "";
@@ -107,7 +121,9 @@ function openAIContentToAnthropicContent(
107121
return {
108122
type: "text",
109123
text: part.text,
110-
cache_control: part.cache_control,
124+
...(includeCache && part.cache_control
125+
? { cache_control: part.cache_control }
126+
: {}),
111127
};
112128
case "image_url":
113129
// expected format: { type: "image_url", image_url: { url: string } }
@@ -132,7 +148,9 @@ function openAIContentToAnthropicContent(
132148
media_type: mediaType,
133149
data: base64Data,
134150
},
135-
cache_control: part.cache_control,
151+
...(includeCache && part.cache_control
152+
? { cache_control: part.cache_control }
153+
: {}),
136154
};
137155
} else {
138156
return {
@@ -141,7 +159,9 @@ function openAIContentToAnthropicContent(
141159
type: "url",
142160
url: url,
143161
},
144-
cache_control: part.cache_control,
162+
...(includeCache && part.cache_control
163+
? { cache_control: part.cache_control }
164+
: {}),
145165
};
146166
}
147167
case "input_audio":
@@ -168,7 +188,67 @@ function openAIContentToAnthropicContent(
168188
});
169189
}
170190

171-
function extractSystemMessage(messages: HeliconeChatCreateParams["messages"]): {
191+
function hasAnyCacheControl(body: HeliconeChatCreateParams): boolean {
192+
const msgs = body.messages || [];
193+
for (const m of msgs) {
194+
if ((m as any).cache_control) {
195+
return true;
196+
}
197+
const content = (m as any).content;
198+
if (Array.isArray(content)) {
199+
for (const part of content) {
200+
if ((part as any)?.cache_control) {
201+
return true;
202+
}
203+
}
204+
}
205+
}
206+
return false;
207+
}
208+
209+
function appendDefaultCacheBreakpoint(antBody: AnthropicRequestBody) {
210+
const defaultCache = { type: "ephemeral" as const, ttl: "5m" as const };
211+
212+
if (antBody.messages.length > 0) {
213+
const last = antBody.messages[antBody.messages.length - 1];
214+
if (typeof last.content === "string") {
215+
last.content = [
216+
{
217+
type: "text",
218+
text: last.content,
219+
cache_control: defaultCache,
220+
},
221+
];
222+
return;
223+
}
224+
if (Array.isArray(last.content) && last.content.length > 0) {
225+
const idx = last.content.length - 1;
226+
const block = last.content[idx];
227+
last.content[idx] = {
228+
...block,
229+
cache_control: defaultCache,
230+
} as AnthropicContentBlock;
231+
return;
232+
}
233+
}
234+
235+
if (antBody.system && Array.isArray(antBody.system) && antBody.system.length > 0) {
236+
const idx = antBody.system.length - 1;
237+
const block = antBody.system[idx];
238+
antBody.system[idx] = {
239+
...block,
240+
cache_control: defaultCache,
241+
} as AnthropicContentBlock;
242+
return;
243+
}
244+
245+
// If system is a string and there are no messages, it's okay not to add it
246+
}
247+
248+
function extractSystemMessage(
249+
messages: HeliconeChatCreateParams["messages"],
250+
includeCache: boolean
251+
): {
172252
messages: HeliconeChatCreateParams["messages"];
173253
system?: string | AnthropicContentBlock[];
174254
} {
@@ -183,7 +263,7 @@ function extractSystemMessage(messages: HeliconeChatCreateParams["messages"]): {
183263
typeof systemMessages[0].content === "string"
184264
) {
185265
const content = systemMessages[0].content;
186-
if (!systemMessages[0].cache_control) {
266+
if (!includeCache || !systemMessages[0].cache_control) {
187267
return {
188268
messages: otherMessages,
189269
system: content,
@@ -195,20 +275,27 @@ function extractSystemMessage(messages: HeliconeChatCreateParams["messages"]): {
195275
{
196276
type: "text",
197277
text: systemMessages[0].content,
198-
cache_control: systemMessages[0].cache_control,
278+
...(includeCache && systemMessages[0].cache_control
279+
? { cache_control: systemMessages[0].cache_control }
280+
: {}),
199281
},
200282
],
201283
};
202284
}
203285

204286
const systemMessageBlocks: AnthropicContentBlock[] = [];
205287
for (const msg of systemMessages) {
206-
const convertedBlock = openAIContentToAnthropicContent(msg.content);
288+
const convertedBlock = openAIContentToAnthropicContent(
289+
msg.content,
290+
includeCache
291+
);
207292
if (typeof convertedBlock === "string") {
208293
systemMessageBlocks.push({
209294
type: "text",
210295
text: convertedBlock,
211-
cache_control: msg.cache_control,
296+
...(includeCache && (msg as any).cache_control
297+
? { cache_control: (msg as any).cache_control }
298+
: {}),
212299
});
213300
} else {
214301
systemMessageBlocks.push(...convertedBlock);
@@ -222,7 +309,8 @@ function extractSystemMessage(messages: HeliconeChatCreateParams["messages"]): {
222309
}
223310

224311
function mapMessages(
225-
messages: HeliconeChatCreateParams["messages"]
312+
messages: HeliconeChatCreateParams["messages"],
313+
includeCache: boolean
226314
): AnthropicRequestBody["messages"] {
227315
if (!messages) {
228316
return [];
@@ -241,7 +329,9 @@ function mapMessages(
241329
type: "tool_result",
242330
tool_use_id: message.tool_call_id,
243331
content: typeof message.content === "string" ? message.content : "",
244-
cache_control: message.cache_control,
332+
...(includeCache && (message as any).cache_control
333+
? { cache_control: (message as any).cache_control }
334+
: {}),
245335
},
246336
],
247337
};
@@ -272,18 +362,19 @@ function mapMessages(
272362
let processedContent: string | AnthropicContentBlock[] = [];
273363
if (message.content) {
274364
const convertedContent = openAIContentToAnthropicContent(
275-
message.content
365+
message.content,
366+
includeCache
276367
);
277368
if (typeof convertedContent === "string") {
278369
// if the message requires forming a content array
279-
if (
280-
message.cache_control ||
281-
processedToolCallContent.length > 0
282-
) {
370+
const hasMsgCache = includeCache && !!(message as any).cache_control;
371+
if (hasMsgCache || processedToolCallContent.length > 0) {
283372
processedContent.push({
284373
type: "text",
285374
text: convertedContent,
286-
cache_control: message.cache_control,
375+
...(includeCache && (message as any).cache_control
376+
? { cache_control: (message as any).cache_control }
377+
: {}),
287378
});
288379
} else {
289380
// there was no cache control breakpoint, the content was just string,
@@ -300,8 +391,10 @@ function mapMessages(
300391
antMessage.content = processedContent;
301392
return antMessage;
302393
}
303-
304-
antMessage.content = openAIContentToAnthropicContent(message.content);
394+
antMessage.content = openAIContentToAnthropicContent(
395+
message.content,
396+
includeCache
397+
);
305398
return antMessage;
306399
});
307400
}

worker/src/lib/ai-gateway/AttemptExecutor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ export class AttemptExecutor {
135135
const bodyResult = await buildRequestBody(endpoint, {
136136
parsedBody,
137137
bodyMapping: requestWrapper.heliconeHeaders.gatewayConfig.bodyMapping,
138-
toAnthropic: (body, modelId) => toAnthropic(body, modelId, plugins),
138+
toAnthropic: (body, modelId, options) => toAnthropic(body, modelId, plugins, options)
139139
});
140140

141141
if (isErr(bodyResult) || !bodyResult.data) {

worker/test/ai-gateway/map-anthropic.spec.ts

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,10 @@ describe("Anthropic to OpenAI Response Mapper", () => {
233233

234234
expect(result.model).toBe("claude-3.5-haiku");
235235
expect(result.messages).toHaveLength(1);
236-
expect(result.messages[0]).toEqual({
237-
role: "user",
238-
content: "Hello, how are you?",
239-
});
236+
expect(result.messages[0].role).toBe("user");
237+
expect(result.messages[0].content).toEqual([
238+
{ type: "text", text: "Hello, how are you?", cache_control: { type: "ephemeral", ttl: "5m" } },
239+
]);
240240
expect(result.temperature).toBe(0.7);
241241
expect(result.max_tokens).toBe(1000);
242242
expect(result.stream).toBe(false);
@@ -321,6 +321,7 @@ describe("Anthropic to OpenAI Response Mapper", () => {
321321
type: "tool_result",
322322
tool_use_id: "call_DoWwQL7W9AJBpwCtn9tRC2gt",
323323
content: "100",
324+
cache_control: { type: "ephemeral", ttl: "5m" },
324325
});
325326

326327
expect(result.tools).toHaveLength(1);
@@ -375,6 +376,7 @@ describe("Anthropic to OpenAI Response Mapper", () => {
375376
id: "call_123",
376377
name: "calculate",
377378
input: { expression: "2+2" },
379+
cache_control: { type: "ephemeral", ttl: "5m" },
378380
});
379381
});
380382

@@ -397,10 +399,10 @@ describe("Anthropic to OpenAI Response Mapper", () => {
397399

398400
expect(result.system).toBe("You are a helpful assistant.");
399401
expect(result.messages).toHaveLength(1);
400-
expect(result.messages[0]).toEqual({
401-
role: "user",
402-
content: "Hello",
403-
});
402+
expect(result.messages[0].role).toBe("user");
403+
expect(result.messages[0].content).toEqual([
404+
{ type: "text", text: "Hello", cache_control: { type: "ephemeral", ttl: "5m" } },
405+
]);
404406
});
405407

406408
it("should handle cache control on system message", () => {

0 commit comments

Comments (0)