1 change: 1 addition & 0 deletions docker/.env.example
@@ -27,6 +27,7 @@ GID='1000'
# LLM_PROVIDER='anthropic'
# ANTHROPIC_API_KEY=sk-ant-xxxx
# ANTHROPIC_MODEL_PREF='claude-2'
# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m=5min cache, 1h=1hour cache). Reduces costs and improves speed by caching system prompts.

# LLM_PROVIDER='lmstudio'
# LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
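For reference, a sketch of how this env value is interpreted (the mapping is implemented by the cacheControl getter added further down in this PR; the lines below are illustrative, not part of the diff):

# ANTHROPIC_CACHE_CONTROL="5m"   ->  cache_control: { type: "ephemeral", ttl: "5m" }
# ANTHROPIC_CACHE_CONTROL="1h"   ->  cache_control: { type: "ephemeral", ttl: "1h" }
# unset or any other value       ->  no cache_control attached (caching disabled)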
@@ -1,7 +1,9 @@
import { useState, useEffect } from "react";
import System from "@/models/system";
import { CaretDown, CaretUp } from "@phosphor-icons/react";

export default function AnthropicAiOptions({ settings }) {
const [showAdvancedControls, setShowAdvancedControls] = useState(false);
const [inputValue, setInputValue] = useState(settings?.AnthropicApiKey);
const [anthropicApiKey, setAnthropicApiKey] = useState(
settings?.AnthropicApiKey
@@ -27,14 +29,63 @@ export default function AnthropicAiOptions({ settings }) {
onBlur={() => setAnthropicApiKey(inputValue)}
/>
</div>

{!settings?.credentialsOnly && (
<AnthropicModelSelection
apiKey={anthropicApiKey}
settings={settings}
/>
)}
</div>
<div className="flex justify-start mt-4">
<button
onClick={(e) => {
e.preventDefault();
setShowAdvancedControls(!showAdvancedControls);
}}
className="border-none text-theme-text-primary hover:text-theme-text-secondary flex items-center text-sm"
>
{showAdvancedControls ? "Hide" : "Show"} advanced settings
{showAdvancedControls ? (
<CaretUp size={14} className="ml-1" />
) : (
<CaretDown size={14} className="ml-1" />
)}
</button>
</div>
<div hidden={!showAdvancedControls}>
<div className="w-full flex items-start gap-4 mt-1.5">
<div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2">
<label className="text-white text-sm font-semibold">
Prompt Caching
</label>
</div>
<select
name="AnthropicCacheControl"
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option
value="none"
selected={settings?.AnthropicCacheControl === "none"}
>
No caching
</option>
<option
value="5m"
selected={settings?.AnthropicCacheControl === "5m"}
>
5 minutes
</option>
<option
value="1h"
selected={settings?.AnthropicCacheControl === "1h"}
>
1 hour
</option>
</select>
</div>
</div>
</div>
</div>
);
}
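A sketch of how the new select is expected to round-trip through the settings flow. The key names come from this PR; the flow itself is assumed from the existing form handling and is not shown in the diff:

// 1. Submitting the form yields { AnthropicCacheControl: "5m", ... } from the select above.
// 2. KEY_MAPPING in server/utils/helpers/updateENV.js validates the value against
//    ["none", "5m", "1h"] and writes it to process.env.ANTHROPIC_CACHE_CONTROL.
// 3. SystemSettings (server/models/systemSettings.js) reads it back as
//    AnthropicCacheControl: process.env.ANTHROPIC_CACHE_CONTROL || "none",
//    which drives the selected={settings?.AnthropicCacheControl === ...} checks above.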
1 change: 1 addition & 0 deletions server/.env.example
@@ -24,6 +24,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# LLM_PROVIDER='anthropic'
# ANTHROPIC_API_KEY=sk-ant-xxxx
# ANTHROPIC_MODEL_PREF='claude-2'
# ANTHROPIC_CACHE_CONTROL="5m" # Enable prompt caching (5m=5min cache, 1h=1hour cache). Reduces costs and improves speed by caching system prompts.

# LLM_PROVIDER='lmstudio'
# LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
1 change: 1 addition & 0 deletions server/models/systemSettings.js
@@ -481,6 +481,7 @@ const SystemSettings = {
// Anthropic Keys
AnthropicApiKey: !!process.env.ANTHROPIC_API_KEY,
AnthropicModelPref: process.env.ANTHROPIC_MODEL_PREF || "claude-2",
AnthropicCacheControl: process.env.ANTHROPIC_CACHE_CONTROL || "none",

// Gemini Keys
GeminiLLMApiKey: !!process.env.GEMINI_API_KEY,
53 changes: 50 additions & 3 deletions server/utils/AiProviders/anthropic/index.js
@@ -34,7 +34,9 @@ class AnthropicLLM {

this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.log(`Initialized with ${this.model}`);
this.log(
`Initialized with ${this.model}. Cache ${this.cacheControl ? `enabled (${this.cacheControl.ttl})` : "disabled"}`
);
}

log(text, ...args) {
@@ -57,6 +59,48 @@ class AnthropicLLM {
return true;
}

/**
* Parses the cache control ENV variable
*
* If caching is enabled, we can pass less than 1024 tokens and Anthropic will just
* ignore it unless it is above the model's minimum. Since this feature is opt-in
* we can safely assume that if caching is enabled that we should just pass the content as is.
* https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
*
* @param {string} value - The ENV value (5m or 1h)
* @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
*/
get cacheControl() {
// Store result in instance variable to avoid recalculating
if (this._cacheControl) return this._cacheControl;

if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
else {
const normalized =
process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
if (["5m", "1h"].includes(normalized))
this._cacheControl = { type: "ephemeral", ttl: normalized };
else this._cacheControl = null;
}
return this._cacheControl;
}

/**
* Builds system parameter with cache control if applicable
* @param {string} systemContent - The system prompt content
* @returns {string|array} System parameter for API call
*/
#buildSystemPrompt(systemContent) {
if (!systemContent || !this.cacheControl) return systemContent;
return [
{
type: "text",
text: systemContent,
cache_control: this.cacheControl,
},
];
}

/**
* Generates appropriate content array for a message + attachments.
* @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
@@ -105,18 +149,20 @@ class AnthropicLLM {

async getChatCompletion(messages = null, { temperature = 0.7 }) {
try {
const systemContent = messages[0].content;
const result = await LLMPerformanceMonitor.measureAsyncFunction(
this.anthropic.messages.create({
model: this.model,
max_tokens: 4096,
system: messages[0].content, // Strip out the system message
system: this.#buildSystemPrompt(systemContent),
messages: messages.slice(1), // Pop off the system message
temperature: Number(temperature ?? this.defaultTemp),
})
);

const promptTokens = result.output.usage.input_tokens;
const completionTokens = result.output.usage.output_tokens;

return {
textResponse: result.output.content[0].text,
metrics: {
@@ -134,11 +180,12 @@ class AnthropicLLM {
}

async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
const systemContent = messages[0].content;
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
this.anthropic.messages.stream({
model: this.model,
max_tokens: 4096,
system: messages[0].content, // Strip out the system message
system: this.#buildSystemPrompt(systemContent),
messages: messages.slice(1), // Pop off the system message
temperature: Number(temperature ?? this.defaultTemp),
}),
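To illustrate the effect of #buildSystemPrompt (a sketch only; the model name, prompt text, and chatHistory variable below are placeholders, not values from this PR): when caching is off the system field stays a plain string, and when ANTHROPIC_CACHE_CONTROL is set to a valid TTL it becomes a single text block carrying cache_control.

// Caching disabled (ANTHROPIC_CACHE_CONTROL unset or unrecognized):
await this.anthropic.messages.create({
  model: "claude-3-5-sonnet-latest", // placeholder
  max_tokens: 4096,
  system: "You are a helpful assistant.", // passed through unchanged
  messages: chatHistory,
  temperature: 0.7,
});

// Caching enabled (ANTHROPIC_CACHE_CONTROL="1h"):
await this.anthropic.messages.create({
  model: "claude-3-5-sonnet-latest", // placeholder
  max_tokens: 4096,
  system: [
    {
      type: "text",
      text: "You are a helpful assistant.",
      cache_control: { type: "ephemeral", ttl: "1h" }, // from the cacheControl getter
    },
  ],
  messages: chatHistory,
  temperature: 0.7,
});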
47 changes: 44 additions & 3 deletions server/utils/agents/aibitat/providers/anthropic.js
@@ -23,14 +23,55 @@ class AnthropicProvider extends Provider {
const client = new Anthropic(options);

super(client);

this.model = model;
}

/**
* Parses the cache control ENV variable
*
* If caching is enabled, we can pass less than 1024 tokens and Anthropic will just
* ignore it unless it is above the model's minimum. Since this feature is opt-in
* we can safely assume that if caching is enabled that we should just pass the content as is.
* https://docs.claude.com/en/docs/build-with-claude/prompt-caching#cache-limitations
*
* @param {string} value - The ENV value (5m or 1h)
* @returns {null|{type: "ephemeral", ttl: "5m" | "1h"}} Cache control configuration
*/
get cacheControl() {
// Store result in instance variable to avoid recalculating
if (this._cacheControl) return this._cacheControl;

if (!process.env.ANTHROPIC_CACHE_CONTROL) this._cacheControl = null;
else {
const normalized =
process.env.ANTHROPIC_CACHE_CONTROL.toLowerCase().trim();
if (["5m", "1h"].includes(normalized))
this._cacheControl = { type: "ephemeral", ttl: normalized };
else this._cacheControl = null;
}
return this._cacheControl;
}

get supportsAgentStreaming() {
return true;
}

/**
* Builds system parameter with cache control if applicable
* @param {string} systemContent - The system prompt content
* @returns {string|array} System parameter for API call
*/
#buildSystemPrompt(systemContent) {
if (!systemContent || !this.cacheControl) return systemContent;
return [
{
type: "text",
text: systemContent,
cache_control: this.cacheControl,
},
];
}

#prepareMessages(messages = []) {
// Extract system prompt and filter out any system messages from the main chat.
let systemPrompt =
@@ -149,7 +190,7 @@
{
model: this.model,
max_tokens: 4096,
system: systemPrompt,
system: this.#buildSystemPrompt(systemPrompt),
messages: chats,
stream: true,
...(Array.isArray(functions) && functions?.length > 0
@@ -276,7 +317,7 @@
{
model: this.model,
max_tokens: 4096,
system: systemPrompt,
system: this.#buildSystemPrompt(systemPrompt),
messages: chats,
stream: false,
...(Array.isArray(functions) && functions?.length > 0
9 changes: 9 additions & 0 deletions server/utils/helpers/updateENV.js
@@ -58,6 +58,15 @@ const KEY_MAPPING = {
envKey: "ANTHROPIC_MODEL_PREF",
checks: [isNotEmpty],
},
AnthropicCacheControl: {
envKey: "ANTHROPIC_CACHE_CONTROL",
checks: [
(input) =>
["none", "5m", "1h"].includes(input)
? null
: "Invalid cache control. Must be one of: 5m, 1h.",
],
},

GeminiLLMApiKey: {
envKey: "GEMINI_API_KEY",
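A quick sketch of how the new check behaves, assuming (as the existing isNotEmpty checks suggest) that updateENV treats a null return as valid and any returned string as the error to surface; the standalone check constant below is hypothetical, written only to exercise the validator added above:

const check = (input) =>
  ["none", "5m", "1h"].includes(input)
    ? null
    : "Invalid cache control. Must be one of: 5m, 1h.";

check("5m");   // null -> accepted, written to ANTHROPIC_CACHE_CONTROL
check("none"); // null -> accepted ("none" is valid even though the error text only lists 5m and 1h)
check("10m");  // "Invalid cache control. Must be one of: 5m, 1h." -> rejected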