14 changes: 12 additions & 2 deletions api/_utils/_aiModels.ts
@@ -1,7 +1,8 @@
import { openai } from "@ai-sdk/openai";
import { anthropic } from "@ai-sdk/anthropic";
import { google } from "@ai-sdk/google";
import type { LanguageModel } from "ai";
import type { LanguageModel, ProviderOptions } from "ai";
import { mergeProviderOptions } from "./prompt-caching.js";

// ============================================================================
// AI Model Types and Constants (duplicated from src/types/aiModels.ts)
@@ -54,7 +55,7 @@ export const getModelInstance = (model: SupportedModel): LanguageModel => {

export function getOpenAIProviderOptions(
model: SupportedModel
): { openai: { reasoningEffort?: OpenAIReasoningEffort } } | undefined {
): ProviderOptions | undefined {
if (AI_MODELS[model].provider !== "OpenAI") {
return undefined;
}
@@ -76,3 +77,12 @@ export function getOpenAIProviderOptions(
openai: openaiOptions,
};
}

export function getPromptOptimizedProviderOptions(
model: SupportedModel,
...options: Array<ProviderOptions | undefined>
): ProviderOptions | undefined {
const openAIOptions = getOpenAIProviderOptions(model);

return mergeProviderOptions(openAIOptions, ...options);
}
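
A minimal sketch of the merge semantics: the OpenAI reasoning options come from the model table, and any extra per-provider options are layered on top via mergeProviderOptions. The model id and the reasoning-effort value in the comment are illustrative, not taken from AI_MODELS.

```ts
import {
  getPromptOptimizedProviderOptions,
  type SupportedModel,
} from "./_aiModels.js";
import { ANTHROPIC_PROMPT_CACHE_PROVIDER_OPTIONS } from "./prompt-caching.js";

// Later arguments merge per provider key, so options for different providers
// coexist and same-provider keys from later arguments win.
const merged = getPromptOptimizedProviderOptions(
  "gpt-5" as SupportedModel, // hypothetical model id, for illustration only
  ANTHROPIC_PROMPT_CACHE_PROVIDER_OPTIONS
);
// e.g. { openai: { reasoningEffort: "low" }, anthropic: { cacheControl: { type: "ephemeral" } } }
```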
108 changes: 108 additions & 0 deletions api/_utils/prompt-caching.ts
@@ -0,0 +1,108 @@
import type {
LanguageModel,
ModelMessage,
ProviderOptions,
SystemModelMessage,
} from "ai";

export const ANTHROPIC_PROMPT_CACHE_PROVIDER_OPTIONS = {
anthropic: {
cacheControl: { type: "ephemeral" },
},
} as const satisfies ProviderOptions;

export function mergeProviderOptions(
...options: Array<ProviderOptions | undefined>
): ProviderOptions | undefined {
const merged: ProviderOptions = {};

for (const option of options) {
if (!option) continue;

for (const [provider, providerOptions] of Object.entries(option)) {
merged[provider] = {
...((merged[provider] as Record<string, unknown> | undefined) ?? {}),
...providerOptions,
};
}
}

return Object.keys(merged).length > 0 ? merged : undefined;
}

const textChars = (content: ModelMessage["content"]): number => {
if (typeof content === "string") {
return content.length;
}

return content.reduce((total, part) => {
if ("text" in part && typeof part.text === "string") {
return total + part.text.length;
}

return total;
}, 0);
};

export function withPromptCacheForLongContent<
T extends { content: ModelMessage["content"]; providerOptions?: ProviderOptions },
>(value: T, minChars = 3000): T & { providerOptions?: ProviderOptions } {
return textChars(value.content) >= minChars ? withPromptCache(value) : value;
}

export function withPromptCache<T extends { providerOptions?: ProviderOptions }>(
value: T
): T & { providerOptions: ProviderOptions } {
return {
...value,
providerOptions: mergeProviderOptions(
value.providerOptions,
ANTHROPIC_PROMPT_CACHE_PROVIDER_OPTIONS
),
} as T & { providerOptions: ProviderOptions };
}

export function createCachedSystemMessage(
content: string
): SystemModelMessage {
return withPromptCache({
role: "system",
content,
});
}

export function isAnthropicModel(model: LanguageModel): boolean {
if (typeof model === "string") {
return /anthropic|claude/i.test(model);
}

const modelDetails = model as { provider?: string; modelId?: string };
return [modelDetails.provider, modelDetails.modelId].some(
(value) => typeof value === "string" && /anthropic|claude/i.test(value)
);
}

export function addPromptCacheToLastMessage(
messages: ModelMessage[],
model: LanguageModel
): ModelMessage[] {
if (messages.length === 0 || !isAnthropicModel(model)) {
return messages;
}

return messages.map((message, index) =>
index === messages.length - 1 ? withPromptCache(message) : message
);
}

export function preparePromptCachingStep({
messages,
model,
}: {
messages: ModelMessage[];
model: LanguageModel;
}): { messages: ModelMessage[] } {
return {
messages: addPromptCacheToLastMessage(messages, model),
};
}
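
A sketch of how these helpers might be wired together at a call site; the model id and message content are stand-ins, and prepareStep is the AI SDK hook used elsewhere in this PR.

```ts
import { streamText } from "ai";
import { anthropic } from "@ai-sdk/anthropic";
import {
  createCachedSystemMessage,
  preparePromptCachingStep,
  withPromptCacheForLongContent,
} from "./prompt-caching.js";

declare const longTranscript: string; // stand-in for a long string loaded elsewhere

// The static system prompt carries the ephemeral cacheControl marker.
const system = createCachedSystemMessage("You are a helpful assistant ...");

// User content is only marked once it crosses the 3000-character default,
// so short turns avoid cache-write overhead.
const user = withPromptCacheForLongContent({
  role: "user" as const,
  content: longTranscript,
});

const result = streamText({
  model: anthropic("claude-sonnet-4-5"), // illustrative model id
  messages: [system, user],
  // Re-marks the trailing message on each step so the growing prefix stays
  // cacheable across tool-call rounds; a no-op for non-Anthropic models.
  prepareStep: preparePromptCachingStep,
});
```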
13 changes: 2 additions & 11 deletions api/_utils/ryo-conversation.ts
@@ -33,6 +33,7 @@ import {
type ChatToolProfile,
type ChatToolsContext,
} from "../chat/tools/index.js";
import { createCachedSystemMessage } from "./prompt-caching.js";

export interface RyoConversationSystemState {
username?: string | null;
@@ -160,12 +161,6 @@ export interface PreparedRyoConversation {
userTimeZone?: string;
}

const CACHE_CONTROL_OPTIONS = {
providerOptions: {
anthropic: { cacheControl: { type: "ephemeral" } },
},
} as const;

const CHANNEL_PROMPT_SECTIONS = {
chat: [
CORE_PRIORITY_INSTRUCTIONS,
@@ -712,11 +707,7 @@ export async function prepareRyoConversationModelInput(
});

const enrichedMessages = [
{
role: "system" as const,
content: staticSystemPrompt,
...CACHE_CONTROL_OPTIONS,
},
createCachedSystemMessage(staticSystemPrompt),
...(dynamicSystemPrompt
? [{ role: "system" as const, content: dynamicSystemPrompt }]
: []),
28 changes: 22 additions & 6 deletions api/ai/extract-memories.ts
@@ -26,6 +26,10 @@ import {
markDailyNoteProcessed,
MAX_MEMORIES_PER_USER,
} from "../_utils/_memory.js";
import {
createCachedSystemMessage,
withPromptCacheForLongContent,
} from "../_utils/prompt-caching.js";

export const runtime = "nodejs";
export const maxDuration = 60;
@@ -320,9 +324,15 @@ export async function extractMemoriesFromConversation({
const { object: result } = await generateObject({
model: google("gemini-3-flash-preview"),
schema: extractionSchema,
prompt:
`${EXTRACTION_PROMPT}${existingStateSection}\n\n--- CONVERSATION ---\n${conversationText}\n--- END CONVERSATION ---\n\n` +
`Extract up to 8 daily notes and up to ${maxLongTerm} long-term memories. Return empty arrays if nothing qualifies.`,
messages: [
createCachedSystemMessage(EXTRACTION_PROMPT),
withPromptCacheForLongContent({
role: "user",
content:
`${existingStateSection}\n\n--- CONVERSATION ---\n${conversationText}\n--- END CONVERSATION ---\n\n` +
`Extract up to 8 daily notes and up to ${maxLongTerm} long-term memories. Return empty arrays if nothing qualifies.`,
}),
],
temperature: 0.3,
});

@@ -400,9 +410,15 @@ export async function extractMemoriesFromConversation({
const { object: consolidated } = await generateObject({
model: google("gemini-3-flash-preview"),
schema: consolidationSchema,
prompt:
`${CONSOLIDATION_PROMPT}\n\nNEW:\nSummary: ${mem.summary}\nContent: ${mem.content}\n\nEXISTING:\n${existingContentText}\n\n` +
"Merge into one clean, deduplicated entry.",
messages: [
createCachedSystemMessage(CONSOLIDATION_PROMPT),
withPromptCacheForLongContent({
role: "user",
content:
`NEW:\nSummary: ${mem.summary}\nContent: ${mem.content}\n\nEXISTING:\n${existingContentText}\n\n` +
"Merge into one clean, deduplicated entry.",
}),
],
temperature: 0.3,
});

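The same generateObject pattern in isolation: static instructions go into a cached system message, and the variable transcript goes into a user message that is only cache-marked when it is long. The schema and inputs below are stand-ins; the anthropic-scoped cache option is simply ignored by non-Anthropic providers such as Gemini.

```ts
import { generateObject } from "ai";
import { google } from "@ai-sdk/google";
import { z } from "zod";
import {
  createCachedSystemMessage,
  withPromptCacheForLongContent,
} from "../_utils/prompt-caching.js";

// Stand-ins for illustration only.
const schema = z.object({ memories: z.array(z.string()) });
declare const EXTRACTION_PROMPT: string;
declare const conversationText: string;

const { object } = await generateObject({
  model: google("gemini-3-flash-preview"),
  schema,
  messages: [
    // Stable instructions: cache-marked so repeated calls can share the prefix.
    createCachedSystemMessage(EXTRACTION_PROMPT),
    // Cache-marked only when the transcript crosses the 3000-character threshold.
    withPromptCacheForLongContent({
      role: "user" as const,
      content: `--- CONVERSATION ---\n${conversationText}\n--- END CONVERSATION ---`,
    }),
  ],
  temperature: 0.3,
});
```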
20 changes: 18 additions & 2 deletions api/ai/process-daily-notes.ts
@@ -34,6 +34,10 @@ import {
MAX_MEMORIES_PER_USER,
CANONICAL_MEMORY_KEYS,
} from "../_utils/_memory.js";
import {
createCachedSystemMessage,
withPromptCacheForLongContent,
} from "../_utils/prompt-caching.js";

export const runtime = "nodejs";
export const maxDuration = 60;
@@ -403,7 +407,13 @@ async function _processSingleDayBatch(
const { object: result } = await generateObject({
model: google("gemini-3-flash-preview"),
schema: dailyNotesExtractionSchema,
prompt: `${DAILY_NOTES_EXTRACTION_PROMPT}${existingStateSection}\n\n--- DAILY NOTES ---\n${dailyNotesText}\n--- END DAILY NOTES ---\n\nExtract up to ${Math.max(maxExtract, 3)} long-term memories. For existing keys, you may suggest updates via relatedKeys. Return empty array if nothing qualifies.`,
messages: [
createCachedSystemMessage(DAILY_NOTES_EXTRACTION_PROMPT),
withPromptCacheForLongContent({
role: "user",
content: `${existingStateSection}\n\n--- DAILY NOTES ---\n${dailyNotesText}\n--- END DAILY NOTES ---\n\nExtract up to ${Math.max(maxExtract, 3)} long-term memories. For existing keys, you may suggest updates via relatedKeys. Return empty array if nothing qualifies.`,
}),
],
temperature: 0.3,
});

@@ -457,7 +467,13 @@ async function _processSingleDayBatch(
const { object: consolidated } = await generateObject({
model: google("gemini-3-flash-preview"),
schema: consolidationSchema,
prompt: `${CONSOLIDATION_PROMPT}\n\nNEW:\nSummary: ${mem.summary}\nContent: ${mem.content}\n\nEXISTING:\n${existingContentText}\n\nMerge into one clean, deduplicated entry.`,
messages: [
createCachedSystemMessage(CONSOLIDATION_PROMPT),
withPromptCacheForLongContent({
role: "user",
content: `NEW:\nSummary: ${mem.summary}\nContent: ${mem.content}\n\nEXISTING:\n${existingContentText}\n\nMerge into one clean, deduplicated entry.`,
}),
],
temperature: 0.3,
});

3 changes: 2 additions & 1 deletion api/ai/ryo-reply.ts
@@ -12,6 +12,7 @@ import { roomExists, addMessage, generateId, getCurrentTimestamp } from "../room
import { broadcastNewMessage } from "../rooms/_helpers/_pusher.js";
import type { Message } from "../rooms/_helpers/_types.js";
import { apiHandler } from "../_utils/api-handler.js";
import { createCachedSystemMessage } from "../_utils/prompt-caching.js";

export const runtime = "nodejs";

@@ -111,7 +112,7 @@ export default apiHandler<RyoReplyRequest>(
}

const messages = [
{ role: "system" as const, content: STATIC_SYSTEM_PROMPT },
createCachedSystemMessage(STATIC_SYSTEM_PROMPT),
systemState?.chatRoomContext
? {
role: "system" as const,
3 changes: 2 additions & 1 deletion api/applet-ai.ts
@@ -14,6 +14,7 @@ import * as RateLimit from "./_utils/_rate-limit.js";
import { getClientIp } from "./_utils/_rate-limit.js";
import { apiHandler } from "./_utils/api-handler.js";
import { isAllowedAppHost } from "./_utils/runtime-config.js";
import { createCachedSystemMessage } from "./_utils/prompt-caching.js";

export const runtime = "nodejs";
export const maxDuration = 60;
@@ -236,7 +237,7 @@ const buildModelMessages = (
context?: string
): ModelMessage[] => {
const messages: ModelMessage[] = [
{ role: "system", content: APPLET_SYSTEM_PROMPT.trim() },
createCachedSystemMessage(APPLET_SYSTEM_PROMPT.trim()),
];

if (context) {
28 changes: 20 additions & 8 deletions api/chat.ts
@@ -9,7 +9,7 @@ import { google } from "@ai-sdk/google";
import {
DEFAULT_MODEL,
SUPPORTED_AI_MODELS,
getOpenAIProviderOptions,
getPromptOptimizedProviderOptions,
type SupportedModel,
} from "./_utils/_aiModels.js";
import {
@@ -21,6 +21,10 @@ import {
type RyoConversationSystemState,
type SimpleConversationMessage,
} from "./_utils/ryo-conversation.js";
import {
createCachedSystemMessage,
preparePromptCachingStep,
} from "./_utils/prompt-caching.js";
import { checkAndIncrementAIMessageCount } from "./_utils/_rate-limit.js";
import { apiHandler } from "./_utils/api-handler.js";
import { getHeader } from "./_utils/request-helpers.js";
@@ -245,7 +249,9 @@ export default apiHandler<{
model: google("gemini-3-flash-preview"),
temperature: 1,
maxOutputTokens: 2000,
system: `You are Ryo, a friendly AI assistant. You're greeting a returning user at the start of a new chat.
messages: [
createCachedSystemMessage(
`You are Ryo, a friendly AI assistant. You're greeting a returning user at the start of a new chat.

Your style:
- Lowercase, casual, warm
@@ -256,10 +262,6 @@ Your style:
- Be specific — reference something from their memories or recent activity
- Mix it up: sometimes ask a question, sometimes share an observation, sometimes reference a shared interest

It's ${dayOfWeek} ${sfTime}. The user's name is "${username}".

${greetingMemoryContext}

Generate ONE short proactive greeting. Pick one interesting angle from the context — a recent topic, a memory, something timely — and use it naturally. Don't try to cover everything.

Examples of good greetings:
@@ -269,7 +271,16 @@ Examples of good greetings:
- "hey ryo. happy friday — any plans?"

Do NOT start with generic greetings like "hey! i'm ryo" or "welcome back". Jump straight into something specific and interesting. Output ONLY the greeting text, nothing else.`,
prompt: "Generate a proactive greeting.",
),
{
role: "system",
content: `It's ${dayOfWeek} ${sfTime}. The user's name is "${username}".\n\n${greetingMemoryContext}`,
},
{
role: "user",
content: "Generate a proactive greeting.",
},
],
});

const greeting = text.trim();
@@ -330,13 +341,14 @@ Do NOT start with generic greetings like "hey! i'm ryo" or "welcome back". Jump
experimental_transform: smoothStream({
chunking: /[\u4E00-\u9FFF]|\S+\s+/,
}),
prepareStep: preparePromptCachingStep,
headers: {
// Enable fine-grained tool streaming for Anthropic models
...(model.startsWith("claude")
? { "anthropic-beta": "fine-grained-tool-streaming-2025-05-14" }
: {}),
},
providerOptions: getOpenAIProviderOptions(model as SupportedModel),
providerOptions: getPromptOptimizedProviderOptions(model as SupportedModel),
});

// Set CORS headers
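The reshuffled greeting prompt follows the same logic: prompt caching is prefix-based, so the cache-marked block is only reused when it matches exactly. Moving the volatile pieces (date, username, memory context) into a separate system message after the cached static instructions keeps that prefix stable. A condensed sketch of the resulting layout, with stand-in names for the two content blocks:

```ts
const messages = [
  createCachedSystemMessage(staticGreetingInstructions), // stable, cacheable prefix
  { role: "system" as const, content: dynamicContext },  // time, username, memories: volatile, uncached
  { role: "user" as const, content: "Generate a proactive greeting." },
];
```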
6 changes: 4 additions & 2 deletions api/cron/telegram-heartbeat.ts
@@ -38,10 +38,11 @@ import {
prepareRyoConversationModelInput,
type SimpleConversationMessage,
} from "../_utils/ryo-conversation.js";
import { preparePromptCachingStep } from "../_utils/prompt-caching.js";
import {
TELEGRAM_DEFAULT_MODEL,
SUPPORTED_AI_MODELS,
getOpenAIProviderOptions,
getPromptOptimizedProviderOptions,
type SupportedModel,
} from "../_utils/_aiModels.js";
import { getHeader } from "../_utils/request-helpers.js";
@@ -413,7 +414,8 @@ export default async function handler(
temperature: 0.7,
maxOutputTokens: 4000,
stopWhen: stepCountIs(6),
providerOptions: getOpenAIProviderOptions(telegramModel),
prepareStep: preparePromptCachingStep,
providerOptions: getPromptOptimizedProviderOptions(telegramModel),
onStepFinish: async (stepResult) => {
if (stepResult.toolResults.length > 0) {
logger.info("Telegram heartbeat completed tool step", {