Skip to content

Commit 299197e

Browse files
committed
separate parts
1 parent 4a6368b commit 299197e

7 files changed

Lines changed: 127 additions & 104 deletions

File tree

server-cloudflare/models/llm.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import type { Env } from "../src/types";
2+
import { getFirstMessagePrompt, getSystemPrompt } from "../src/prompt";
3+
4+
/**
 * One conversation turn, in the shape placed into the `messages` array of the
 * chat-completions request built by `generateOpenAIReply`.
 */
export interface ChatMessage {
5+
role: "system" | "user" | "assistant";
6+
content: string;
7+
}
8+
9+
/**
 * Requests the next assistant reply from the OpenAI Chat Completions endpoint.
 *
 * The request consists of the system prompt (`getSystemPrompt(env)`), then
 * `history` in order, then exactly one user turn: the trimmed `transcript`, or
 * `getFirstMessagePrompt(env)` when the transcript is null or blank.
 *
 * @param env - Worker environment; reads `OPENAI_API_KEY` and the optional
 *   `ELATO_OPENAI_MODEL` override (defaults to `"gpt-4.1-mini"`).
 * @param transcript - Text of the latest user utterance, or `null`/blank to
 *   send the first-message prompt instead.
 * @param history - Prior turns to include; the array is copied, not mutated.
 * @returns The trimmed content of the first choice, or a fixed fallback
 *   sentence when the response carries no usable content.
 * @throws Error when `OPENAI_API_KEY` is missing/blank, or when the endpoint
 *   answers with a non-2xx status (status and response body are included).
 */
export async function generateOpenAIReply(
  env: Env,
  transcript: string | null,
  history: ChatMessage[],
): Promise<string> {
  // Validate and send the *same* trimmed value, so stray whitespace around the
  // secret can never end up inside the Authorization header.
  const apiKey = env.OPENAI_API_KEY?.trim();
  if (!apiKey) {
    throw new Error("OPENAI_API_KEY is missing");
  }

  // `||` (not `??`) is deliberate in this function: empty strings must fall
  // through to the fallback exactly like null/undefined.
  const userText = transcript?.trim();
  const messages: ChatMessage[] = [
    { role: "system", content: getSystemPrompt(env) },
    ...history,
    { role: "user", content: userText || getFirstMessagePrompt(env) },
  ];

  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: env.ELATO_OPENAI_MODEL?.trim() || "gpt-4.1-mini",
      messages,
      temperature: 0.7,
    }),
  });

  if (!response.ok) {
    throw new Error(`OpenAI request failed: ${response.status} ${await response.text()}`);
  }

  const data = (await response.json()) as {
    choices?: Array<{ message?: { content?: string } }>;
  };

  return (
    data.choices?.[0]?.message?.content?.trim() ||
    "I heard you, but I do not have a response yet."
  );
}
Lines changed: 14 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
11
import { DurableObject } from "cloudflare:workers";
2-
import { WorkersAIFluxSTT, type TranscriberSession } from "@cloudflare/voice";
32
import type { Env } from "../src/types";
43
import { createOpusPacketizer } from "../src/opus";
5-
import { getFirstMessagePrompt, getSystemPrompt } from "../src/prompt";
6-
7-
const AUDIO_OUTPUT_SAMPLE_RATE = 24_000;
8-
const STT_SAMPLE_RATE = 16_000;
9-
10-
interface OpenAIChatMessage {
11-
role: "system" | "user" | "assistant";
12-
content: string;
13-
}
4+
import { createSttSession } from "./stt";
5+
import { generateOpenAIReply, type ChatMessage } from "./llm";
6+
import { synthesizeSpeech } from "./tts";
7+
import type { TranscriberSession } from "@cloudflare/voice";
148

159
function createAuthMessage() {
1610
return {
@@ -37,80 +31,13 @@ function errorMessage(error: unknown): string {
3731
return String(error);
3832
}
3933

40-
async function generateOpenAIReply(
41-
env: Env,
42-
transcript: string | null,
43-
history: OpenAIChatMessage[],
44-
): Promise<string> {
45-
if (!env.OPENAI_API_KEY?.trim()) {
46-
throw new Error("OPENAI_API_KEY is missing");
47-
}
48-
49-
const messages: OpenAIChatMessage[] = [
50-
{ role: "system", content: getSystemPrompt(env) },
51-
...history,
52-
];
53-
54-
if (transcript && transcript.trim().length > 0) {
55-
messages.push({ role: "user", content: transcript });
56-
} else {
57-
messages.push({ role: "user", content: getFirstMessagePrompt(env) });
58-
}
59-
60-
const response = await fetch("https://api.openai.com/v1/chat/completions", {
61-
method: "POST",
62-
headers: {
63-
Authorization: `Bearer ${env.OPENAI_API_KEY}`,
64-
"Content-Type": "application/json",
65-
},
66-
body: JSON.stringify({
67-
model: env.ELATO_OPENAI_MODEL || "gpt-4.1-mini",
68-
messages,
69-
temperature: 0.7,
70-
}),
71-
});
72-
73-
if (!response.ok) {
74-
throw new Error(`OpenAI request failed: ${response.status} ${await response.text()}`);
75-
}
76-
77-
const data = (await response.json()) as {
78-
choices?: Array<{ message?: { content?: string } }>;
79-
};
80-
81-
return (
82-
data.choices?.[0]?.message?.content?.trim() ||
83-
"I heard you, but I do not have a response yet."
84-
);
85-
}
86-
87-
async function synthesizeSpeech(env: Env, text: string): Promise<Response> {
88-
if (!env.AI) {
89-
throw new Error("Cloudflare AI binding is missing");
90-
}
91-
92-
return env.AI.run(
93-
"@cf/deepgram/aura-2-en",
94-
{
95-
text,
96-
speaker: "asteria",
97-
encoding: "linear16",
98-
container: "none",
99-
sample_rate: AUDIO_OUTPUT_SAMPLE_RATE,
100-
},
101-
{
102-
returnRawResponse: true,
103-
},
104-
) as Promise<Response>;
105-
}
106-
107-
export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
34+
export class ElatoVoiceSession extends DurableObject<Env> {
10835
private isGenerating = false;
10936
private opusPromise: Promise<Awaited<ReturnType<typeof createOpusPacketizer>>> | null = null;
11037
private hasStartedConversation = false;
11138
private transcriberSession: TranscriberSession | null = null;
11239
private currentWebSocket: WebSocket | null = null;
113-
private history: OpenAIChatMessage[] = [];
40+
private history: ChatMessage[] = [];
11441

11542
constructor(ctx: DurableObjectState, env: Env) {
11643
super(ctx, env);
@@ -141,23 +68,12 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
14168
return;
14269
}
14370

144-
const transcriber = new WorkersAIFluxSTT(this.env.AI, {
145-
sampleRate: STT_SAMPLE_RATE,
146-
eotTimeoutMs: 1000,
147-
});
148-
149-
this.transcriberSession = transcriber.createSession({
150-
onInterim: (text) => {
151-
if (text.trim()) {
152-
console.log(`[cloudflare][stt] interim: ${text}`);
153-
}
71+
this.transcriberSession = createSttSession(
72+
this.env,
73+
(text) => {
74+
console.log(`[cloudflare][stt] interim: ${text}`);
15475
},
155-
onUtterance: (transcript) => {
156-
const text = transcript.trim();
157-
if (!text) {
158-
return;
159-
}
160-
76+
(text) => {
16177
void this.ctx.blockConcurrencyWhile(async () => {
16278
if (!this.currentWebSocket || this.isGenerating) {
16379
return;
@@ -174,7 +90,7 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
17490
}
17591
});
17692
},
177-
});
93+
);
17894

17995
console.log("[cloudflare][stt] started continuous Flux session");
18096
}
@@ -262,11 +178,12 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
262178
return new Response("Expected websocket", { status: 426 });
263179
}
264180

181+
this.resetSessionState();
182+
265183
const pair = new WebSocketPair();
266184
const [client, server] = Object.values(pair);
267185
server.accept();
268186

269-
this.resetSessionState();
270187
this.currentWebSocket = server;
271188
this.ensureTranscriberSession();
272189

server-cloudflare/models/stt.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import { WorkersAIFluxSTT, type TranscriberSession } from "@cloudflare/voice";
2+
import type { Env } from "../src/types";
3+
4+
const STT_SAMPLE_RATE = 16_000;
5+
6+
/**
 * Creates a speech-to-text session backed by `WorkersAIFluxSTT` and wires the
 * caller's callbacks behind blank-text filters.
 *
 * The transcriber is constructed with `STT_SAMPLE_RATE` and an `eotTimeoutMs`
 * of 1000 (presumably the end-of-turn silence timeout in milliseconds —
 * confirm against the `@cloudflare/voice` documentation).
 *
 * @param env - Worker environment; its `AI` binding is handed to the transcriber.
 * @param onInterim - Called with the interim text as received (untrimmed),
 *   but only when it contains non-whitespace characters.
 * @param onUtterance - Called with the trimmed transcript, but only when the
 *   trimmed transcript is non-empty.
 * @returns The session returned by `transcriber.createSession`.
 * @throws Error when the `AI` binding is missing, matching the check that
 *   `synthesizeSpeech` performs for the same binding.
 */
export function createSttSession(
  env: Env,
  onInterim: (text: string) => void,
  onUtterance: (transcript: string) => void,
): TranscriberSession {
  // Fail with a clear message instead of passing an undefined binding into
  // the transcriber.
  if (!env.AI) {
    throw new Error("Cloudflare AI binding is missing");
  }

  const transcriber = new WorkersAIFluxSTT(env.AI, {
    sampleRate: STT_SAMPLE_RATE,
    eotTimeoutMs: 1000,
  });

  return transcriber.createSession({
    onInterim: (text) => {
      // Whitespace-only interim results are dropped.
      if (text.trim()) {
        onInterim(text);
      }
    },
    onUtterance: (transcript) => {
      // Callers always receive trimmed, non-empty utterances.
      const text = transcript.trim();
      if (text) {
        onUtterance(text);
      }
    },
  });
}

server-cloudflare/models/tts.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import type { Env } from "../src/types";
2+
3+
const AUDIO_OUTPUT_SAMPLE_RATE = 24_000;
4+
5+
/**
 * Converts `text` to speech by running the `@cf/deepgram/aura-2-en` model
 * through the `AI` binding.
 *
 * The model is called with speaker `"asteria"`, `linear16` encoding, no
 * container, and a sample rate of `AUDIO_OUTPUT_SAMPLE_RATE`; the raw
 * response is requested via `returnRawResponse: true`.
 *
 * @param env - Worker environment providing the `AI` binding.
 * @param text - Text to speak; must contain non-whitespace characters.
 * @returns The raw `Response` produced by `env.AI.run`.
 * @throws Error when the `AI` binding is missing or when `text` is blank.
 */
export async function synthesizeSpeech(env: Env, text: string): Promise<Response> {
  if (!env.AI) {
    throw new Error("Cloudflare AI binding is missing");
  }

  // Reject blank input locally rather than spending a model call on text
  // that contains nothing to speak.
  if (!text.trim()) {
    throw new Error("Cannot synthesize speech from empty text");
  }

  return env.AI.run(
    "@cf/deepgram/aura-2-en",
    {
      text,
      speaker: "asteria",
      encoding: "linear16",
      container: "none",
      sample_rate: AUDIO_OUTPUT_SAMPLE_RATE,
    },
    {
      returnRawResponse: true,
    },
  ) as Promise<Response>;
}

server-cloudflare/src/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import type { Env } from "./types";
22

3-
export { ElatoOpenAiVoiceAgent } from "../models/openai";
3+
export { ElatoVoiceSession } from "../models/session";
44

55
export default {
66
async fetch(request: Request, env: Env): Promise<Response> {
@@ -13,8 +13,8 @@ export default {
1313
if (url.pathname === "/ws/esp32" || url.pathname.startsWith("/ws/esp32/")) {
1414
/* Add AUTH here */
1515

16-
const stub = env.ElatoOpenAiVoiceAgent.get(
17-
env.ElatoOpenAiVoiceAgent.newUniqueId(),
16+
const stub = env.ElatoVoiceSession.get(
17+
env.ElatoVoiceSession.newUniqueId(),
1818
);
1919
return stub.fetch(request);
2020
}

server-cloudflare/src/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ export interface Env {
55
ELATO_OPENAI_MODEL?: string;
66
ELATO_OPENAI_SYSTEM_PROMPT?: string;
77
ELATO_OPENAI_FIRST_MESSAGE?: string;
8-
ElatoOpenAiVoiceAgent: DurableObjectNamespace;
8+
ElatoVoiceSession: DurableObjectNamespace;
99
}

server-cloudflare/wrangler.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ compatibility_flags = ["nodejs_compat"]
77
binding = "AI"
88

99
[[durable_objects.bindings]]
10-
name = "ElatoOpenAiVoiceAgent"
11-
class_name = "ElatoOpenAiVoiceAgent"
10+
name = "ElatoVoiceSession"
11+
class_name = "ElatoVoiceSession"
1212

1313
[[migrations]]
1414
tag = "v1"
15-
new_sqlite_classes = ["ElatoOpenAiVoiceAgent"]
15+
new_sqlite_classes = ["ElatoVoiceSession"]
1616

1717
[observability]
1818
[observability.logs]

0 commit comments

Comments
 (0)