11import { DurableObject } from "cloudflare:workers" ;
2- import { WorkersAIFluxSTT , type TranscriberSession } from "@cloudflare/voice" ;
32import type { Env } from "../src/types" ;
43import { createOpusPacketizer } from "../src/opus" ;
5- import { getFirstMessagePrompt , getSystemPrompt } from "../src/prompt" ;
6-
7- const AUDIO_OUTPUT_SAMPLE_RATE = 24_000 ;
8- const STT_SAMPLE_RATE = 16_000 ;
9-
10- interface OpenAIChatMessage {
11- role : "system" | "user" | "assistant" ;
12- content : string ;
13- }
4+ import { createSttSession } from "./stt" ;
5+ import { generateOpenAIReply , type ChatMessage } from "./llm" ;
6+ import { synthesizeSpeech } from "./tts" ;
7+ import type { TranscriberSession } from "@cloudflare/voice" ;
148
159function createAuthMessage ( ) {
1610 return {
@@ -37,80 +31,13 @@ function errorMessage(error: unknown): string {
3731 return String ( error ) ;
3832}
3933
40- async function generateOpenAIReply (
41- env : Env ,
42- transcript : string | null ,
43- history : OpenAIChatMessage [ ] ,
44- ) : Promise < string > {
45- if ( ! env . OPENAI_API_KEY ?. trim ( ) ) {
46- throw new Error ( "OPENAI_API_KEY is missing" ) ;
47- }
48-
49- const messages : OpenAIChatMessage [ ] = [
50- { role : "system" , content : getSystemPrompt ( env ) } ,
51- ...history ,
52- ] ;
53-
54- if ( transcript && transcript . trim ( ) . length > 0 ) {
55- messages . push ( { role : "user" , content : transcript } ) ;
56- } else {
57- messages . push ( { role : "user" , content : getFirstMessagePrompt ( env ) } ) ;
58- }
59-
60- const response = await fetch ( "https://api.openai.com/v1/chat/completions" , {
61- method : "POST" ,
62- headers : {
63- Authorization : `Bearer ${ env . OPENAI_API_KEY } ` ,
64- "Content-Type" : "application/json" ,
65- } ,
66- body : JSON . stringify ( {
67- model : env . ELATO_OPENAI_MODEL || "gpt-4.1-mini" ,
68- messages,
69- temperature : 0.7 ,
70- } ) ,
71- } ) ;
72-
73- if ( ! response . ok ) {
74- throw new Error ( `OpenAI request failed: ${ response . status } ${ await response . text ( ) } ` ) ;
75- }
76-
77- const data = ( await response . json ( ) ) as {
78- choices ?: Array < { message ?: { content ?: string } } > ;
79- } ;
80-
81- return (
82- data . choices ?. [ 0 ] ?. message ?. content ?. trim ( ) ||
83- "I heard you, but I do not have a response yet."
84- ) ;
85- }
86-
87- async function synthesizeSpeech ( env : Env , text : string ) : Promise < Response > {
88- if ( ! env . AI ) {
89- throw new Error ( "Cloudflare AI binding is missing" ) ;
90- }
91-
92- return env . AI . run (
93- "@cf/deepgram/aura-2-en" ,
94- {
95- text,
96- speaker : "asteria" ,
97- encoding : "linear16" ,
98- container : "none" ,
99- sample_rate : AUDIO_OUTPUT_SAMPLE_RATE ,
100- } ,
101- {
102- returnRawResponse : true ,
103- } ,
104- ) as Promise < Response > ;
105- }
106-
107- export class ElatoOpenAiVoiceAgent extends DurableObject < Env > {
34+ export class ElatoVoiceSession extends DurableObject < Env > {
10835 private isGenerating = false ;
10936 private opusPromise : Promise < Awaited < ReturnType < typeof createOpusPacketizer > > > | null = null ;
11037 private hasStartedConversation = false ;
11138 private transcriberSession : TranscriberSession | null = null ;
11239 private currentWebSocket : WebSocket | null = null ;
113- private history : OpenAIChatMessage [ ] = [ ] ;
40+ private history : ChatMessage [ ] = [ ] ;
11441
11542 constructor ( ctx : DurableObjectState , env : Env ) {
11643 super ( ctx , env ) ;
@@ -141,23 +68,12 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
14168 return ;
14269 }
14370
144- const transcriber = new WorkersAIFluxSTT ( this . env . AI , {
145- sampleRate : STT_SAMPLE_RATE ,
146- eotTimeoutMs : 1000 ,
147- } ) ;
148-
149- this . transcriberSession = transcriber . createSession ( {
150- onInterim : ( text ) => {
151- if ( text . trim ( ) ) {
152- console . log ( `[cloudflare][stt] interim: ${ text } ` ) ;
153- }
71+ this . transcriberSession = createSttSession (
72+ this . env ,
73+ ( text ) => {
74+ console . log ( `[cloudflare][stt] interim: ${ text } ` ) ;
15475 } ,
155- onUtterance : ( transcript ) => {
156- const text = transcript . trim ( ) ;
157- if ( ! text ) {
158- return ;
159- }
160-
76+ ( text ) => {
16177 void this . ctx . blockConcurrencyWhile ( async ( ) => {
16278 if ( ! this . currentWebSocket || this . isGenerating ) {
16379 return ;
@@ -174,7 +90,7 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
17490 }
17591 } ) ;
17692 } ,
177- } ) ;
93+ ) ;
17894
17995 console . log ( "[cloudflare][stt] started continuous Flux session" ) ;
18096 }
@@ -262,11 +178,12 @@ export class ElatoOpenAiVoiceAgent extends DurableObject<Env> {
262178 return new Response ( "Expected websocket" , { status : 426 } ) ;
263179 }
264180
181+ this . resetSessionState ( ) ;
182+
265183 const pair = new WebSocketPair ( ) ;
266184 const [ client , server ] = Object . values ( pair ) ;
267185 server . accept ( ) ;
268186
269- this . resetSessionState ( ) ;
270187 this . currentWebSocket = server ;
271188 this . ensureTranscriberSession ( ) ;
272189
0 commit comments