@@ -736,7 +736,6 @@ async function callExternalLLM(cfg, messages, maxTokens, stream = false) {
736736 * Returns { response: string, usage?: object }
737737 */
738738const CF_DEFAULT_MODEL = "@cf/meta/llama-3.3-70b-instruct-fp8-fast" ;
739- const CF_FAST_MODEL = "@cf/zhipu/glm-4.7-flash" ; // 100+ languages, 131K ctx, much faster for multilingual
740739const CF_LIGHT_MODEL = "@cf/meta/llama-3.1-8b-instruct" ;
741740
742741async function runLLM ( env , tenantId , messages , maxTokens = 1024 ) {
@@ -4101,21 +4100,15 @@ async function handleInterviewQuestion(request, env, ctx) {
41014100 } ) ;
41024101}
41034102
4104- // ── TTS via Cloudflare Workers AI ──────────────────────────────────────────
4105- // Selects the best TTS model based on language:
4106- // English → Deepgram Aura 2 (upgraded, context-aware)
4107- // Spanish → Deepgram Aura 2 ES
4108- // French, Japanese, Korean, Chinese → MeloTTS (multi-lingual)
4109- // Other → Deepgram Aura 2 EN fallback (text should be English)
4103+ // ── TTS via Cloudflare Workers AI (Deepgram Aura 2) ────────────────────────
4104+ // Upgraded from Aura 1 → Aura 2: better pronunciation, lower latency,
4105+ // enterprise-grade clarity. Aura 2 ES for Spanish interviews.
4106+ // Non-English/non-Spanish languages are expected to skip cloud TTS (frontend uses browser speechSynthesis); any that still reach the model lookup in handleTTS fall back to Aura 2 EN.
41104107const TTS_MODEL_MAP = {
4111- en : { model : "@cf/deepgram/aura-2-en" , type : "deepgram" } ,
4112- es : { model : "@cf/deepgram/aura-2-es" , type : "deepgram" } ,
4113- fr : { model : "@cf/myshell-ai/melotts" , type : "melotts" } ,
4114- ja : { model : "@cf/myshell-ai/melotts" , type : "melotts" } ,
4115- ko : { model : "@cf/myshell-ai/melotts" , type : "melotts" } ,
4116- zh : { model : "@cf/myshell-ai/melotts" , type : "melotts" } ,
4108+ en : "@cf/deepgram/aura-2-en" ,
4109+ es : "@cf/deepgram/aura-2-es" ,
41174110} ;
4118- const TTS_DEFAULT = { model : "@cf/deepgram/aura-2-en" , type : "deepgram" } ;
4111+ const TTS_DEFAULT_MODEL = "@cf/deepgram/aura-2-en" ;
41194112
41204113async function handleTTS ( request , env , ctx ) {
41214114 const body = await safeJson ( request ) ;
@@ -4128,45 +4121,23 @@ async function handleTTS(request, env, ctx) {
41284121
41294122 const cleanText = text . trim ( ) . slice ( 0 , 5000 ) ;
41304123 const langBase = ( lang || 'en' ) . split ( '-' ) [ 0 ] . toLowerCase ( ) ;
4131- const ttsConfig = TTS_MODEL_MAP [ langBase ] || TTS_DEFAULT ;
4124+ const ttsModel = TTS_MODEL_MAP [ langBase ] || TTS_DEFAULT_MODEL ;
41324125
4133- console . log ( `[TTS] Generating speech: ${ cleanText . length } chars, lang=${ langBase } , model=${ ttsConfig . model } ` ) ;
4126+ console . log ( `[TTS] Generating speech: ${ cleanText . length } chars, lang=${ langBase } , model=${ ttsModel } ` ) ;
41344127
41354128 try {
4136- if ( ttsConfig . type === "melotts" ) {
4137- // MeloTTS returns { audio: base64_string }
4138- const result = await env . AI . run ( ttsConfig . model , {
4139- prompt : cleanText ,
4140- lang : langBase ,
4141- } ) ;
4142-
4143- if ( result && result . audio ) {
4144- const binaryString = atob ( result . audio ) ;
4145- const bytes = new Uint8Array ( binaryString . length ) ;
4146- for ( let i = 0 ; i < binaryString . length ; i ++ ) {
4147- bytes [ i ] = binaryString . charCodeAt ( i ) ;
4148- }
4149- return new Response ( bytes . buffer , {
4150- status : 200 ,
4151- headers : { ...CORS_HEADERS , "Content-Type" : "audio/mpeg" , "Cache-Control" : "public, max-age=3600" } ,
4152- } ) ;
4153- }
4154- throw new Error ( "MeloTTS returned no audio" ) ;
4155- } else {
4156- // Deepgram Aura 2 — returns audio stream directly
4157- const audioResponse = await env . AI . run ( ttsConfig . model , { text : cleanText } ) ;
4158- return new Response ( audioResponse , {
4159- status : 200 ,
4160- headers : { ...CORS_HEADERS , "Content-Type" : "audio/mpeg" , "Cache-Control" : "public, max-age=3600" } ,
4161- } ) ;
4162- }
4129+ const audioResponse = await env . AI . run ( ttsModel , { text : cleanText } ) ;
4130+ return new Response ( audioResponse , {
4131+ status : 200 ,
4132+ headers : { ...CORS_HEADERS , "Content-Type" : "audio/mpeg" , "Cache-Control" : "public, max-age=3600" } ,
4133+ } ) ;
41634134 } catch ( err ) {
4164- console . error ( `[TTS] AI.run error (${ ttsConfig . model } ): ${ err . message } ` ) ;
4165- // Fallback: try Aura 2 EN if a language-specific model failed
4166- if ( ttsConfig . model !== TTS_DEFAULT . model ) {
4167- console . log ( `[TTS] Retrying with fallback model : ${ TTS_DEFAULT . model } ` ) ;
4135+ console . error ( `[TTS] AI.run error (${ ttsModel } ): ${ err . message } ` ) ;
4136+ // Fallback: try default EN model if Spanish model failed
4137+ if ( ttsModel !== TTS_DEFAULT_MODEL ) {
4138+ console . log ( `[TTS] Retrying with fallback: ${ TTS_DEFAULT_MODEL } ` ) ;
41684139 try {
4169- const fallback = await env . AI . run ( TTS_DEFAULT . model , { text : cleanText } ) ;
4140+ const fallback = await env . AI . run ( TTS_DEFAULT_MODEL , { text : cleanText } ) ;
41704141 return new Response ( fallback , {
41714142 status : 200 ,
41724143 headers : { ...CORS_HEADERS , "Content-Type" : "audio/mpeg" , "Cache-Control" : "public, max-age=3600" } ,
0 commit comments