@@ -736,6 +736,7 @@ async function callExternalLLM(cfg, messages, maxTokens, stream = false) {
736736 * Returns { response: string, usage?: object }
737737 */
// Model identifiers in the Cloudflare "@cf/..." namespace.
const CF_DEFAULT_MODEL = "@cf/meta/llama-3.3-70b-instruct-fp8-fast";
// 100+ languages, 131K ctx, much faster for multilingual.
// NOTE(review): confirm this exact model ID against the Workers AI model catalog.
const CF_FAST_MODEL = "@cf/zhipu/glm-4.7-flash";
const CF_LIGHT_MODEL = "@cf/meta/llama-3.1-8b-instruct";
740741
741742async function runLLM ( env , tenantId , messages , maxTokens = 1024 ) {
@@ -4100,34 +4101,80 @@ async function handleInterviewQuestion(request, env, ctx) {
41004101 } ) ;
41014102}
41024103
// ── TTS via Cloudflare Workers AI ──────────────────────────────────────────
// Selects the best TTS model based on language:
//   English → Deepgram Aura 2 (upgraded, context-aware)
//   Spanish → Deepgram Aura 2 ES
//   French, Japanese, Korean, Chinese → MeloTTS (multi-lingual)
//   Other → Deepgram Aura 2 EN fallback (text should be English)
//
// The map is keyed by a base language code. It is built on a null prototype
// so that indexing it with an arbitrary string (e.g. "constructor",
// "toString", "__proto__") yields `undefined` instead of an inherited
// Object.prototype member.
// NOTE(review): confirm MeloTTS accepts "ja"/"ko"/"zh" as `lang` values.
const TTS_MODEL_MAP = Object.assign(Object.create(null), {
  en: { model: "@cf/deepgram/aura-2-en", type: "deepgram" },
  es: { model: "@cf/deepgram/aura-2-es", type: "deepgram" },
  fr: { model: "@cf/myshell-ai/melotts", type: "melotts" },
  ja: { model: "@cf/myshell-ai/melotts", type: "melotts" },
  ko: { model: "@cf/myshell-ai/melotts", type: "melotts" },
  zh: { model: "@cf/myshell-ai/melotts", type: "melotts" },
});

// Configuration used when the requested language has no entry in the map.
const TTS_DEFAULT = { model: "@cf/deepgram/aura-2-en", type: "deepgram" };
4119+
/**
 * Wrap generated audio in the HTTP response returned by the TTS endpoint.
 *
 * @param {*} audio - Response body (bytes or whatever `env.AI.run` returned).
 * @returns {Response} 200 response with CORS headers, `audio/mpeg` content
 *   type and a one-hour public cache lifetime.
 */
function ttsAudioResponse(audio) {
  return new Response(audio, {
    status: 200,
    headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },
  });
}

/**
 * Run one TTS model and return a value usable as a Response body.
 *
 * @param {object} env - Worker environment exposing `AI.run`.
 * @param {{model: string, type: string}} config - Model selection.
 * @param {string} text - Text to synthesize.
 * @param {string} langBase - Base language code, forwarded to MeloTTS only.
 * @returns {Promise<*>} Decoded bytes for MeloTTS, otherwise the raw result.
 * @throws {Error} When MeloTTS answers without an `audio` field.
 */
async function ttsSynthesize(env, config, text, langBase) {
  if (config.type === "melotts") {
    // MeloTTS returns { audio: base64_string }
    const result = await env.AI.run(config.model, { prompt: text, lang: langBase });
    if (!result || !result.audio) {
      throw new Error("MeloTTS returned no audio");
    }
    return Uint8Array.from(atob(result.audio), (ch) => ch.charCodeAt(0));
  }
  // Deepgram Aura 2 — the result is passed through as the response body.
  return env.AI.run(config.model, { text });
}

/**
 * Handle a text-to-speech request.
 *
 * Reads `{ text, lang }` from the request body, picks a model from
 * TTS_MODEL_MAP using the base language code (the part before "-" or "_",
 * lower-cased; missing/empty `lang` means "en"), and returns the audio.
 * Unknown languages use TTS_DEFAULT. If a non-default model fails, the
 * request is retried once with TTS_DEFAULT.
 *
 * @param {Request} request - Incoming request; body parsed via `safeJson`.
 * @param {object} env - Worker environment; `env.AI` must be present.
 * @param {object} ctx - Execution context (unused here).
 * @returns {Promise<Response>} Audio response (see ttsAudioResponse).
 * @throws {ValidationError} When `text` is missing/blank or `lang` is a
 *   non-string value.
 * @throws {ServiceUnavailableError} When `env.AI` is not available.
 * @throws {AppError} With code "TTS_ERROR" when generation (and the fallback,
 *   if attempted) fails.
 */
async function handleTTS(request, env, ctx) {
  // Tolerate a null/undefined parse result so it surfaces as a validation
  // error below rather than a destructuring TypeError.
  const body = (await safeJson(request)) || {};
  const { text, lang } = body;
  if (!text || typeof text !== 'string' || text.trim().length === 0) {
    throw new ValidationError("text is required");
  }
  // Falsy values keep meaning "default language"; any other non-string would
  // otherwise crash on `.split` below.
  if (lang && typeof lang !== 'string') {
    throw new ValidationError("lang must be a string");
  }

  if (!env.AI) throw new ServiceUnavailableError("AI");

  // Cap the amount of text sent to the model.
  const cleanText = text.trim().slice(0, 5000);
  const langBase = (lang || 'en').trim().split(/[-_]/)[0].toLowerCase();
  // Own-property check: the language code comes from the request, so names
  // inherited from Object.prototype must not be mistaken for a model entry.
  const ttsConfig = Object.prototype.hasOwnProperty.call(TTS_MODEL_MAP, langBase)
    ? TTS_MODEL_MAP[langBase]
    : TTS_DEFAULT;

  console.log(`[TTS] Generating speech: ${cleanText.length} chars, lang=${langBase}, model=${ttsConfig.model}`);

  try {
    const audio = await ttsSynthesize(env, ttsConfig, cleanText, langBase);
    return ttsAudioResponse(audio);
  } catch (err) {
    console.error(`[TTS] AI.run error (${ttsConfig.model}): ${err.message}`);
    // Fallback: try Aura 2 EN if a language-specific model failed
    if (ttsConfig.model !== TTS_DEFAULT.model) {
      console.log(`[TTS] Retrying with fallback model: ${TTS_DEFAULT.model}`);
      try {
        const fallback = await env.AI.run(TTS_DEFAULT.model, { text: cleanText });
        return ttsAudioResponse(fallback);
      } catch (e2) {
        // Logged only; the error reported to the caller is the primary one.
        console.error(`[TTS] Fallback also failed: ${e2.message}`);
      }
    }
    throw new AppError(`TTS generation failed: ${err.message}`, 500, "TTS_ERROR");
  }
}
0 commit comments