cleanup: remove unproven models, keep only Aura 2 upgrade

simpaticohr · simpaticohr · commit bd957d648aea · 2026-05-12T23:16:23.000+05:30
- Removed MeloTTS (gibberish/unstable output reported for non-English text)
- Removed GLM-4.7-Flash constant (not proven better than Llama 3.3-70B)
- Kept Aura 2 EN/ES (genuinely better: pronunciation, latency, clarity)
- Simplified TTS handler: no more MeloTTS base64 decode path
diff --git a/backend/simpatico-ats.js b/backend/simpatico-ats.js
@@ -736,7 +736,6 @@ async function callExternalLLM(cfg, messages, maxTokens, stream = false) {
  * Returns { response: string, usage?: object }
  */
 const CF_DEFAULT_MODEL = "@cf/meta/llama-3.3-70b-instruct-fp8-fast";
-const CF_FAST_MODEL    = "@cf/zhipu/glm-4.7-flash";        // 100+ languages, 131K ctx, much faster for multilingual
 const CF_LIGHT_MODEL   = "@cf/meta/llama-3.1-8b-instruct";
 
 async function runLLM(env, tenantId, messages, maxTokens = 1024) {
@@ -4101,21 +4100,15 @@ async function handleInterviewQuestion(request, env, ctx) {
   });
 }
 
-// ── TTS via Cloudflare Workers AI ──────────────────────────────────────────
-// Selects the best TTS model based on language:
-//   English  → Deepgram Aura 2 (upgraded, context-aware)
-//   Spanish  → Deepgram Aura 2 ES
-//   French, Japanese, Korean, Chinese → MeloTTS (multi-lingual)
-//   Other    → Deepgram Aura 2 EN fallback (text should be English)
+// ── TTS via Cloudflare Workers AI (Deepgram Aura 2) ────────────────────────
+// Upgraded from Aura 1 → Aura 2: better pronunciation, lower latency,
+// enterprise-grade clarity. Aura 2 ES for Spanish interviews.
+// Other languages: frontend uses browser speechSynthesis; if one reaches this handler it falls back to Aura 2 EN.
 const TTS_MODEL_MAP = {
-  en: { model: "@cf/deepgram/aura-2-en", type: "deepgram" },
-  es: { model: "@cf/deepgram/aura-2-es", type: "deepgram" },
-  fr: { model: "@cf/myshell-ai/melotts",  type: "melotts" },
-  ja: { model: "@cf/myshell-ai/melotts",  type: "melotts" },
-  ko: { model: "@cf/myshell-ai/melotts",  type: "melotts" },
-  zh: { model: "@cf/myshell-ai/melotts",  type: "melotts" },
+  en: "@cf/deepgram/aura-2-en",
+  es: "@cf/deepgram/aura-2-es",
 };
-const TTS_DEFAULT = { model: "@cf/deepgram/aura-2-en", type: "deepgram" };
+const TTS_DEFAULT_MODEL = "@cf/deepgram/aura-2-en";
 
 async function handleTTS(request, env, ctx) {
   const body = await safeJson(request);
@@ -4128,45 +4121,23 @@ async function handleTTS(request, env, ctx) {
 
   const cleanText = text.trim().slice(0, 5000);
   const langBase = (lang || 'en').split('-')[0].toLowerCase();
-  const ttsConfig = TTS_MODEL_MAP[langBase] || TTS_DEFAULT;
+  const ttsModel = TTS_MODEL_MAP[langBase] || TTS_DEFAULT_MODEL;
 
-  console.log(`[TTS] Generating speech: ${cleanText.length} chars, lang=${langBase}, model=${ttsConfig.model}`);
+  console.log(`[TTS] Generating speech: ${cleanText.length} chars, lang=${langBase}, model=${ttsModel}`);
 
   try {
-    if (ttsConfig.type === "melotts") {
-      // MeloTTS returns { audio: base64_string }
-      const result = await env.AI.run(ttsConfig.model, {
-        prompt: cleanText,
-        lang: langBase,
-      });
-
-      if (result && result.audio) {
-        const binaryString = atob(result.audio);
-        const bytes = new Uint8Array(binaryString.length);
-        for (let i = 0; i < binaryString.length; i++) {
-          bytes[i] = binaryString.charCodeAt(i);
-        }
-        return new Response(bytes.buffer, {
-          status: 200,
-          headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },
-        });
-      }
-      throw new Error("MeloTTS returned no audio");
-    } else {
-      // Deepgram Aura 2 — returns audio stream directly
-      const audioResponse = await env.AI.run(ttsConfig.model, { text: cleanText });
-      return new Response(audioResponse, {
-        status: 200,
-        headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },
-      });
-    }
+    const audioResponse = await env.AI.run(ttsModel, { text: cleanText });
+    return new Response(audioResponse, {
+      status: 200,
+      headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },
+    });
   } catch (err) {
-    console.error(`[TTS] AI.run error (${ttsConfig.model}): ${err.message}`);
-    // Fallback: try Aura 2 EN if a language-specific model failed
-    if (ttsConfig.model !== TTS_DEFAULT.model) {
-      console.log(`[TTS] Retrying with fallback model: ${TTS_DEFAULT.model}`);
+    console.error(`[TTS] AI.run error (${ttsModel}): ${err.message}`);
+    // Fallback: try default EN model if Spanish model failed
+    if (ttsModel !== TTS_DEFAULT_MODEL) {
+      console.log(`[TTS] Retrying with fallback: ${TTS_DEFAULT_MODEL}`);
       try {
-        const fallback = await env.AI.run(TTS_DEFAULT.model, { text: cleanText });
+        const fallback = await env.AI.run(TTS_DEFAULT_MODEL, { text: cleanText });
         return new Response(fallback, {
           status: 200,
           headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },