Skip to content

Commit bd957d6

Browse files
committed
cleanup: remove unproven models, keep only Aura 2 upgrade
- Removed MeloTTS (reported gibberish/instability for non-English)
- Removed GLM-4.7-Flash constant (not proven better than Llama 3.3-70B)
- Kept Aura 2 EN/ES (genuinely better: pronunciation, latency, clarity)
- Simplified TTS handler: no more MeloTTS base64 decode path
1 parent c2f9679 commit bd957d6

1 file changed

Lines changed: 19 additions & 48 deletions

File tree

backend/simpatico-ats.js

Lines changed: 19 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,6 @@ async function callExternalLLM(cfg, messages, maxTokens, stream = false) {
736736
* Returns { response: string, usage?: object }
737737
*/
738738
const CF_DEFAULT_MODEL = "@cf/meta/llama-3.3-70b-instruct-fp8-fast";
739-
const CF_FAST_MODEL = "@cf/zhipu/glm-4.7-flash"; // 100+ languages, 131K ctx, much faster for multilingual
740739
const CF_LIGHT_MODEL = "@cf/meta/llama-3.1-8b-instruct";
741740

742741
async function runLLM(env, tenantId, messages, maxTokens = 1024) {
@@ -4101,21 +4100,15 @@ async function handleInterviewQuestion(request, env, ctx) {
41014100
});
41024101
}
41034102

4104-
// ── TTS via Cloudflare Workers AI ──────────────────────────────────────────
4105-
// Selects the best TTS model based on language:
4106-
// English → Deepgram Aura 2 (upgraded, context-aware)
4107-
// Spanish → Deepgram Aura 2 ES
4108-
// French, Japanese, Korean, Chinese → MeloTTS (multi-lingual)
4109-
// Other → Deepgram Aura 2 EN fallback (text should be English)
4103+
// ── TTS via Cloudflare Workers AI (Deepgram Aura 2) ────────────────────────
4104+
// Upgraded from Aura 1 → Aura 2: better pronunciation, lower latency,
4105+
// enterprise-grade clarity. Aura 2 ES for Spanish interviews.
4106+
// Non-English/non-Spanish languages are expected to skip cloud TTS (frontend uses browser speechSynthesis); any unmapped language that still reaches the model lookup falls back to Aura 2 EN.
41104107
const TTS_MODEL_MAP = {
4111-
en: { model: "@cf/deepgram/aura-2-en", type: "deepgram" },
4112-
es: { model: "@cf/deepgram/aura-2-es", type: "deepgram" },
4113-
fr: { model: "@cf/myshell-ai/melotts", type: "melotts" },
4114-
ja: { model: "@cf/myshell-ai/melotts", type: "melotts" },
4115-
ko: { model: "@cf/myshell-ai/melotts", type: "melotts" },
4116-
zh: { model: "@cf/myshell-ai/melotts", type: "melotts" },
4108+
en: "@cf/deepgram/aura-2-en",
4109+
es: "@cf/deepgram/aura-2-es",
41174110
};
4118-
const TTS_DEFAULT = { model: "@cf/deepgram/aura-2-en", type: "deepgram" };
4111+
const TTS_DEFAULT_MODEL = "@cf/deepgram/aura-2-en";
41194112

41204113
async function handleTTS(request, env, ctx) {
41214114
const body = await safeJson(request);
@@ -4128,45 +4121,23 @@ async function handleTTS(request, env, ctx) {
41284121

41294122
const cleanText = text.trim().slice(0, 5000);
41304123
const langBase = (lang || 'en').split('-')[0].toLowerCase();
4131-
const ttsConfig = TTS_MODEL_MAP[langBase] || TTS_DEFAULT;
4124+
const ttsModel = TTS_MODEL_MAP[langBase] || TTS_DEFAULT_MODEL;
41324125

4133-
console.log(`[TTS] Generating speech: ${cleanText.length} chars, lang=${langBase}, model=${ttsConfig.model}`);
4126+
console.log(`[TTS] Generating speech: ${cleanText.length} chars, lang=${langBase}, model=${ttsModel}`);
41344127

41354128
try {
4136-
if (ttsConfig.type === "melotts") {
4137-
// MeloTTS returns { audio: base64_string }
4138-
const result = await env.AI.run(ttsConfig.model, {
4139-
prompt: cleanText,
4140-
lang: langBase,
4141-
});
4142-
4143-
if (result && result.audio) {
4144-
const binaryString = atob(result.audio);
4145-
const bytes = new Uint8Array(binaryString.length);
4146-
for (let i = 0; i < binaryString.length; i++) {
4147-
bytes[i] = binaryString.charCodeAt(i);
4148-
}
4149-
return new Response(bytes.buffer, {
4150-
status: 200,
4151-
headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },
4152-
});
4153-
}
4154-
throw new Error("MeloTTS returned no audio");
4155-
} else {
4156-
// Deepgram Aura 2 — returns audio stream directly
4157-
const audioResponse = await env.AI.run(ttsConfig.model, { text: cleanText });
4158-
return new Response(audioResponse, {
4159-
status: 200,
4160-
headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },
4161-
});
4162-
}
4129+
const audioResponse = await env.AI.run(ttsModel, { text: cleanText });
4130+
return new Response(audioResponse, {
4131+
status: 200,
4132+
headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },
4133+
});
41634134
} catch (err) {
4164-
console.error(`[TTS] AI.run error (${ttsConfig.model}): ${err.message}`);
4165-
// Fallback: try Aura 2 EN if a language-specific model failed
4166-
if (ttsConfig.model !== TTS_DEFAULT.model) {
4167-
console.log(`[TTS] Retrying with fallback model: ${TTS_DEFAULT.model}`);
4135+
console.error(`[TTS] AI.run error (${ttsModel}): ${err.message}`);
4136+
// Fallback: try default EN model if Spanish model failed
4137+
if (ttsModel !== TTS_DEFAULT_MODEL) {
4138+
console.log(`[TTS] Retrying with fallback: ${TTS_DEFAULT_MODEL}`);
41684139
try {
4169-
const fallback = await env.AI.run(TTS_DEFAULT.model, { text: cleanText });
4140+
const fallback = await env.AI.run(TTS_DEFAULT_MODEL, { text: cleanText });
41704141
return new Response(fallback, {
41714142
status: 200,
41724143
headers: { ...CORS_HEADERS, "Content-Type": "audio/mpeg", "Cache-Control": "public, max-age=3600" },

0 commit comments

Comments
 (0)