THU-MAIC · wyuc · Mar 22, 2026 · Mar 19, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/.env.example b/.env.example
@@ -66,6 +66,9 @@ TTS_GLM_BASE_URL=
 TTS_QWEN_API_KEY=
 TTS_QWEN_BASE_URL=
 
+TTS_ELEVENLABS_API_KEY=
+TTS_ELEVENLABS_BASE_URL=
+
 # --- ASR (Automatic Speech Recognition) --------------------------------------
 
 ASR_OPENAI_API_KEY=

diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx
@@ -88,6 +88,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
     'azure-tts': t('settings.providerAzureTTS'),
     'glm-tts': t('settings.providerGLMTTS'),
     'qwen-tts': t('settings.providerQwenTTS'),
+    'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
     'browser-native-tts': t('settings.providerBrowserNativeTTS'),
   };
   return names[providerId] || providerId;

diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx
@@ -43,6 +43,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
     'azure-tts': t('settings.providerAzureTTS'),
     'glm-tts': t('settings.providerGLMTTS'),
     'qwen-tts': t('settings.providerQwenTTS'),
+    'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
     'browser-native-tts': t('settings.providerBrowserNativeTTS'),
   };
   return names[providerId];

diff --git a/components/settings/index.tsx b/components/settings/index.tsx
@@ -121,6 +121,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
     'azure-tts': t('settings.providerAzureTTS'),
     'glm-tts': t('settings.providerGLMTTS'),
     'qwen-tts': t('settings.providerQwenTTS'),
+    'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
     'browser-native-tts': t('settings.providerBrowserNativeTTS'),
   };
   return names[providerId];

diff --git a/components/settings/tts-settings.tsx b/components/settings/tts-settings.tsx
@@ -164,6 +164,9 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
               case 'qwen-tts':
                 endpointPath = '/services/aigc/multimodal-generation/generation';
                 break;
+              case 'elevenlabs-tts':
+                endpointPath = '/text-to-speech';
+                break;
             }
             if (!endpointPath) return null;
             return (

diff --git a/lib/audio/constants.ts b/lib/audio/constants.ts
@@ -606,6 +606,68 @@ export const TTS_PROVIDERS: Record<TTSProviderId, TTSProviderConfig> = {
     supportedFormats: ['mp3', 'wav', 'pcm'],
   },
 
+  'elevenlabs-tts': {
+    id: 'elevenlabs-tts',
+    name: 'ElevenLabs TTS',
+    requiresApiKey: true,
+    defaultBaseUrl: 'https://api.elevenlabs.io/v1',
+    icon: '/logos/elevenlabs.svg',
+    // Free-tier-safe fallback set; account-specific/custom voices should come from /v2/voices dynamically later.
+    voices: [
+      {
+        id: 'EXAVITQu4vr4xnSDxMaL',
+        name: 'Sarah',
+        language: 'en-US',
+        gender: 'female',
+        description: 'Confident and warm professional voice for clear narration',
+      },
+      {
+        id: 'Xb7hH8MSUJpSbSDYk0k2',
+        name: 'Alice',
+        language: 'en-GB',
+        gender: 'female',
+        description: 'Clear and engaging British educator voice for e-learning',
+      },
+      {
+        id: 'XrExE9yKIg1WjnnlVkGX',
+        name: 'Matilda',
+        language: 'en-US',
+        gender: 'female',
+        description: 'Knowledgeable and upbeat voice suited for lectures',
+      },
+      {
+        id: 'CwhRBWXzGAHq8TQ4Fs17',
+        name: 'Roger',
+        language: 'en-US',
+        gender: 'male',
+        description: 'Laid-back but resonant male voice for friendly lessons',
+      },
+      {
+        id: 'cjVigY5qzO86Huf0OWal',
+        name: 'Eric',
+        language: 'en-US',
+        gender: 'male',
+        description: 'Smooth and trustworthy voice for polished classroom audio',
+      },
+      {
+        id: 'onwK4e9ZLuTAKqWW03F9',
+        name: 'Daniel',
+        language: 'en-GB',
+        gender: 'male',
+        description: 'Steady British broadcaster voice for formal explanations',
+      },
+      {
+        id: 'SAz9YHcvj6GT2YYXdXww',
+        name: 'River',
+        language: 'en-US',
+        gender: 'neutral',
+        description: 'Relaxed and informative neutral voice for general narration',
+      },
+    ],
+    supportedFormats: ['mp3', 'opus', 'pcm', 'wav', 'ulaw', 'alaw'],
+    speedRange: { min: 0.7, max: 1.2, default: 1.0 },
+  },
+
   'browser-native-tts': {
     id: 'browser-native-tts',
     name: '浏览器原生 (Web Speech API)',
@@ -833,6 +895,7 @@ export const DEFAULT_TTS_VOICES: Record<TTSProviderId, string> = {
   'azure-tts': 'zh-CN-XiaoxiaoNeural',
   'glm-tts': 'tongtong',
   'qwen-tts': 'Cherry',
+  'elevenlabs-tts': 'EXAVITQu4vr4xnSDxMaL',
   'browser-native-tts': 'default',
 };
 

diff --git a/lib/audio/tts-providers.ts b/lib/audio/tts-providers.ts
@@ -9,6 +9,7 @@
  * - Azure TTS: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech
  * - GLM TTS: https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts
  * - Qwen TTS: https://bailian.console.aliyun.com/
+ * - ElevenLabs TTS: https://elevenlabs.io/docs/api-reference/text-to-speech/convert
  * - Browser Native: Web Speech API (client-side only)
  *
  * HOW TO ADD A NEW PROVIDER:
@@ -23,7 +24,7 @@
  *      name: 'ElevenLabs',
  *      requiresApiKey: true,
  *      defaultBaseUrl: 'https://api.elevenlabs.io/v1',
- *      icon: '/elevenlabs.svg',
+ *      icon: '/logos/elevenlabs.svg',
  *      voices: [...],
  *      supportedFormats: ['mp3', 'pcm'],
  *      speedRange: { min: 0.5, max: 2.0, default: 1.0 }
@@ -51,10 +52,10 @@
  *        },
  *        body: JSON.stringify({
  *          text,
- *          model_id: 'eleven_monolingual_v1',
+ *          model_id: 'eleven_multilingual_v2',
  *          voice_settings: {
  *            stability: 0.5,
- *            similarity_boost: 0.5,
+ *            similarity_boost: 0.75,
  *          }
  *        }),
  *      });
@@ -130,6 +131,9 @@ export async function generateTTS(
     case 'qwen-tts':
       return await generateQwenTTS(config, text);
 
+    case 'elevenlabs-tts':
+      return await generateElevenLabsTTS(config, text);
+
     case 'browser-native-tts':
       throw new Error(
         'Browser Native TTS must be handled client-side using Web Speech API. This provider cannot be used on the server.',
@@ -316,6 +320,58 @@ async function generateQwenTTS(config: TTSModelConfig, text: string): Promise<TT
   };
 }
 
+/**
+ * ElevenLabs TTS implementation (direct API call with voice-specific endpoint).
+ * Speed is clamped to 0.7–1.2. A format with no ElevenLabs mapping is synthesised as MP3
+ * and reported as `mp3`, so the returned label always matches the audio bytes.
+ * @throws Error when the API responds with a non-OK status.
+ */
+async function generateElevenLabsTTS(
+  config: TTSModelConfig,
+  text: string,
+): Promise<TTSGenerationResult> {
+  // Trim trailing slashes so a custom base URL cannot yield `//text-to-speech`.
+  const rawBaseUrl = config.baseUrl || TTS_PROVIDERS['elevenlabs-tts'].defaultBaseUrl || '';
+  const baseUrl = rawBaseUrl.replace(/\/+$/, '');
+  const clampedSpeed = Math.min(1.2, Math.max(0.7, config.speed || 1.0));
+  const outputFormatMap: Record<string, string> = {
+    mp3: 'mp3_44100_128',
+    opus: 'opus_48000_96',
+    pcm: 'pcm_44100',
+    wav: 'wav_44100',
+    ulaw: 'ulaw_8000',
+    alaw: 'alaw_8000',
+  };
+  // Unknown formats fall back to MP3; report the format that is actually requested.
+  const requestedFormat = config.format || 'mp3';
+  const isMapped = Object.prototype.hasOwnProperty.call(outputFormatMap, requestedFormat);
+  const format = isMapped ? requestedFormat : 'mp3';
+  const voicePath = encodeURIComponent(config.voice);
+  // Send the key header only when a key exists (never the literal string "undefined").
+  const headers: Record<string, string> = { 'Content-Type': 'application/json; charset=utf-8' };
+  if (config.apiKey) headers['xi-api-key'] = config.apiKey;
+
+  const response = await fetch(
+    `${baseUrl}/text-to-speech/${voicePath}?output_format=${outputFormatMap[format]}`,
+    {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        text,
+        model_id: 'eleven_multilingual_v2',
+        voice_settings: { stability: 0.5, similarity_boost: 0.75, speed: clampedSpeed },
+      }),
+    },
+  );
+
+  if (!response.ok) {
+    const errorText = await response.text().catch(() => response.statusText);
+    throw new Error(`ElevenLabs TTS API error: ${errorText || response.statusText}`);
+  }
+
+  return { audio: new Uint8Array(await response.arrayBuffer()), format };
+}
+
 /**
  * Get current TTS configuration from settings store
  * Note: This function should only be called in browser context

diff --git a/lib/audio/types.ts b/lib/audio/types.ts
@@ -82,9 +82,9 @@ export type TTSProviderId =
   | 'azure-tts'
   | 'glm-tts'
   | 'qwen-tts'
+  | 'elevenlabs-tts'
   | 'browser-native-tts';
 // Add new TTS providers below (uncomment and modify):
-// | 'elevenlabs-tts'
 // | 'fish-audio-tts'
 // | 'cartesia-tts'
 // | 'playht-tts'

diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts
@@ -221,6 +221,7 @@ export const settingsZhCN = {
     providerAzureTTS: 'Azure TTS',
     providerGLMTTS: 'GLM TTS',
     providerQwenTTS: 'Qwen TTS（阿里云百炼）',
+    providerElevenLabsTTS: 'ElevenLabs TTS',
     providerBrowserNativeTTS: '浏览器原生 TTS',
     providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)',
     providerBrowserNative: '浏览器原生 ASR',
@@ -808,6 +809,7 @@ export const settingsEnUS = {
     providerAzureTTS: 'Azure TTS',
     providerGLMTTS: 'GLM TTS',
     providerQwenTTS: 'Qwen TTS (Alibaba Cloud Bailian)',
+    providerElevenLabsTTS: 'ElevenLabs TTS',
     providerBrowserNativeTTS: 'Browser Native TTS',
     providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)',
     providerBrowserNative: 'Browser Native ASR',

diff --git a/lib/server/provider-config.ts b/lib/server/provider-config.ts
@@ -56,6 +56,7 @@ const TTS_ENV_MAP: Record<string, string> = {
   TTS_AZURE: 'azure-tts',
   TTS_GLM: 'glm-tts',
   TTS_QWEN: 'qwen-tts',
+  TTS_ELEVENLABS: 'elevenlabs-tts',
 };
 
 const ASR_ENV_MAP: Record<string, string> = {

diff --git a/lib/store/settings.ts b/lib/store/settings.ts
@@ -266,6 +266,7 @@ const getDefaultAudioConfig = () => ({
     'azure-tts': { apiKey: '', baseUrl: '', enabled: false },
     'glm-tts': { apiKey: '', baseUrl: '', enabled: false },
     'qwen-tts': { apiKey: '', baseUrl: '', enabled: false },
+    'elevenlabs-tts': { apiKey: '', baseUrl: '', enabled: false },
     'browser-native-tts': { apiKey: '', baseUrl: '', enabled: true },
   } as Record<TTSProviderId, { apiKey: string; baseUrl: string; enabled: boolean }>,
   asrProvidersConfig: {

diff --git a/public/logos/elevenlabs.svg b/public/logos/elevenlabs.svg