Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ TTS_GLM_BASE_URL=
TTS_QWEN_API_KEY=
TTS_QWEN_BASE_URL=

TTS_ELEVENLABS_API_KEY=
TTS_ELEVENLABS_BASE_URL=

# --- ASR (Automatic Speech Recognition) --------------------------------------

ASR_OPENAI_API_KEY=
Expand Down
1 change: 1 addition & 0 deletions components/generation/media-popover.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
'azure-tts': t('settings.providerAzureTTS'),
'glm-tts': t('settings.providerGLMTTS'),
'qwen-tts': t('settings.providerQwenTTS'),
'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
'browser-native-tts': t('settings.providerBrowserNativeTTS'),
};
return names[providerId] || providerId;
Expand Down
1 change: 1 addition & 0 deletions components/settings/audio-settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
'azure-tts': t('settings.providerAzureTTS'),
'glm-tts': t('settings.providerGLMTTS'),
'qwen-tts': t('settings.providerQwenTTS'),
'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
'browser-native-tts': t('settings.providerBrowserNativeTTS'),
};
return names[providerId];
Expand Down
1 change: 1 addition & 0 deletions components/settings/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
'azure-tts': t('settings.providerAzureTTS'),
'glm-tts': t('settings.providerGLMTTS'),
'qwen-tts': t('settings.providerQwenTTS'),
'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
'browser-native-tts': t('settings.providerBrowserNativeTTS'),
};
return names[providerId];
Expand Down
3 changes: 3 additions & 0 deletions components/settings/tts-settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
case 'qwen-tts':
endpointPath = '/services/aigc/multimodal-generation/generation';
break;
case 'elevenlabs-tts':
endpointPath = '/text-to-speech';
break;
}
if (!endpointPath) return null;
return (
Expand Down
63 changes: 63 additions & 0 deletions lib/audio/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,68 @@ export const TTS_PROVIDERS: Record<TTSProviderId, TTSProviderConfig> = {
supportedFormats: ['mp3', 'wav', 'pcm'],
},

'elevenlabs-tts': {
id: 'elevenlabs-tts',
name: 'ElevenLabs TTS',
requiresApiKey: true,
defaultBaseUrl: 'https://api.elevenlabs.io/v1',
icon: '/logos/elevenlabs.svg',
// Free-tier-safe fallback set; account-specific/custom voices should come from /v2/voices dynamically later.
voices: [
{
id: 'EXAVITQu4vr4xnSDxMaL',
name: 'Sarah',
language: 'en-US',
gender: 'female',
description: 'Confident and warm professional voice for clear narration',
},
{
id: 'Xb7hH8MSUJpSbSDYk0k2',
name: 'Alice',
language: 'en-GB',
gender: 'female',
description: 'Clear and engaging British educator voice for e-learning',
},
{
id: 'XrExE9yKIg1WjnnlVkGX',
name: 'Matilda',
language: 'en-US',
gender: 'female',
description: 'Knowledgeable and upbeat voice suited for lectures',
},
{
id: 'CwhRBWXzGAHq8TQ4Fs17',
name: 'Roger',
language: 'en-US',
gender: 'male',
description: 'Laid-back but resonant male voice for friendly lessons',
},
{
id: 'cjVigY5qzO86Huf0OWal',
name: 'Eric',
language: 'en-US',
gender: 'male',
description: 'Smooth and trustworthy voice for polished classroom audio',
},
{
id: 'onwK4e9ZLuTAKqWW03F9',
name: 'Daniel',
language: 'en-GB',
gender: 'male',
description: 'Steady British broadcaster voice for formal explanations',
},
{
id: 'SAz9YHcvj6GT2YYXdXww',
name: 'River',
language: 'en-US',
gender: 'neutral',
description: 'Relaxed and informative neutral voice for general narration',
},
],
supportedFormats: ['mp3', 'opus', 'pcm', 'wav', 'ulaw', 'alaw'],
speedRange: { min: 0.7, max: 1.2, default: 1.0 },
},

'browser-native-tts': {
id: 'browser-native-tts',
name: '浏览器原生 (Web Speech API)',
Expand Down Expand Up @@ -833,6 +895,7 @@ export const DEFAULT_TTS_VOICES: Record<TTSProviderId, string> = {
'azure-tts': 'zh-CN-XiaoxiaoNeural',
'glm-tts': 'tongtong',
'qwen-tts': 'Cherry',
'elevenlabs-tts': 'EXAVITQu4vr4xnSDxMaL',
'browser-native-tts': 'default',
};

Expand Down
62 changes: 59 additions & 3 deletions lib/audio/tts-providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* - Azure TTS: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech
* - GLM TTS: https://docs.bigmodel.cn/cn/guide/models/sound-and-video/glm-tts
* - Qwen TTS: https://bailian.console.aliyun.com/
* - ElevenLabs TTS: https://elevenlabs.io/docs/api-reference/text-to-speech/convert
* - Browser Native: Web Speech API (client-side only)
*
* HOW TO ADD A NEW PROVIDER:
Expand All @@ -23,7 +24,7 @@
* name: 'ElevenLabs',
* requiresApiKey: true,
* defaultBaseUrl: 'https://api.elevenlabs.io/v1',
* icon: '/elevenlabs.svg',
* icon: '/logos/elevenlabs.svg',
* voices: [...],
* supportedFormats: ['mp3', 'pcm'],
* speedRange: { min: 0.5, max: 2.0, default: 1.0 }
Expand Down Expand Up @@ -51,10 +52,10 @@
* },
* body: JSON.stringify({
* text,
* model_id: 'eleven_monolingual_v1',
* model_id: 'eleven_multilingual_v2',
* voice_settings: {
* stability: 0.5,
* similarity_boost: 0.5,
* similarity_boost: 0.75,
* }
* }),
* });
Expand Down Expand Up @@ -130,6 +131,9 @@ export async function generateTTS(
case 'qwen-tts':
return await generateQwenTTS(config, text);

case 'elevenlabs-tts':
return await generateElevenLabsTTS(config, text);

case 'browser-native-tts':
throw new Error(
'Browser Native TTS must be handled client-side using Web Speech API. This provider cannot be used on the server.',
Expand Down Expand Up @@ -316,6 +320,58 @@ async function generateQwenTTS(config: TTSModelConfig, text: string): Promise<TT
};
}

/**
 * ElevenLabs TTS implementation (direct API call with voice-specific endpoint).
 *
 * Issues `POST {baseUrl}/text-to-speech/{voice}?output_format=...` and returns the
 * response body as raw audio bytes.
 *
 * @param config - Provider settings. Fields read here: `baseUrl` (falls back to the
 *   provider default), `apiKey`, `voice` (used as the voice id path segment),
 *   `format` (defaults to `'mp3'`) and `speed` (clamped to 0.7–1.2, defaults to 1.0).
 * @param text - Text to synthesize.
 * @returns The audio bytes together with the format that was actually requested
 *   from the API. If `config.format` is not one of the mapped formats the request
 *   falls back to MP3 and the result reports `'mp3'`, so the label always matches
 *   the bytes.
 * @throws Error when the API responds with a non-OK status; the message carries the
 *   response body, or the status text when the body is empty or unreadable.
 */
async function generateElevenLabsTTS(
  config: TTSModelConfig,
  text: string,
): Promise<TTSGenerationResult> {
  // Strip trailing slashes so a configured base URL such as ".../v1/" does not
  // produce a "//text-to-speech" path.
  const baseUrl = (
    config.baseUrl ||
    TTS_PROVIDERS['elevenlabs-tts'].defaultBaseUrl ||
    ''
  ).replace(/\/+$/, '');

  // Generic format name -> ElevenLabs `output_format` query value.
  const outputFormatMap: Record<string, string> = {
    mp3: 'mp3_44100_128',
    opus: 'opus_48000_96',
    pcm: 'pcm_44100',
    wav: 'wav_44100',
    ulaw: 'ulaw_8000',
    alaw: 'alaw_8000',
  };

  const requestedFormat = config.format || 'mp3';
  // Own-property check: a plain-object lookup would also match inherited keys
  // (e.g. "constructor"). Unknown formats fall back to MP3, and the fallback is
  // what gets reported to the caller.
  const effectiveFormat = Object.prototype.hasOwnProperty.call(outputFormatMap, requestedFormat)
    ? requestedFormat
    : 'mp3';
  const outputFormat = outputFormatMap[effectiveFormat];

  // `||` (not `??`) on purpose: 0 and NaN are not usable speeds and become 1.0.
  const clampedSpeed = Math.min(1.2, Math.max(0.7, config.speed || 1.0));

  const headers: Record<string, string> = {
    'Content-Type': 'application/json; charset=utf-8',
  };
  // Only send the key header when a key exists; otherwise the literal string
  // "undefined" would be transmitted as the credential.
  if (config.apiKey) {
    headers['xi-api-key'] = config.apiKey;
  }

  const response = await fetch(
    `${baseUrl}/text-to-speech/${encodeURIComponent(config.voice)}?output_format=${outputFormat}`,
    {
      method: 'POST',
      headers,
      body: JSON.stringify({
        text,
        model_id: 'eleven_multilingual_v2',
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          speed: clampedSpeed,
        },
      }),
    },
  );

  if (!response.ok) {
    // Reading the body can itself fail; fall back to the status text in that case.
    const errorText = await response.text().catch(() => response.statusText);
    throw new Error(`ElevenLabs TTS API error: ${errorText || response.statusText}`);
  }

  const arrayBuffer = await response.arrayBuffer();
  return {
    audio: new Uint8Array(arrayBuffer),
    format: effectiveFormat,
  };
}

/**
* Get current TTS configuration from settings store
* Note: This function should only be called in browser context
Expand Down
2 changes: 1 addition & 1 deletion lib/audio/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ export type TTSProviderId =
| 'azure-tts'
| 'glm-tts'
| 'qwen-tts'
| 'elevenlabs-tts'
| 'browser-native-tts';
// Add new TTS providers below (uncomment and modify):
// | 'elevenlabs-tts'
// | 'fish-audio-tts'
// | 'cartesia-tts'
// | 'playht-tts'
Expand Down
2 changes: 2 additions & 0 deletions lib/i18n/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ export const settingsZhCN = {
providerAzureTTS: 'Azure TTS',
providerGLMTTS: 'GLM TTS',
providerQwenTTS: 'Qwen TTS(阿里云百炼)',
providerElevenLabsTTS: 'ElevenLabs TTS',
providerBrowserNativeTTS: '浏览器原生 TTS',
providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)',
providerBrowserNative: '浏览器原生 ASR',
Expand Down Expand Up @@ -808,6 +809,7 @@ export const settingsEnUS = {
providerAzureTTS: 'Azure TTS',
providerGLMTTS: 'GLM TTS',
providerQwenTTS: 'Qwen TTS (Alibaba Cloud Bailian)',
providerElevenLabsTTS: 'ElevenLabs TTS',
providerBrowserNativeTTS: 'Browser Native TTS',
providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)',
providerBrowserNative: 'Browser Native ASR',
Expand Down
1 change: 1 addition & 0 deletions lib/server/provider-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const TTS_ENV_MAP: Record<string, string> = {
TTS_AZURE: 'azure-tts',
TTS_GLM: 'glm-tts',
TTS_QWEN: 'qwen-tts',
TTS_ELEVENLABS: 'elevenlabs-tts',
};

const ASR_ENV_MAP: Record<string, string> = {
Expand Down
1 change: 1 addition & 0 deletions lib/store/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ const getDefaultAudioConfig = () => ({
'azure-tts': { apiKey: '', baseUrl: '', enabled: false },
'glm-tts': { apiKey: '', baseUrl: '', enabled: false },
'qwen-tts': { apiKey: '', baseUrl: '', enabled: false },
'elevenlabs-tts': { apiKey: '', baseUrl: '', enabled: false },
'browser-native-tts': { apiKey: '', baseUrl: '', enabled: true },
} as Record<TTSProviderId, { apiKey: string; baseUrl: string; enabled: boolean }>,
asrProvidersConfig: {
Expand Down
4 changes: 4 additions & 0 deletions public/logos/elevenlabs.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading