Skip to content

Commit bb752a5

Browse files
authored
Add ElevenLabs TTS provider integration (#2004)
* Add ElevenLabs TTS provider integration Adds ElevenLabs text-to-speech support in the backend and frontend. Adds `ElevenLabsTTSService`, updates `PuterAIModule` to register the service, documents configuration, and integrates cost tracking with zero cost in `MeteringService` for now (todo). updates `AI.js` to support 11labs as a provider and adds related tests for `txt2speech` functionality. * Update 11labs cost map values
1 parent 11e0575 commit bb752a5

File tree

8 files changed

+300
-5
lines changed

8 files changed

+300
-5
lines changed

src/backend/src/filesystem/definitions/ts/fsentry.js

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/*
2+
* Copyright (C) 2024-present Puter Technologies Inc.
3+
*
4+
* This file is part of Puter.
5+
*
6+
* Puter is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU Affero General Public License as published
8+
* by the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU Affero General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Affero General Public License
17+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
const { Readable } = require('stream');
21+
const APIError = require('../../api/APIError');
22+
const BaseService = require('../../services/BaseService');
23+
const { TypedValue } = require('../../services/drivers/meta/Runtime');
24+
const { Context } = require('../../util/context');
25+
26+
// Model id used by `synthesize` when the caller does not pass `model`.
const DEFAULT_MODEL = 'eleven_multilingual_v2';
27+
// Fallback voice when neither the request nor the service config names one.
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Common public "Rachel" sample voice
28+
// Format requested when the caller passes neither `output_format` nor `response_format`.
const DEFAULT_OUTPUT_FORMAT = 'mp3_44100_128';
29+
// Static clip returned by `synthesize` in test mode instead of calling ElevenLabs.
const SAMPLE_AUDIO_URL = 'https://puter-sample-data.puter.site/tts_example.mp3';
30+
31+
// Models reported by `listEngines`; their ids are also attached to every voice
// returned by `listVoices` as `supported_models`.
const ELEVENLABS_TTS_MODELS = [
32+
{ id: DEFAULT_MODEL, name: 'Eleven Multilingual v2' },
33+
{ id: 'eleven_flash_v2_5', name: 'Eleven Flash v2.5' },
34+
{ id: 'eleven_turbo_v2_5', name: 'Eleven Turbo v2.5' },
35+
{ id: 'eleven_v3', name: 'Eleven v3 Alpha' },
36+
];
37+
38+
/**
 * ElevenLabs text-to-speech provider.
 *
 * Implements the `puter-tts` interface so the AI module can synthesize speech
 * using ElevenLabs voices. Configuration is read from the `elevenlabs` service
 * block (`apiKey`, plus optional `baseUrl` and `defaultVoiceId`).
 */
class ElevenLabsTTSService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }

    static IMPLEMENTS = {
        ['driver-capabilities']: {
            // Only `puter-tts.synthesize` can be served without contacting ElevenLabs.
            supports_test_mode (iface, method_name) {
                return iface === 'puter-tts' && method_name === 'synthesize';
            },
        },
        ['puter-tts']: {
            async list_voices () {
                return this.listVoices();
            },
            async list_engines () {
                return this.listEngines();
            },
            async synthesize (params) {
                return this.synthesize(params);
            },
        },
    };

    /**
     * Resolves the API key, base URL and default voice from configuration.
     *
     * @throws {Error} When no API key is configured.
     */
    async _init () {
        const svcThere = this.global_config?.services?.elevenlabs ?? this.config?.services?.elevenlabs ?? this.config?.elevenlabs;

        this.apiKey = svcThere?.apiKey ?? svcThere?.api_key ?? svcThere?.key;
        // Strip trailing slashes so `${baseUrl}${path}` never becomes `...//v1/...`.
        this.baseUrl = String(svcThere?.baseUrl ?? 'https://api.elevenlabs.io').replace(/\/+$/, '');
        this.defaultVoiceId = svcThere?.defaultVoiceId ?? svcThere?.voiceId ?? DEFAULT_VOICE_ID;

        if ( !this.apiKey ) {
            throw new Error('ElevenLabs API key not configured');
        }
    }

    /**
     * Performs an authenticated request against the ElevenLabs API.
     *
     * @param {string} path - Path (including any query string) appended to the base URL.
     * @param {object} [options]
     * @param {string} [options.method='GET'] - HTTP method.
     * @param {object} [options.body] - JSON-serializable body; sets `Content-Type: application/json`.
     * @param {object} [options.headers] - Extra headers; these override the defaults.
     * @returns {Promise<Response>} The response, only when its status is OK.
     * @throws {APIError} `internal_server_error` on any non-OK status; details are logged, not returned.
     */
    async request (path, { method = 'GET', body, headers = {} } = {}) {
        const response = await fetch(`${this.baseUrl}${path}`, {
            method,
            headers: {
                'xi-api-key': this.apiKey,
                ...(body ? { 'Content-Type': 'application/json' } : {}),
                ...headers,
            },
            body: body ? JSON.stringify(body) : undefined,
        });

        if ( response.ok ) {
            return response;
        }

        // Read the body as text first: error payloads are not guaranteed to be
        // JSON, and a response body can only be consumed once.
        let detail = null;
        try {
            const raw = await response.text();
            try {
                detail = JSON.parse(raw);
            } catch ( e ) {
                detail = raw || null;
            }
        } catch ( e ) {
            // Body could not be read; the status is still logged below.
        }
        this.log.error('ElevenLabs request failed', { path, status: response.status, detail });
        throw APIError.create('internal_server_error', null, { provider: 'elevenlabs', status: response.status });
    }

    /**
     * Lists the voices available to the configured account.
     *
     * @returns {Promise<object[]>} Normalized voice records; entries lacking an id or a name are dropped.
     */
    async listVoices () {
        const res = await this.request('/v1/voices');
        const data = await res.json();
        // Accept both `{ voices: [...] }` and a bare array.
        const voices = Array.isArray(data?.voices) ? data.voices : Array.isArray(data) ? data : [];

        return voices
            .map(voice => ({
                id: voice.voice_id || voice.voiceId || voice.id,
                name: voice.name,
                description: voice.description,
                category: voice.category,
                provider: 'elevenlabs',
                labels: voice.labels,
                supported_models: ELEVENLABS_TTS_MODELS.map(model => model.id),
            }))
            .filter(v => v.id && v.name);
    }

    /**
     * Lists the statically known ElevenLabs models.
     *
     * @returns {Promise<object[]>} One record per entry in `ELEVENLABS_TTS_MODELS`.
     */
    async listEngines () {
        return ELEVENLABS_TTS_MODELS.map(model => ({
            id: model.id,
            name: model.name,
            provider: 'elevenlabs',
            pricing_per_million_chars: 0,
        }));
    }

    /**
     * Synthesizes speech for `params.text`.
     *
     * @param {object} params
     * @param {string} params.text - Text to speak; must be a non-blank string.
     * @param {string} [params.voice] - Voice id; defaults to the configured default voice.
     * @param {string} [params.model] - Model id; defaults to `DEFAULT_MODEL`.
     * @param {string} [params.output_format] - Output format; takes precedence over `response_format`.
     * @param {string} [params.response_format] - Alias for `output_format`.
     * @param {object} [params.voice_settings] - Voice settings forwarded as-is (`voiceSettings` is also accepted).
     * @param {boolean} [params.test_mode] - When truthy, returns a sample URL without calling ElevenLabs.
     * @returns {Promise<TypedValue>} A web URL in test mode, otherwise an audio stream.
     * @throws {APIError} `field_required` for missing text, `insufficient_funds` when metering denies the
     *     request, or `internal_server_error` when the upstream request fails.
     */
    async synthesize (params) {
        const {
            text,
            voice,
            model,
            response_format,
            output_format,
            voice_settings,
            voiceSettings,
            test_mode,
        } = params;
        if ( test_mode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'audio',
            }, SAMPLE_AUDIO_URL);
        }

        if ( typeof text !== 'string' || !text.trim() ) {
            throw APIError.create('field_required', null, { key: 'text' });
        }

        const voiceId = voice || this.defaultVoiceId;
        const modelId = model || DEFAULT_MODEL;
        const desiredFormat = output_format || response_format || DEFAULT_OUTPUT_FORMAT;

        const actor = Context.get('actor');
        const usageKey = `elevenlabs:${modelId}:character`;
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, text.length);
        if ( !usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

        const payload = {
            text,
            model_id: modelId,
        };

        const finalVoiceSettings = voice_settings ?? voiceSettings;
        if ( finalVoiceSettings ) {
            payload.voice_settings = finalVoiceSettings;
        }

        // ElevenLabs takes `output_format` as a query parameter, not a body
        // field, so it goes in the URL. The voice id is caller-supplied and is
        // encoded so it cannot alter the request path or query.
        const query = new URLSearchParams({ output_format: String(desiredFormat) });
        const response = await this.request(`/v1/text-to-speech/${encodeURIComponent(voiceId)}?${query}`, {
            method: 'POST',
            body: payload,
        });

        const arrayBuffer = await response.arrayBuffer();
        const buffer = Buffer.from(arrayBuffer);
        const stream = Readable.from(buffer);

        // Usage is recorded only after the audio was fetched successfully.
        this.meteringService.incrementUsage(actor, usageKey, text.length);

        return new TypedValue({
            $: 'stream',
            content_type: response.headers.get('content-type') || 'audio/mpeg',
        }, stream);
    }
}
193+
194+
// Exported by name; PuterAIModule destructures `ElevenLabsTTSService` from this module.
module.exports = {
195+
ElevenLabsTTSService,
196+
};

src/backend/src/modules/puterai/PuterAIModule.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ class PuterAIModule extends AdvancedBase {
5555
services.registerService('aws-polly', AWSPollyService);
5656
}
5757

58+
if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) {
59+
const { ElevenLabsTTSService } = require('./ElevenLabsTTSService');
60+
services.registerService('elevenlabs-tts', ElevenLabsTTSService);
61+
}
62+
5863
if ( config?.services?.openai || config?.openai ) {
5964
const { OpenAICompletionServiceWrapper } = require('./OpenAiCompletionService/index.mjs');
6065
services.registerService('openai-completion', OpenAICompletionServiceWrapper);

src/backend/src/modules/puterai/doc/ai-services-config.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ AI services are configured under the `services` block in the configuration file.
99
"openai": {
1010
"apiKey": "sk-abcdefg..."
1111
},
12+
"elevenlabs": {
13+
"apiKey": "eleven-api-key",
14+
"defaultVoiceId": "optional-voice-id"
15+
},
1216
"deepseek": {
1317
"apiKey": "sk-xyz123..."
1418
},
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// ElevenLabs Text-to-Speech Cost Map
2+
//
3+
// Pricing for ElevenLabs voices varies by model and plan tier. The entries
4+
// below are provisional per-character rates, keyed as
5+
// `elevenlabs:<model_id>:character`. NOTE(review): units are presumably the
6+
// same as the other cost maps (micro-cents?) - confirm once pricing is final.
7+
8+
export const ELEVENLABS_COST_MAP = {
9+
'elevenlabs:eleven_multilingual_v2:character': 11,
10+
'elevenlabs:eleven_turbo_v2_5:character': 11,
11+
'elevenlabs:eleven_flash_v2_5:character': 5.5,
12+
'elevenlabs:eleven_v3:character': 11,
13+
};

src/backend/src/services/MeteringService/costMaps/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ import { OPENROUTER_COST_MAP } from './openrouterCostMap';
1313
import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap';
1414
import { TOGETHER_COST_MAP } from './togetherCostMap';
1515
import { XAI_COST_MAP } from './xaiCostMap';
16+
import { ELEVENLABS_COST_MAP } from './elevenlabsCostMap';
1617

1718
export const COST_MAPS = {
1819
...AWS_POLLY_COST_MAP,
1920
...AWS_TEXTRACT_COST_MAP,
2021
...CLAUDE_COST_MAP,
2122
...DEEPSEEK_COST_MAP,
23+
...ELEVENLABS_COST_MAP,
2224
...GEMINI_COST_MAP,
2325
...GROQ_COST_MAP,
2426
...KV_COST_MAP,

src/puter-js/src/modules/AI.js

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ const normalizeTTSProvider = (value) => {
66
}
77
const lower = value.toLowerCase();
88
if ( lower === 'openai' ) return 'openai';
9+
if ( ['elevenlabs', 'eleven', '11labs', '11-labs', 'eleven-labs', 'elevenlabs-tts'].includes(lower) ) return 'elevenlabs';
910
if ( lower === 'aws' || lower === 'polly' || lower === 'aws-polly' ) return 'aws-polly';
1011
return value;
1112
};
@@ -281,6 +282,10 @@ class AI {
281282
provider = 'openai';
282283
}
283284

285+
if ( options.engine && normalizeTTSProvider(options.engine) === 'elevenlabs' && !options.provider ) {
286+
provider = 'elevenlabs';
287+
}
288+
284289
if ( provider === 'openai' ) {
285290
if ( !options.model && typeof options.engine === 'string' ) {
286291
options.model = options.engine;
@@ -295,6 +300,23 @@ class AI {
295300
options.response_format = 'mp3';
296301
}
297302
delete options.engine;
303+
} else if ( provider === 'elevenlabs' ) {
304+
if ( ! options.voice ) {
305+
options.voice = '21m00Tcm4TlvDq8ikWAM';
306+
}
307+
if ( ! options.model && typeof options.engine === 'string' ) {
308+
options.model = options.engine;
309+
}
310+
if ( ! options.model ) {
311+
options.model = 'eleven_multilingual_v2';
312+
}
313+
if ( ! options.output_format && !options.response_format ) {
314+
options.output_format = 'mp3_44100_128';
315+
}
316+
if ( options.response_format && !options.output_format ) {
317+
options.output_format = options.response_format;
318+
}
319+
delete options.engine;
298320
} else {
299321
provider = 'aws-polly';
300322

@@ -326,7 +348,9 @@ class AI {
326348
}
327349
}
328350

329-
const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
351+
const driverName = provider === 'openai'
352+
? 'openai-tts'
353+
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
330354

331355
return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'synthesize', {
332356
responseType: 'blob',
@@ -449,7 +473,13 @@ class AI {
449473
params.provider = 'openai';
450474
}
451475

452-
const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
476+
if ( provider === 'elevenlabs' ) {
477+
params.provider = 'elevenlabs';
478+
}
479+
480+
const driverName = provider === 'openai'
481+
? 'openai-tts'
482+
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
453483

454484
return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_engines', {
455485
responseType: 'text',
@@ -478,7 +508,13 @@ class AI {
478508
delete params.engine;
479509
}
480510

481-
const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
511+
if ( provider === 'elevenlabs' ) {
512+
params.provider = 'elevenlabs';
513+
}
514+
515+
const driverName = provider === 'openai'
516+
? 'openai-tts'
517+
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
482518

483519
return utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_voices', {
484520
responseType: 'text',

src/puter-js/test/txt2speech.test.js

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,32 @@ const testTxt2SpeechWithOpenAIProviderCore = async function() {
157157
assert(valueOfValue === srcValue, "valueOf() should match src for OpenAI provider");
158158
};
159159

160+
const testTxt2SpeechWithElevenLabsProviderCore = async function() {
    // The third argument enables test mode, so no external call is made.
    const prompt = "Hello, this is an ElevenLabs provider test.";
    const ttsOptions = { provider: "elevenlabs", voice: "21m00Tcm4TlvDq8ikWAM" };
    const audio = await puter.ai.txt2speech(prompt, ttsOptions, true);

    // The result must be a real Audio element.
    assert(audio instanceof Audio, "txt2speech should return an Audio object for ElevenLabs provider");
    assert(audio !== null, "txt2speech should not return null for ElevenLabs provider");

    // Three views of the same URL: string conversion, primitive value, and `src`.
    const asText = audio.toString();
    const asPrimitive = audio.valueOf();
    const source = audio.src;

    // Each view is a string...
    assert(typeof asText === 'string', "toString() should return a string for ElevenLabs provider");
    assert(typeof asPrimitive === 'string', "valueOf() should return a string for ElevenLabs provider");
    assert(typeof source === 'string', "src should be a string for ElevenLabs provider");

    // ...that is non-empty...
    assert(asText.length > 0, "toString() should not be empty for ElevenLabs provider");
    assert(asPrimitive.length > 0, "valueOf() should not be empty for ElevenLabs provider");
    assert(source.length > 0, "src should not be empty for ElevenLabs provider");

    // ...and all three agree with `src`.
    assert(asText === source, "toString() should match src for ElevenLabs provider");
    assert(asPrimitive === source, "valueOf() should match src for ElevenLabs provider");
};
185+
160186
// Export test functions
161187
window.txt2speechTests = [
162188
{
@@ -209,5 +235,18 @@ window.txt2speechTests = [
209235
fail("testTxt2SpeechWithOpenAIProvider failed:", error);
210236
}
211237
}
238+
},
239+
240+
{
241+
name: "testTxt2SpeechWithElevenLabsProvider",
242+
description: "Test text-to-speech using the ElevenLabs provider in test mode",
243+
test: async function() {
244+
try {
245+
await testTxt2SpeechWithElevenLabsProviderCore();
246+
pass("testTxt2SpeechWithElevenLabsProvider passed");
247+
} catch (error) {
248+
fail("testTxt2SpeechWithElevenLabsProvider failed:", error);
249+
}
250+
}
212251
}
213252
];

0 commit comments

Comments
 (0)