Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/backend/src/filesystem/definitions/ts/fsentry.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

196 changes: 196 additions & 0 deletions src/backend/src/modules/puterai/ElevenLabsTTSService.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

const { Readable } = require('stream');
const APIError = require('../../api/APIError');
const BaseService = require('../../services/BaseService');
const { TypedValue } = require('../../services/drivers/meta/Runtime');
const { Context } = require('../../util/context');

// Fallbacks used when a request does not name a model, voice or output format.
const DEFAULT_MODEL = 'eleven_multilingual_v2';
const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Common public "Rachel" sample voice
const DEFAULT_OUTPUT_FORMAT = 'mp3_44100_128';

// URL handed back by `synthesize` when it is invoked in test mode.
const SAMPLE_AUDIO_URL = 'https://puter-sample-data.puter.site/tts_example.mp3';

// Models advertised by this provider, expressed as [id, display name] pairs
// and expanded into `{ id, name }` records.
const ELEVENLABS_TTS_MODELS = [
    [DEFAULT_MODEL, 'Eleven Multilingual v2'],
    ['eleven_flash_v2_5', 'Eleven Flash v2.5'],
    ['eleven_turbo_v2_5', 'Eleven Turbo v2.5'],
    ['eleven_v3', 'Eleven v3 Alpha'],
].map(([id, name]) => ({ id, name }));

/**
 * ElevenLabs text-to-speech provider.
 * Implements the `puter-tts` interface so the AI module can synthesize speech
 * using ElevenLabs voices.
 *
 * Configuration is read in `_init` from an `elevenlabs` service config entry
 * (`apiKey`, optional `baseUrl`, optional `defaultVoiceId`).
 */
class ElevenLabsTTSService extends BaseService {
    /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
    get meteringService () {
        return this.services.get('meteringService').meteringService;
    }

    // Driver interface table. Each handler simply forwards to the instance
    // method of the same purpose.
    // NOTE(review): the handlers rely on `this` being the service instance when
    // the driver layer invokes them — confirm against the driver framework.
    static IMPLEMENTS = {
        ['driver-capabilities']: {
            /**
             * Reports that only `puter-tts.synthesize` supports test mode.
             * @param {string} iface - Interface name being queried.
             * @param {string} method_name - Method name being queried.
             * @returns {boolean}
             */
            supports_test_mode (iface, method_name) {
                return iface === 'puter-tts' && method_name === 'synthesize';
            },
        },
        ['puter-tts']: {
            async list_voices () {
                return this.listVoices();
            },
            async list_engines () {
                return this.listEngines();
            },
            async synthesize (params) {
                return this.synthesize(params);
            },
        },
    };

    /**
     * Resolves the ElevenLabs configuration and stores the API key, base URL
     * and default voice on the instance.
     *
     * The config entry is looked up in `global_config.services.elevenlabs`,
     * then `config.services.elevenlabs`, then `config.elevenlabs`. Several
     * spellings of the key / voice fields are accepted.
     *
     * @throws {Error} When no API key is configured.
     */
    async _init () {
        const svcThere = this.global_config?.services?.elevenlabs ?? this.config?.services?.elevenlabs ?? this.config?.elevenlabs;

        this.apiKey = svcThere?.apiKey ?? svcThere?.api_key ?? svcThere?.key;
        this.baseUrl = svcThere?.baseUrl ?? 'https://api.elevenlabs.io';
        this.defaultVoiceId = svcThere?.defaultVoiceId ?? svcThere?.voiceId ?? DEFAULT_VOICE_ID;

        if ( !this.apiKey ) {
            throw new Error('ElevenLabs API key not configured');
        }
    }

    /**
     * Performs an authenticated HTTP request against the ElevenLabs API.
     *
     * @param {string} path - Path (and optional query string) appended to `baseUrl`.
     * @param {object} [options]
     * @param {string} [options.method='GET'] - HTTP method.
     * @param {object} [options.body] - When truthy, sent as a JSON body with a
     *   JSON `Content-Type` header.
     * @param {object} [options.headers] - Extra headers; these override the defaults.
     * @returns {Promise<Response>} The fetch response when `response.ok` is true.
     * @throws {APIError} `internal_server_error` for any non-OK response; the
     *   upstream status is attached and the upstream body is only logged.
     */
    async request (path, { method = 'GET', body, headers = {} } = {}) {
        const response = await fetch(`${this.baseUrl}${path}`, {
            method,
            headers: {
                'xi-api-key': this.apiKey,
                ...(body ? { 'Content-Type': 'application/json' } : {}),
                ...headers,
            },
            body: body ? JSON.stringify(body) : undefined,
        });

        if ( response.ok ) {
            return response;
        }

        let detail = null;
        try {
            detail = await response.json();
        } catch ( e ) {
            // Best effort: the error body may be empty or not JSON. The status
            // code is still logged and reported below.
        }
        this.log.error('ElevenLabs request failed', { path, status: response.status, detail });
        throw APIError.create('internal_server_error', null, { provider: 'elevenlabs', status: response.status });
    }

    /**
     * Fetches the voices available to the configured account.
     *
     * Accepts either a `{ voices: [...] }` payload or a bare array, and drops
     * entries that have no usable id or name.
     *
     * @returns {Promise<Array<object>>} Voice records with `id`, `name`,
     *   `description`, `category`, `provider`, `labels` and `supported_models`.
     */
    async listVoices () {
        const res = await this.request('/v1/voices');
        const data = await res.json();
        const voices = Array.isArray(data?.voices) ? data.voices : Array.isArray(data) ? data : [];

        return voices
            .map(voice => ({
                id: voice.voice_id || voice.voiceId || voice.id,
                name: voice.name,
                description: voice.description,
                category: voice.category,
                provider: 'elevenlabs',
                labels: voice.labels,
                // Every voice is reported with the full static model list; the
                // upstream response is not consulted for per-voice support.
                supported_models: ELEVENLABS_TTS_MODELS.map(model => model.id),
            }))
            .filter(v => v.id && v.name);
    }

    /**
     * Lists the models this provider exposes, built from the static
     * `ELEVENLABS_TTS_MODELS` table.
     *
     * @returns {Promise<Array<object>>} Records with `id`, `name`, `provider`
     *   and `pricing_per_million_chars` (hard-coded to 0 here).
     */
    async listEngines () {
        return ELEVENLABS_TTS_MODELS.map(model => ({
            id: model.id,
            name: model.name,
            provider: 'elevenlabs',
            pricing_per_million_chars: 0,
        }));
    }

    /**
     * Synthesizes speech for the given text.
     *
     * @param {object} params
     * @param {string} params.text - Text to speak; must be a non-blank string.
     * @param {string} [params.voice] - Voice id; falls back to the configured default.
     * @param {string} [params.model] - Model id; falls back to `DEFAULT_MODEL`.
     * @param {string} [params.output_format] - Requested audio format.
     * @param {string} [params.response_format] - Alias used when `output_format` is absent.
     * @param {object} [params.voice_settings] - Forwarded as `voice_settings`.
     * @param {object} [params.voiceSettings] - Alias for `voice_settings`.
     * @param {boolean} [params.test_mode] - When truthy, returns a sample audio
     *   URL without validating input, metering, or calling ElevenLabs.
     * @returns {Promise<TypedValue>} A `stream` value carrying the audio, or a
     *   `string:url:web` value in test mode.
     * @throws {APIError} `field_required` when `text` is missing or blank,
     *   `insufficient_funds` when the metering check fails, or
     *   `internal_server_error` when the upstream request fails.
     */
    async synthesize (params) {
        const {
            text,
            voice,
            model,
            response_format,
            output_format,
            voice_settings,
            voiceSettings,
            test_mode,
        } = params;
        if ( test_mode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'audio',
            }, SAMPLE_AUDIO_URL);
        }

        if ( typeof text !== 'string' || !text.trim() ) {
            throw APIError.create('field_required', null, { key: 'text' });
        }

        const voiceId = voice || this.defaultVoiceId;
        const modelId = model || DEFAULT_MODEL;
        const desiredFormat = output_format || response_format || DEFAULT_OUTPUT_FORMAT;

        // Usage is metered per character of input text, keyed by model.
        const actor = Context.get('actor');
        const usageKey = `elevenlabs:${modelId}:character`;
        const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, text.length);
        if ( !usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

        const payload = {
            text,
            model_id: modelId,
        };

        const finalVoiceSettings = voice_settings ?? voiceSettings;
        if ( finalVoiceSettings ) {
            payload.voice_settings = finalVoiceSettings;
        }

        // The ElevenLabs endpoint takes `output_format` as a query parameter,
        // not as a JSON body field, so it is sent in the query string.
        const query = new URLSearchParams({ output_format: desiredFormat });

        // The voice id is caller-supplied; escape it so characters such as
        // `/`, `?` or `..` cannot alter which endpoint is requested.
        const response = await this.request(`/v1/text-to-speech/${encodeURIComponent(voiceId)}?${query}`, {
            method: 'POST',
            body: payload,
        });

        // The whole audio payload is buffered, then exposed as a stream.
        const arrayBuffer = await response.arrayBuffer();
        const buffer = Buffer.from(arrayBuffer);
        const stream = Readable.from(buffer);

        // Charge only after the audio was obtained successfully. The call is
        // not awaited here.
        this.meteringService.incrementUsage(actor, usageKey, text.length);

        return new TypedValue({
            $: 'stream',
            content_type: response.headers.get('content-type') || 'audio/mpeg',
        }, stream);
    }
}

// Public surface of this module: the ElevenLabs TTS service class.
module.exports = { ElevenLabsTTSService };
5 changes: 5 additions & 0 deletions src/backend/src/modules/puterai/PuterAIModule.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class PuterAIModule extends AdvancedBase {
services.registerService('aws-polly', AWSPollyService);
}

if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) {
const { ElevenLabsTTSService } = require('./ElevenLabsTTSService');
services.registerService('elevenlabs-tts', ElevenLabsTTSService);
}

if ( config?.services?.openai || config?.openai ) {
const { OpenAICompletionServiceWrapper } = require('./OpenAiCompletionService/index.mjs');
services.registerService('openai-completion', OpenAICompletionServiceWrapper);
Expand Down
4 changes: 4 additions & 0 deletions src/backend/src/modules/puterai/doc/ai-services-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ AI services are configured under the `services` block in the configuration file.
"openai": {
"apiKey": "sk-abcdefg..."
},
"elevenlabs": {
"apiKey": "eleven-api-key",
"defaultVoiceId": "optional-voice-id"
},
"deepseek": {
"apiKey": "sk-xyz123..."
},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// ElevenLabs Text-to-Speech Cost Map
//
// Pricing for ElevenLabs voices varies by model and plan tier. Each entry is
// keyed as `elevenlabs:<model_id>:character` and holds the rate applied per
// character of synthesized text for that model.
//
// NOTE(review): an earlier revision of this comment stated that usage was
// recorded at zero cost; the rates below are non-zero, so usage is billed.
// The unit is presumably the metering service's micro-cent unit — confirm
// before changing any value.

export const ELEVENLABS_COST_MAP = {
'elevenlabs:eleven_multilingual_v2:character': 11,
'elevenlabs:eleven_turbo_v2_5:character': 11,
'elevenlabs:eleven_flash_v2_5:character': 5.5,
'elevenlabs:eleven_v3:character': 11,
};
2 changes: 2 additions & 0 deletions src/backend/src/services/MeteringService/costMaps/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ import { OPENROUTER_COST_MAP } from './openrouterCostMap';
import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap';
import { TOGETHER_COST_MAP } from './togetherCostMap';
import { XAI_COST_MAP } from './xaiCostMap';
import { ELEVENLABS_COST_MAP } from './elevenlabsCostMap';

export const COST_MAPS = {
...AWS_POLLY_COST_MAP,
...AWS_TEXTRACT_COST_MAP,
...CLAUDE_COST_MAP,
...DEEPSEEK_COST_MAP,
...ELEVENLABS_COST_MAP,
...GEMINI_COST_MAP,
...GROQ_COST_MAP,
...KV_COST_MAP,
Expand Down
42 changes: 39 additions & 3 deletions src/puter-js/src/modules/AI.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const normalizeTTSProvider = (value) => {
}
const lower = value.toLowerCase();
if ( lower === 'openai' ) return 'openai';
if ( ['elevenlabs', 'eleven', '11labs', '11-labs', 'eleven-labs', 'elevenlabs-tts'].includes(lower) ) return 'elevenlabs';
if ( lower === 'aws' || lower === 'polly' || lower === 'aws-polly' ) return 'aws-polly';
return value;
};
Expand Down Expand Up @@ -281,6 +282,10 @@ class AI {
provider = 'openai';
}

if ( options.engine && normalizeTTSProvider(options.engine) === 'elevenlabs' && !options.provider ) {
provider = 'elevenlabs';
}

if ( provider === 'openai' ) {
if ( !options.model && typeof options.engine === 'string' ) {
options.model = options.engine;
Expand All @@ -295,6 +300,23 @@ class AI {
options.response_format = 'mp3';
}
delete options.engine;
} else if ( provider === 'elevenlabs' ) {
if ( ! options.voice ) {
options.voice = '21m00Tcm4TlvDq8ikWAM';
}
if ( ! options.model && typeof options.engine === 'string' ) {
options.model = options.engine;
}
if ( ! options.model ) {
options.model = 'eleven_multilingual_v2';
}
if ( ! options.output_format && !options.response_format ) {
options.output_format = 'mp3_44100_128';
}
if ( options.response_format && !options.output_format ) {
options.output_format = options.response_format;
}
delete options.engine;
} else {
provider = 'aws-polly';

Expand Down Expand Up @@ -326,7 +348,9 @@ class AI {
}
}

const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');

return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'synthesize', {
responseType: 'blob',
Expand Down Expand Up @@ -449,7 +473,13 @@ class AI {
params.provider = 'openai';
}

const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
if ( provider === 'elevenlabs' ) {
params.provider = 'elevenlabs';
}

const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');

return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_engines', {
responseType: 'text',
Expand Down Expand Up @@ -478,7 +508,13 @@ class AI {
delete params.engine;
}

const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
if ( provider === 'elevenlabs' ) {
params.provider = 'elevenlabs';
}

const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');

return utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_voices', {
responseType: 'text',
Expand Down
39 changes: 39 additions & 0 deletions src/puter-js/test/txt2speech.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,32 @@ const testTxt2SpeechWithOpenAIProviderCore = async function() {
assert(valueOfValue === srcValue, "valueOf() should match src for OpenAI provider");
};

/**
 * Core check for the ElevenLabs provider: calls `puter.ai.txt2speech` in test
 * mode (third argument `true`) so no external request is needed, then verifies
 * the returned Audio object and its string representations.
 */
const testTxt2SpeechWithElevenLabsProviderCore = async function() {
    const prompt = "Hello, this is an ElevenLabs provider test.";
    const ttsOptions = { provider: "elevenlabs", voice: "21m00Tcm4TlvDq8ikWAM" };
    const audio = await puter.ai.txt2speech(prompt, ttsOptions, true);

    assert(audio instanceof Audio, "txt2speech should return an Audio object for ElevenLabs provider");
    assert(audio !== null, "txt2speech should not return null for ElevenLabs provider");

    // Read the three representations once, in the same order as they are checked.
    const asString = audio.toString();
    const asValue = audio.valueOf();
    const source = audio.src;

    // Each representation must be a string...
    const typeChecks = [
        [asString, "toString() should return a string for ElevenLabs provider"],
        [asValue, "valueOf() should return a string for ElevenLabs provider"],
        [source, "src should be a string for ElevenLabs provider"],
    ];
    for ( const [value, message] of typeChecks ) {
        assert(typeof value === 'string', message);
    }

    // ...and must not be empty.
    const nonEmptyChecks = [
        [asString, "toString() should not be empty for ElevenLabs provider"],
        [asValue, "valueOf() should not be empty for ElevenLabs provider"],
        [source, "src should not be empty for ElevenLabs provider"],
    ];
    for ( const [value, message] of nonEmptyChecks ) {
        assert(value.length > 0, message);
    }

    // Both conversions must agree with the element's `src`.
    assert(asString === source, "toString() should match src for ElevenLabs provider");
    assert(asValue === source, "valueOf() should match src for ElevenLabs provider");
};

// Export test functions
window.txt2speechTests = [
{
Expand Down Expand Up @@ -209,5 +235,18 @@ window.txt2speechTests = [
fail("testTxt2SpeechWithOpenAIProvider failed:", error);
}
}
},

{
name: "testTxt2SpeechWithElevenLabsProvider",
description: "Test text-to-speech using the ElevenLabs provider in test mode",
test: async function() {
try {
await testTxt2SpeechWithElevenLabsProviderCore();
pass("testTxt2SpeechWithElevenLabsProvider passed");
} catch (error) {
fail("testTxt2SpeechWithElevenLabsProvider failed:", error);
}
}
}
];