diff --git a/src/backend/src/filesystem/definitions/ts/fsentry.js b/src/backend/src/filesystem/definitions/ts/fsentry.js index 31ff0e64c6..37ca9ff16c 100644 --- a/src/backend/src/filesystem/definitions/ts/fsentry.js +++ b/src/backend/src/filesystem/definitions/ts/fsentry.js @@ -1,10 +1,10 @@ -'use strict'; +"use strict"; // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: // protoc-gen-ts_proto v2.8.0 // protoc v3.21.12 // source: fsentry.proto -Object.defineProperty(exports, '__esModule', { value: true }); +Object.defineProperty(exports, "__esModule", { value: true }); exports.FSEntry = exports.protobufPackage = void 0; /* eslint-disable */ const wire_1 = require("@bufbuild/protobuf/wire"); diff --git a/src/backend/src/modules/puterai/ElevenLabsTTSService.js b/src/backend/src/modules/puterai/ElevenLabsTTSService.js new file mode 100644 index 0000000000..b6bb7a3552 --- /dev/null +++ b/src/backend/src/modules/puterai/ElevenLabsTTSService.js @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ */ + +const { Readable } = require('stream'); +const APIError = require('../../api/APIError'); +const BaseService = require('../../services/BaseService'); +const { TypedValue } = require('../../services/drivers/meta/Runtime'); +const { Context } = require('../../util/context'); + +const DEFAULT_MODEL = 'eleven_multilingual_v2'; +const DEFAULT_VOICE_ID = '21m00Tcm4TlvDq8ikWAM'; // Common public "Rachel" sample voice +const DEFAULT_OUTPUT_FORMAT = 'mp3_44100_128'; +const SAMPLE_AUDIO_URL = 'https://puter-sample-data.puter.site/tts_example.mp3'; + +const ELEVENLABS_TTS_MODELS = [ + { id: DEFAULT_MODEL, name: 'Eleven Multilingual v2' }, + { id: 'eleven_flash_v2_5', name: 'Eleven Flash v2.5' }, + { id: 'eleven_turbo_v2_5', name: 'Eleven Turbo v2.5' }, + { id: 'eleven_v3', name: 'Eleven v3 Alpha' }, +]; + +/** + * ElevenLabs text-to-speech provider. + * Implements the `puter-tts` interface so the AI module can synthesize speech + * using ElevenLabs voices. + */ +class ElevenLabsTTSService extends BaseService { + /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */ + get meteringService () { + return this.services.get('meteringService').meteringService; + } + + static IMPLEMENTS = { + ['driver-capabilities']: { + supports_test_mode (iface, method_name) { + return iface === 'puter-tts' && method_name === 'synthesize'; + }, + }, + ['puter-tts']: { + async list_voices () { + return this.listVoices(); + }, + async list_engines () { + return this.listEngines(); + }, + async synthesize (params) { + return this.synthesize(params); + }, + }, + }; + + async _init () { + const svcThere = this.global_config?.services?.elevenlabs ?? this.config?.services?.elevenlabs ?? this.config?.elevenlabs; + + this.apiKey = svcThere?.apiKey ?? svcThere?.api_key ?? svcThere?.key; + this.baseUrl = svcThere?.baseUrl ?? 'https://api.elevenlabs.io'; + this.defaultVoiceId = svcThere?.defaultVoiceId ?? svcThere?.voiceId ?? 
DEFAULT_VOICE_ID; + + if ( !this.apiKey ) { + throw new Error('ElevenLabs API key not configured'); + } + } + + async request (path, { method = 'GET', body, headers = {} } = {}) { + const response = await fetch(`${this.baseUrl}${path}`, { + method, + headers: { + 'xi-api-key': this.apiKey, + ...(body ? { 'Content-Type': 'application/json' } : {}), + ...headers, + }, + body: body ? JSON.stringify(body) : undefined, + }); + + if ( response.ok ) { + return response; + } + + let detail = null; + try { + detail = await response.json(); + } catch ( e ) { + // ignore + } + this.log.error('ElevenLabs request failed', { path, status: response.status, detail }); + throw APIError.create('internal_server_error', null, { provider: 'elevenlabs', status: response.status }); + } + + async listVoices () { + const res = await this.request('/v1/voices'); + const data = await res.json(); + const voices = Array.isArray(data?.voices) ? data.voices : Array.isArray(data) ? data : []; + + return voices + .map(voice => ({ + id: voice.voice_id || voice.voiceId || voice.id, + name: voice.name, + description: voice.description, + category: voice.category, + provider: 'elevenlabs', + labels: voice.labels, + supported_models: ELEVENLABS_TTS_MODELS.map(model => model.id), + })) + .filter(v => v.id && v.name); + } + + async listEngines () { + return ELEVENLABS_TTS_MODELS.map(model => ({ + id: model.id, + name: model.name, + provider: 'elevenlabs', + pricing_per_million_chars: 0, + })); + } + + async synthesize (params) { + const { + text, + voice, + model, + response_format, + output_format, + voice_settings, + voiceSettings, + test_mode, + } = params; + if ( test_mode ) { + return new TypedValue({ + $: 'string:url:web', + content_type: 'audio', + }, SAMPLE_AUDIO_URL); + } + + if ( typeof text !== 'string' || !text.trim() ) { + throw APIError.create('field_required', null, { key: 'text' }); + } + + const voiceId = voice || this.defaultVoiceId; + const modelId = model || DEFAULT_MODEL; + const 
desiredFormat = output_format || response_format || DEFAULT_OUTPUT_FORMAT; + + const actor = Context.get('actor'); + const usageKey = `elevenlabs:${modelId}:character`; + const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, text.length); + if ( !usageAllowed ) { + throw APIError.create('insufficient_funds'); + } + + const payload = { + text, + model_id: modelId, + output_format: desiredFormat, + }; + + const finalVoiceSettings = voice_settings ?? voiceSettings; + if ( finalVoiceSettings ) { + payload.voice_settings = finalVoiceSettings; + } + + const response = await this.request(`/v1/text-to-speech/${voiceId}`, { + method: 'POST', + body: payload, + }); + + const arrayBuffer = await response.arrayBuffer(); + const buffer = Buffer.from(arrayBuffer); + const stream = Readable.from(buffer); + + this.meteringService.incrementUsage(actor, usageKey, text.length); + + return new TypedValue({ + $: 'stream', + content_type: response.headers.get('content-type') || 'audio/mpeg', + }, stream); + } +} + +module.exports = { + ElevenLabsTTSService, +}; diff --git a/src/backend/src/modules/puterai/PuterAIModule.js b/src/backend/src/modules/puterai/PuterAIModule.js index bbd3e44ff5..d2dc35a11e 100644 --- a/src/backend/src/modules/puterai/PuterAIModule.js +++ b/src/backend/src/modules/puterai/PuterAIModule.js @@ -55,6 +55,11 @@ class PuterAIModule extends AdvancedBase { services.registerService('aws-polly', AWSPollyService); } + if ( config?.services?.['elevenlabs'] || config?.elevenlabs ) { + const { ElevenLabsTTSService } = require('./ElevenLabsTTSService'); + services.registerService('elevenlabs-tts', ElevenLabsTTSService); + } + if ( config?.services?.openai || config?.openai ) { const { OpenAICompletionServiceWrapper } = require('./OpenAiCompletionService/index.mjs'); services.registerService('openai-completion', OpenAICompletionServiceWrapper); diff --git a/src/backend/src/modules/puterai/doc/ai-services-config.md 
// ElevenLabs Text-to-Speech Cost Map
//
// Per-character usage costs for the ElevenLabs TTS models, one entry per
// model. Keys follow the `elevenlabs:<model_id>:character` pattern that
// ElevenLabsTTSService builds when it checks credits and records usage; the
// service passes the length of the input text as the usage amount.
//
// NOTE(review): pricing for ElevenLabs varies by model and plan tier, so these
// figures should be re-checked when pricing changes. The values are presumably
// expressed in the same unit as the other cost maps (micro-cents) — confirm.

export const ELEVENLABS_COST_MAP = {
    'elevenlabs:eleven_multilingual_v2:character': 11,
    'elevenlabs:eleven_turbo_v2_5:character': 11,
    'elevenlabs:eleven_flash_v2_5:character': 5.5,
    'elevenlabs:eleven_v3:character': 11,
};
else if ( provider === 'elevenlabs' ) { + if ( ! options.voice ) { + options.voice = '21m00Tcm4TlvDq8ikWAM'; + } + if ( ! options.model && typeof options.engine === 'string' ) { + options.model = options.engine; + } + if ( ! options.model ) { + options.model = 'eleven_multilingual_v2'; + } + if ( ! options.output_format && !options.response_format ) { + options.output_format = 'mp3_44100_128'; + } + if ( options.response_format && !options.output_format ) { + options.output_format = options.response_format; + } + delete options.engine; } else { provider = 'aws-polly'; @@ -326,7 +348,9 @@ class AI { } } - const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly'; + const driverName = provider === 'openai' + ? 'openai-tts' + : (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly'); return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'synthesize', { responseType: 'blob', @@ -449,7 +473,13 @@ class AI { params.provider = 'openai'; } - const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly'; + if ( provider === 'elevenlabs' ) { + params.provider = 'elevenlabs'; + } + + const driverName = provider === 'openai' + ? 'openai-tts' + : (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly'); return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_engines', { responseType: 'text', @@ -478,7 +508,13 @@ class AI { delete params.engine; } - const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly'; + if ( provider === 'elevenlabs' ) { + params.provider = 'elevenlabs'; + } + + const driverName = provider === 'openai' + ? 'openai-tts' + : (provider === 'elevenlabs' ? 
/**
 * Core checks for txt2speech with the ElevenLabs provider.
 * Runs in test mode so that no external synthesis call is made, then verifies
 * that the result is an Audio object whose string forms all equal its `src`.
 */
const testTxt2SpeechWithElevenLabsProviderCore = async () => {
    const requestOptions = { provider: "elevenlabs", voice: "21m00Tcm4TlvDq8ikWAM" };
    const audio = await puter.ai.txt2speech(
        "Hello, this is an ElevenLabs provider test.",
        requestOptions,
        true, // test mode
    );

    assert(audio instanceof Audio, "txt2speech should return an Audio object for ElevenLabs provider");
    assert(audio !== null, "txt2speech should not return null for ElevenLabs provider");

    // Read the three string views once, in a fixed order.
    const asString = audio.toString();
    const asValue = audio.valueOf();
    const source = audio.src;

    // Every view must be a string...
    const typeChecks = [
        [asString, "toString() should return a string for ElevenLabs provider"],
        [asValue, "valueOf() should return a string for ElevenLabs provider"],
        [source, "src should be a string for ElevenLabs provider"],
    ];
    for ( const [candidate, message] of typeChecks ) {
        assert(typeof candidate === 'string', message);
    }

    // ...and none of them may be empty.
    const lengthChecks = [
        [asString, "toString() should not be empty for ElevenLabs provider"],
        [asValue, "valueOf() should not be empty for ElevenLabs provider"],
        [source, "src should not be empty for ElevenLabs provider"],
    ];
    for ( const [candidate, message] of lengthChecks ) {
        assert(candidate.length > 0, message);
    }

    // Both string conversions must agree with the audio source URL.
    assert(asString === source, "toString() should match src for ElevenLabs provider");
    assert(asValue === source, "valueOf() should match src for ElevenLabs provider");
};
[ fail("testTxt2SpeechWithOpenAIProvider failed:", error); } } + }, + + { + name: "testTxt2SpeechWithElevenLabsProvider", + description: "Test text-to-speech using the ElevenLabs provider in test mode", + test: async function() { + try { + await testTxt2SpeechWithElevenLabsProviderCore(); + pass("testTxt2SpeechWithElevenLabsProvider passed"); + } catch (error) { + fail("testTxt2SpeechWithElevenLabsProvider failed:", error); + } + } } ];