77} from "@elizaos/core" ;
88import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js" ;
99import {
10- SpeechToTextConvertRequestModelId ,
11- SpeechToTextConvertRequestTimestampsGranularity ,
12- TextToSpeechStreamRequestOutputFormat ,
10+ SpeechToTextConvertRequestModelId as SttModelIdEnum ,
11+ SpeechToTextConvertRequestTimestampsGranularity as SttTimestampsGranularityEnum ,
12+ TextToSpeechStreamRequestOutputFormat as TtsOutputFormatEnum ,
1313} from "@elevenlabs/elevenlabs-js/api" ;
1414import type {
1515 BodySpeechToTextV1SpeechToTextPost ,
@@ -23,14 +23,14 @@ import type {
2323function parseTtsOutputFormat (
2424 format : string ,
2525) : TextToSpeechStreamRequestOutputFormat {
26- for ( const allowed of Object . values ( TextToSpeechStreamRequestOutputFormat ) ) {
26+ for ( const allowed of Object . values ( TtsOutputFormatEnum ) ) {
2727 if ( allowed === format ) return allowed ;
2828 }
2929 throw new Error ( `Unsupported ElevenLabs TTS output format: ${ format } ` ) ;
3030}
3131
3232function parseSttModelId ( id : string ) : SpeechToTextConvertRequestModelId {
33- for ( const allowed of Object . values ( SpeechToTextConvertRequestModelId ) ) {
33+ for ( const allowed of Object . values ( SttModelIdEnum ) ) {
3434 if ( allowed === id ) return allowed ;
3535 }
3636 throw new Error ( `Unsupported ElevenLabs STT model: ${ id } ` ) ;
@@ -39,9 +39,7 @@ function parseSttModelId(id: string): SpeechToTextConvertRequestModelId {
3939function parseSttTimestampsGranularity (
4040 value : string ,
4141) : SpeechToTextConvertRequestTimestampsGranularity {
42- for ( const allowed of Object . values (
43- SpeechToTextConvertRequestTimestampsGranularity ,
44- ) ) {
42+ for ( const allowed of Object . values ( SttTimestampsGranularityEnum ) ) {
4543 if ( allowed === value ) return allowed ;
4644 }
4745 throw new Error (
@@ -86,47 +84,57 @@ interface TranscriptionSettings {
8684}
8785
8886function isBrowser ( ) : boolean {
89- return (
90- typeof globalThis !== "undefined" &&
91- typeof ( globalThis as unknown as { document ?: unknown } ) . document !==
92- "undefined"
93- ) ;
87+ return typeof globalThis . document !== "undefined" ;
9488}
9589
90+ function getSetting ( runtime : IAgentRuntime , key : string ) : string | undefined ;
91+ function getSetting (
92+ runtime : IAgentRuntime ,
93+ key : string ,
94+ fallback : string ,
95+ ) : string ;
9696function getSetting (
9797 runtime : IAgentRuntime ,
9898 key : string ,
9999 fallback ?: string ,
100- ) : string {
100+ ) : string | undefined {
101+ const rawRuntime = runtime . getSetting ( key ) ;
102+ const fromRuntime =
103+ rawRuntime === null || rawRuntime === undefined
104+ ? undefined
105+ : String ( rawRuntime ) ;
106+
101107 const envValue =
102108 typeof process !== "undefined" &&
103- ( process as { env ?: Record < string , string > } ) . env
104- ? ( process as { env : Record < string , string > } ) . env [ key ]
109+ process . env &&
110+ typeof process . env [ key ] === "string"
111+ ? process . env [ key ]
105112 : undefined ;
106- return (
107- ( runtime . getSetting ( key ) as string ) ??
108- ( envValue as string ) ??
109- ( fallback as string )
110- ) ;
113+
114+ return fromRuntime ?? envValue ?? fallback ;
111115}
112116
113117function getBaseURL ( runtime : IAgentRuntime ) : string {
114- const browserURL = runtime . getSetting ( "ELEVENLABS_BROWSER_URL" ) as
115- | string
116- | undefined ;
118+ const browserRaw = runtime . getSetting ( "ELEVENLABS_BROWSER_URL" ) ;
119+ const browserURL =
120+ browserRaw === null || browserRaw === undefined
121+ ? undefined
122+ : String ( browserRaw ) ;
117123 if ( isBrowser ( ) && browserURL ) return browserURL ;
118124 return "https://api.elevenlabs.io/v1" ;
119125}
120126
121127function getApiKey ( runtime : IAgentRuntime ) : string | undefined {
122- const env =
123- ( typeof process !== "undefined" &&
124- ( process as { env ?: Record < string , string > } ) . env ) ||
125- { } ;
126- return (
127- ( runtime . getSetting ( "ELEVENLABS_API_KEY" ) as string | undefined ) ||
128- ( env . ELEVENLABS_API_KEY as string | undefined )
129- ) ;
128+ const raw = runtime . getSetting ( "ELEVENLABS_API_KEY" ) ;
129+ const fromRuntime =
130+ raw === null || raw === undefined ? undefined : String ( raw ) ;
131+ const fromEnv =
132+ typeof process !== "undefined" &&
133+ process . env &&
134+ typeof process . env . ELEVENLABS_API_KEY === "string"
135+ ? process . env . ELEVENLABS_API_KEY
136+ : undefined ;
137+ return fromRuntime ?? fromEnv ;
130138}
131139
132140/**
@@ -154,7 +162,7 @@ function getVoiceSettings(runtime: IAgentRuntime): VoiceSettings {
154162 ) ,
155163 style : getSetting ( runtime , "ELEVENLABS_VOICE_STYLE" , "0" ) ,
156164 speakerBoost : parseBooleanFromText (
157- `${ getSetting ( runtime , "ELEVENLABS_VOICE_USE_SPEAKER_BOOST" , "true" ) } ` as string ,
165+ `${ getSetting ( runtime , "ELEVENLABS_VOICE_USE_SPEAKER_BOOST" , "true" ) ?? "true" } ` ,
158166 ) ,
159167 } ;
160168}
@@ -175,11 +183,11 @@ function getTranscriptionSettings(
175183 "word" ,
176184 ) ,
177185 diarize : parseBooleanFromText (
178- `${ getSetting ( runtime , "ELEVENLABS_STT_DIARIZE" , "false" ) } ` as string ,
186+ `${ getSetting ( runtime , "ELEVENLABS_STT_DIARIZE" , "false" ) ?? "false" } ` ,
179187 ) ,
180188 numSpeakers : numSpeakersStr ? Number ( numSpeakersStr ) : undefined ,
181189 tagAudioEvents : parseBooleanFromText (
182- `${ getSetting ( runtime , "ELEVENLABS_STT_TAG_AUDIO_EVENTS" , "false" ) } ` as string ,
190+ `${ getSetting ( runtime , "ELEVENLABS_STT_TAG_AUDIO_EVENTS" , "false" ) ?? "false" } ` ,
183191 ) ,
184192 } ;
185193}
@@ -241,7 +249,7 @@ async function fetchSpeech(
241249 const stream = await client . textToSpeech . stream ( params . voiceId , {
242250 text : params . text ,
243251 modelId : params . modelId ,
244- outputFormat : params . outputFormat as any ,
252+ outputFormat : parseTtsOutputFormat ( params . outputFormat ) ,
245253 optimizeStreamingLatency : Number ( params . latency ) || 0 ,
246254 voiceSettings : {
247255 stability : Number ( params . stability ) || 0 ,
@@ -283,48 +291,39 @@ async function fetchTranscription(
283291 baseUrl,
284292 } ) ;
285293
286- const requestParams : any = {
287- modelId : params . modelId ,
288- audio : params . audioFile ,
294+ const body : BodySpeechToTextV1SpeechToTextPost = {
295+ modelId : parseSttModelId ( params . modelId ) ,
296+ file : params . audioFile ,
289297 } ;
290298
291299 if ( params . languageCode ) {
292- requestParams . languageCode = params . languageCode ;
300+ body . languageCode = params . languageCode ;
293301 }
294302
295303 if ( params . timestampsGranularity !== "none" ) {
296- requestParams . timestampsGranularity = params . timestampsGranularity ;
304+ body . timestampsGranularity = parseSttTimestampsGranularity (
305+ params . timestampsGranularity ,
306+ ) ;
297307 }
298308
299309 if ( params . diarize ) {
300- requestParams . diarize = true ;
301- if ( params . numSpeakers ) {
302- requestParams . numSpeakers = params . numSpeakers ;
310+ body . diarize = true ;
311+ if ( params . numSpeakers !== undefined ) {
312+ body . numSpeakers = params . numSpeakers ;
303313 }
304314 }
305315
306316 if ( params . tagAudioEvents ) {
307- requestParams . tagAudioEvents = true ;
317+ body . tagAudioEvents = true ;
308318 }
309319
310- const response = await client . speechToText . convert ( requestParams ) ;
320+ const response = await client . speechToText . convert ( body ) ;
311321
312322 if ( ! response ) {
313323 throw new Error ( "Empty response from ElevenLabs STT API" ) ;
314324 }
315325
316- let transcript = "" ;
317- if ( "transcript" in response && response . transcript ) {
318- const transcriptObj = response . transcript as { text ?: string } ;
319- transcript = transcriptObj . text || "" ;
320- } else if ( "transcripts" in response && response . transcripts ) {
321- const transcriptsArray = response . transcripts as Array < { text ?: string } > ;
322- transcript = transcriptsArray
323- . map ( ( t : { text ?: string } ) => t . text || "" )
324- . join ( "\n" ) ;
325- }
326-
327- return transcript ;
326+ return extractTranscript ( response ) ;
328327 } catch ( error : unknown ) {
329328 const msg = error instanceof Error ? error . message : String ( error ) ;
330329 logger . error ( `ElevenLabs fetchTranscription error: ${ msg } ` ) ;
@@ -392,8 +391,7 @@ export const elevenLabsPlugin: Plugin = {
392391 const settings = getVoiceSettings ( runtime ) ;
393392 const resolvedModel = options . model || settings . model ;
394393 // Prefer explicit ElevenLabs voiceId param; fall back to configured voiceId.
395- const resolvedVoiceId =
396- ( options . voiceId as string | undefined ) || settings . voiceId ;
394+ const resolvedVoiceId = options . voiceId ?? settings . voiceId ;
397395 // Honor explicit caller-provided format (e.g., "pcm_16000", "mp3_22050_64").
398396 // Gracefully map generic "mp3" to a valid ElevenLabs enum, otherwise pass through.
399397 // Only default to settings.outputFormat when absent.
@@ -561,26 +559,26 @@ export const elevenLabsPlugin: Plugin = {
561559
562560 const testText = "Hello from ElevenLabs test." ;
563561 try {
564- const audioStream = ( await runtime . useModel (
562+ const audio = await runtime . useModel (
565563 ModelType . TEXT_TO_SPEECH ,
566564 testText ,
567- ) ) as ReadableStream < Uint8Array > ;
568-
569- if (
570- ! audioStream ||
571- typeof ( audioStream as { getReader ?: unknown } ) . getReader !==
572- "function"
573- ) {
574- throw new Error ( "TTS output is not a Web ReadableStream" ) ;
575- }
565+ ) ;
566+
567+ const bytes : Uint8Array | null =
568+ audio instanceof Uint8Array
569+ ? audio
570+ : Buffer . isBuffer ( audio )
571+ ? new Uint8Array ( audio )
572+ : audio instanceof ArrayBuffer
573+ ? new Uint8Array ( audio )
574+ : null ;
576575
577- const reader = audioStream . getReader ( ) ;
578- const { value, done } = await reader . read ( ) ;
579- reader . releaseLock ( ) ;
580- if ( done && ! value ) {
581- throw new Error ( "Received empty audio stream" ) ;
576+ if ( ! bytes || bytes . byteLength === 0 ) {
577+ throw new Error (
578+ "TTS output must be non-empty Uint8Array, Buffer, or ArrayBuffer" ,
579+ ) ;
582580 }
583- logger . success ( "Received audio stream chunk successfully" ) ;
581+ logger . success ( "Received TTS binary payload successfully" ) ;
584582 } catch ( error : unknown ) {
585583 const msg =
586584 error instanceof Error ? error . message : String ( error ) ;
0 commit comments