Skip to content

Commit 4f2c531

Browse files
yorrick, claude, longcw
authored
feat(elevenlabs): add STTv2 with streaming support for Scribe v2 (livekit#3909)
Co-authored-by: Claude <[email protected]> Co-authored-by: Long Chen <[email protected]>
1 parent f57d39b commit 4f2c531

File tree

4 files changed

+476
-1
lines changed

4 files changed

+476
-1
lines changed
Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
import logging
2+
3+
from dotenv import load_dotenv
4+
5+
from livekit.agents import Agent, AgentSession, JobContext, JobProcess, WorkerOptions, cli
6+
from livekit.plugins import elevenlabs, openai, silero
7+
8+
# Logger dedicated to this example; level pinned to INFO on the logger itself.
logger = logging.getLogger("realtime-scribe-v2")
logger.setLevel(logging.INFO)
10+
11+
# Load settings from a local .env file (python-dotenv) at import time.
# NOTE(review): presumably supplies the provider API keys — confirm.
load_dotenv()
12+
13+
14+
async def entrypoint(ctx: JobContext):
    """Run a voice-agent session that transcribes with ElevenLabs Scribe v2 realtime.

    Builds the ``STTv2`` instance, wires it into an ``AgentSession`` together
    with the VAD stored in the process userdata, an OpenAI LLM and ElevenLabs
    TTS, starts the session in the job's room, and then speaks a greeting.

    Args:
        ctx: Job context. ``ctx.proc.userdata["vad"]`` must already be set
            (``prewarm`` stores it) and ``ctx.room`` is the room the session
            is started in.
    """
    stt = elevenlabs.STTv2(
        model_id="scribe_v2_realtime",
        vad_silence_threshold_secs=0.5,
        vad_threshold=0.5,
        min_speech_duration_ms=100,
        min_silence_duration_ms=300,
    )

    session = AgentSession(
        allow_interruptions=True,
        vad=ctx.proc.userdata["vad"],
        stt=stt,
        llm=openai.LLM(model="gpt-4.1-mini"),
        tts=elevenlabs.TTS(model="eleven_turbo_v2_5"),
    )
    await session.start(
        agent=Agent(instructions="You are a somewhat helpful assistant."), room=ctx.room
    )

    # Greet only after the session has been started.
    await session.say("Hello, how can I help you?")
35+
36+
37+
def prewarm(proc: JobProcess):
    """Load the Silero VAD and store it under ``proc.userdata["vad"]``.

    ``entrypoint`` reads the model back from the same key.

    Args:
        proc: Job process whose ``userdata`` mapping receives the VAD.
    """
    proc.userdata["vad"] = silero.VAD.load()
39+
40+
41+
if __name__ == "__main__":
    # Script entry: hand the session entrypoint and the prewarm hook to the worker CLI.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))

livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,18 +17,23 @@
1717
See https://docs.livekit.io/agents/integrations/tts/elevenlabs/ for more information.
1818
"""
1919

20-
from .models import TTSEncoding, TTSModels
20+
from .models import STTAudioFormat, STTModels, TTSEncoding, TTSModels
2121
from .stt import STT
22+
from .stt_v2 import SpeechStreamv2, STTv2
2223
from .tts import DEFAULT_VOICE_ID, TTS, Voice, VoiceSettings
2324
from .version import __version__
2425

2526
# Public API of the ElevenLabs plugin package.
__all__ = [
    "STT",
    "STTv2",
    "SpeechStreamv2",
    "TTS",
    "Voice",
    "VoiceSettings",
    "TTSEncoding",
    "TTSModels",
    "STTModels",
    "STTAudioFormat",
    "DEFAULT_VOICE_ID",
    "__version__",
]

livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/models.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,3 +20,14 @@
2020
"mp3_44100_128",
2121
"mp3_44100_192",
2222
]
23+
24+
# Model identifiers for the STT model alias; one entry per line so new
# models can be appended with a minimal diff.
STTModels = Literal[
    "scribe_v2_realtime",
]
25+
26+
# Audio format identifiers for STT input.
# NOTE(review): the numeric suffix is presumably the PCM sample rate in Hz —
# confirm against the ElevenLabs realtime STT documentation.
STTAudioFormat = Literal[
    "pcm_8000",
    "pcm_16000",
    "pcm_22050",
    "pcm_24000",
    "pcm_44100",
    "pcm_48000",
]

0 commit comments

Comments
 (0)