Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions real-time-voicebot/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ASSEMBLYAI_API_KEY=your_assemblyai_api_key
OPENAI_API_KEY=your_openai_api_key
ELEVENLABS_API_KEY=your_elevenlabs_api_key
9 changes: 6 additions & 3 deletions real-time-voicebot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,16 @@ Before running the application, you need API keys for the following services:
- [Get the API key for OpenAI here →](https://platform.openai.com/api-keys)
- [Get the API key for ElevenLabs here →](https://elevenlabs.io/app/sign-in)

Update the API keys in the code by replacing the placeholders in the `AI_Assistant` class.
Copy `.env.example` to `.env` and add your API keys:

```bash
cp .env.example .env
```

## Run the application

```bash
pip install assemblyai openai elevenlabs python-dotenv
python app.py
```
---
Expand All @@ -36,5 +41,3 @@ python app.py

## Contribution
Contributions are welcome! Please fork the repository and submit a pull request with your improvements.


58 changes: 23 additions & 35 deletions real-time-voicebot/app.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import os
from dotenv import load_dotenv
import assemblyai as aai
from elevenlabs import stream
from elevenlabs.client import ElevenLabs
from openai import OpenAI

load_dotenv()

class AI_Assistant:
def __init__(self):
aai.settings.api_key = "<AssemblyAI API Key>"
self.openai_client = OpenAI(api_key = "<OpenAI API Key>")
self.elevenlabs_api_key = "<ElevenLabs API Key>"

self.elevenlabs_client = ElevenLabs(api_key = self.elevenlabs_api_key)

aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
self.openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
self.elevenlabs_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
self.transcriber = None

self.interaction = [
{"role":"system", "content":"You are a helpful travel guide in London, UK, helping a tourist plan their trip. Be conversational and concise in your responses."},
{"role": "system", "content": "You are a helpful travel guide in London, UK, helping a tourist plan their trip. Be conversational and concise in your responses."},
]

def stop_transcription(self):
Expand All @@ -24,15 +24,12 @@ def stop_transcription(self):

def on_open(self, session_opened: aai.RealtimeSessionOpened):
    """Callback fired when the AssemblyAI realtime session opens.

    Args:
        session_opened: Session metadata from AssemblyAI; only the
            session ID is logged for traceability.
    """
    # Dead trailing `return` removed — a callback returning None needs no statement.
    print("Session ID:", session_opened.session_id)

def on_error(self, error: aai.RealtimeError):
    """Callback fired when the realtime transcription stream reports an error.

    Args:
        error: The error object supplied by the AssemblyAI realtime stream.
    """
    # Fixed typo in the user-facing message ("occured" -> "occurred");
    # dead trailing `return` removed.
    print("An error occurred:", error)

def on_close(self):
    """Callback fired when the AssemblyAI realtime session closes; logs the event."""
    # Dead trailing `return` removed — a callback returning None needs no statement.
    print("Closing Session")

def on_data(self, transcript: aai.RealtimeTranscript):
if not transcript.text:
Expand All @@ -44,52 +41,43 @@ def on_data(self, transcript: aai.RealtimeTranscript):

def start_transcription(self):
self.transcriber = aai.RealtimeTranscriber(
sample_rate = 16000,
on_data = self.on_data,
on_error = self.on_error,
on_open = self.on_open,
on_close = self.on_close,
end_utterance_silence_threshold = 1000
sample_rate=16000,
on_data=self.on_data,
on_error=self.on_error,
on_open=self.on_open,
on_close=self.on_close,
end_utterance_silence_threshold=1000
)

self.transcriber.connect()
microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
self.transcriber.stream(microphone_stream)

def generate_ai_response(self, transcript):

self.stop_transcription()

self.interaction.append({"role":"user", "content": transcript.text})
self.interaction.append({"role": "user", "content": transcript.text})
print(f"\nTourist: {transcript.text}", end="\r\n")

response = self.openai_client.chat.completions.create(
model = "gpt-3.5-turbo",
messages = self.interaction
model="gpt-3.5-turbo",
messages=self.interaction
)

ai_response = response.choices[0].message.content

self.generate_audio(ai_response)

self.start_transcription()
print(f"\nReal-time transcription: ", end="\r\n")


def generate_audio(self, text):

self.interaction.append({"role":"assistant", "content": text})
self.interaction.append({"role": "assistant", "content": text})
print(f"\nAI Guide: {text}")

audio_stream = self.elevenlabs_client.generate(
text = text,
voice = "Rachel",
stream = True
text=text,
voice="Rachel",
stream=True
)

stream(audio_stream)


greeting = "Thank you for calling London Travel Guide. My name is Rachel, how may I assist you?"
ai_assistant = AI_Assistant()
ai_assistant.generate_audio(greeting)
ai_assistant.start_transcription()
ai_assistant.start_transcription()