-
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathapp.py
89 lines (76 loc) · 3 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""Voice assistant: Azure Speech for STT/TTS and Azure OpenAI for replies."""
import os
import azure.cognitiveservices.speech as speechsdk
import openai
# Set up OpenAI API credentials (legacy pre-1.0 openai SDK, Azure-hosted)
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_ENDPOINT")  # e.g. https://<resource>.openai.azure.com/
openai.api_version = "2023-03-15-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")
# Set up engine name — presumably the Azure OpenAI *deployment* name
# (passed as engine= in generate_text); verify against the Azure portal.
engine_name = "test"
# Set up Azure Speech-to-Text and Text-to-Speech credentials
speech_key = os.getenv("SPEECH_API_KEY")
service_region = "eastus"  # hard-coded; must match the Speech resource's region
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
# Set up Azure Text-to-Speech language
speech_config.speech_synthesis_language = "en-NZ"
# Set up Azure Speech-to-Text language recognition
speech_config.speech_recognition_language = "en-NZ"
# Set up the voice configuration (New Zealand English neural voice)
speech_config.speech_synthesis_voice_name = "en-NZ-MollyNeural"
# Shared synthesizer instance, reused by text_to_speech() below
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
# Define the speech-to-text function
def speech_to_text():
    """Capture one utterance from the default microphone and return it as text.

    Returns:
        str: The recognized text, or a human-readable fallback message when
        nothing was recognized or recognition was canceled. Always a string —
        never None.
    """
    # Set up the audio configuration (default system microphone)
    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    # Create a speech recognizer and run a single blocking recognition
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    print("Say something...")
    result = speech_recognizer.recognize_once_async().get()
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        return result.text
    if result.reason == speechsdk.ResultReason.NoMatch:
        return "Sorry, I didn't catch that."
    if result.reason == speechsdk.ResultReason.Canceled:
        # Surface the cancellation details (e.g. invalid key/region) instead of
        # silently dropping them — the original discarded this diagnostic info.
        details = result.cancellation_details
        print(f"Recognition canceled: {details.reason}")
        if details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {details.error_details}")
        return "Recognition canceled."
    # Fix: the original fell through and implicitly returned None for any other
    # result reason; callers immediately format the return value into a prompt.
    return "Sorry, something went wrong."
# Define the text-to-speech function
def text_to_speech(text):
    """Speak *text* aloud through the shared Azure speech synthesizer.

    Returns:
        bool: True when synthesis completed, False on any failure (the error
        is printed, never raised, so the caller's loop keeps running).
    """
    try:
        synthesis_result = speech_synthesizer.speak_text_async(text).get()
    except Exception as err:
        print(f"Error synthesizing audio: {err}")
        return False
    # Any reason other than a completed synthesis is treated as a failure.
    if synthesis_result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
        print(f"Error synthesizing audio: {synthesis_result}")
        return False
    print("Text-to-speech conversion successful.")
    return True
# Define the Azure OpenAI language generation function
def generate_text(prompt):
    """Ask the Azure OpenAI chat deployment for a reply to *prompt*.

    Returns:
        str: The assistant message content from the first choice.
    """
    chat_messages = [
        {"role": "system", "content": "You are an AI assistant that helps people find information."},
        {"role": "user", "content": prompt},
    ]
    # Legacy (pre-1.0) openai SDK call; engine= names the Azure deployment.
    completion = openai.ChatCompletion.create(
        engine=engine_name,
        messages=chat_messages,
        temperature=0.7,
        max_tokens=800,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    return completion['choices'][0]['message']['content']
# Main program loop
def main():
    """Run the voice-assistant loop: listen, generate a reply, speak it."""
    while True:
        # Get input from user using speech-to-text
        user_input = speech_to_text()
        print(f"You said: {user_input}")
        # Generate a response using Azure OpenAI
        prompt = f"Q: {user_input}\nA:"
        response = generate_text(prompt)
        print(f"AI says: {response}")
        # Convert the response to speech using text-to-speech
        text_to_speech(response)

if __name__ == "__main__":
    # Fix: guard the loop so importing this module no longer starts recording
    # immediately, and exit cleanly on Ctrl+C instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("Goodbye!")