From 9b54a01dd4b4aa70cbf70346026abe814e46441a Mon Sep 17 00:00:00 2001
From: hogaku
Date: Fri, 5 May 2023 22:45:29 +0900
Subject: [PATCH] Add: sts sample using openai

---
 .../Speech_to_Speech_Chat [LINK]/.env         |  4 +
 .../openai-speech.py                          | 84 ++++++++++++++++++++
 .../requirements.txt                          |  4 +
 3 files changed, 92 insertions(+)
 create mode 100644 End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/.env
 create mode 100644 End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/openai-speech.py
 create mode 100644 End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/requirements.txt

diff --git a/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/.env b/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/.env
new file mode 100644
index 00000000..2a50a009
--- /dev/null
+++ b/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/.env
@@ -0,0 +1,4 @@
+AZURE_OPENAI_SERVICE_KEY=""
+AZURE_OPENAI_SERVICE_ENDPOINT=""
+SPEECH_KEY=""
+SPEECH_REGION=""
\ No newline at end of file
diff --git a/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/openai-speech.py b/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/openai-speech.py
new file mode 100644
index 00000000..1b9885e0
--- /dev/null
+++ b/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/openai-speech.py
@@ -0,0 +1,84 @@
+import os
+import azure.cognitiveservices.speech as speechsdk
+import openai
+from dotenv import load_dotenv
+
+# This example requires environment variables named "AZURE_OPENAI_SERVICE_KEY" and "AZURE_OPENAI_SERVICE_ENDPOINT".
+# Your endpoint should look like the following: https://YOUR_OPEN_AI_RESOURCE_NAME.openai.azure.com/
+load_dotenv(verbose=True)
+
+openai.api_key = os.environ.get('AZURE_OPENAI_SERVICE_KEY')
+openai.api_base = os.environ.get('AZURE_OPENAI_SERVICE_ENDPOINT')
+openai.api_type = 'azure'
+openai.api_version = '2023-03-15-preview'
+
+# This will correspond to the custom name you chose for your deployment when you deployed a model.
+deployment_id='gpt-35-turbo'
+
+# This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION".
+speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
+audio_output_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
+audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
+
+# Should be the locale of the speaker's language.
+speech_config.speech_recognition_language="en-US"
+speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
+
+# The language of the voice that responds on behalf of Azure OpenAI.
+speech_config.speech_synthesis_voice_name='en-US-JennyMultilingualNeural'
+speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output_config)
+
+# Prompts Azure OpenAI with a request and synthesizes the response.
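+# Note: this uses the legacy completions API of the openai<1.0 SDK (pinned in
+# requirements.txt); each call below is a standalone turn with no chat history.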
+def ask_openai(prompt):
+
+    # Ask Azure OpenAI
+    response = openai.Completion.create(engine=deployment_id, prompt=prompt, max_tokens=100)
+    text = response['choices'][0]['text'].replace('\n', ' ').replace(' .', '.').strip()
+    print('Azure OpenAI response: ' + text)
+
+    # Azure text-to-speech output
+    speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
+
+    # Check result
+    if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+        print("Speech synthesized to speaker for text [{}]".format(text))
+    elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
+        cancellation_details = speech_synthesis_result.cancellation_details
+        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
+        if cancellation_details.reason == speechsdk.CancellationReason.Error:
+            print("Error details: {}".format(cancellation_details.error_details))
+
+# Continuously listens for speech input to recognize and send as text to Azure OpenAI.
+def chat_with_open_ai():
+    while True:
+        print("Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.")
+        try:
+            # Get audio from the microphone and then send it to the speech-to-text service.
+            speech_recognition_result = speech_recognizer.recognize_once_async().get()
+
+            # If speech is recognized, send it to Azure OpenAI and listen for the response.
+            if speech_recognition_result.reason == speechsdk.ResultReason.RecognizedSpeech:
+                if speech_recognition_result.text == "Stop.":
+                    print("Conversation ended.")
+                    break
+                print("Recognized speech: {}".format(speech_recognition_result.text))
+                ask_openai(speech_recognition_result.text)
+            elif speech_recognition_result.reason == speechsdk.ResultReason.NoMatch:
+                print("No speech could be recognized: {}".format(speech_recognition_result.no_match_details))
+                break
+            elif speech_recognition_result.reason == speechsdk.ResultReason.Canceled:
+                cancellation_details = speech_recognition_result.cancellation_details
+                print("Speech Recognition canceled: {}".format(cancellation_details.reason))
+                if cancellation_details.reason == speechsdk.CancellationReason.Error:
+                    print("Error details: {}".format(cancellation_details.error_details))
+        except EOFError:
+            break
+
+# Main
+
+try:
+    chat_with_open_ai()
+except Exception as err:
+    print("Encountered exception. {}".format(err))
\ No newline at end of file
diff --git a/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/requirements.txt b/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/requirements.txt
new file mode 100644
index 00000000..979bf14b
--- /dev/null
+++ b/End_to_end_Solutions/Speech_to_Speech_Chat [LINK]/requirements.txt
@@ -0,0 +1,4 @@
+requests
+azure-cognitiveservices-speech
+openai<1.0
+python-dotenv
\ No newline at end of file