physql/voice_trigger.py at main · Brayden/physql · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import time
import pyaudio
import speech_recognition as sr
import requests
import os
import sys

def extract_instruction(transcription, trigger_word):
    """Return the text that follows the trigger word in a transcription.

    The search is case-insensitive on both arguments, so a trigger word
    passed as "Houston" matches a transcription containing "houston" (and
    vice versa). Only the first occurrence of the trigger word is used.

    Args:
        transcription: The recognized speech text.
        trigger_word: The word (or phrase) that marks the start of a command.

    Returns:
        The text after the first occurrence of the trigger word with
        surrounding whitespace removed (an empty string when nothing follows
        it), or None when the trigger word does not occur at all.
    """
    # Lower-case both sides; lower-casing only the transcription would make
    # any trigger word containing an uppercase letter impossible to match.
    position = transcription.lower().find(trigger_word.lower())
    if position == -1:
        return None
    return transcription[position + len(trigger_word):].strip()

def send_instruction_to_api(instruction):
    """Send the extracted instruction to the local API.

    Issues a GET request to the instruction endpoint on localhost:3000 with
    the instruction as a query parameter and ``index=0``. The outcome is
    printed; request failures are reported and swallowed so the caller keeps
    running.

    Args:
        instruction: The spoken command text to forward.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # Percent-encode the instruction: spoken text may contain characters such
    # as '&', '#', '+' or '=' that would otherwise truncate or corrupt the
    # query string. Spaces become %20.
    encoded_instruction = quote(instruction, safe="")
    url = f"http://localhost:3000/instruction?instruction={encoded_instruction}&index=0"
    # NOTE(review): no timeout is set, so an unresponsive server blocks the
    # calling thread indefinitely; consider adding one once the expected API
    # latency is known.
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an error for bad responses
        print(f"API response: {response.status_code}, {response.text}")
    except requests.exceptions.RequestException as e:
        print(f"Error sending instruction to API: {e}")

def handle_audio(recognizer, audio):
    """Transcribe an audio chunk and forward any command after the trigger word.

    The audio is transcribed with ``recognizer.recognize_google``. When the
    transcription contains the trigger word, the text following it is
    extracted and sent to the API. Recognition errors are printed; a request
    error mentioning "Broken pipe" restarts the script.

    Args:
        recognizer: Object providing ``recognize_google(audio)``.
        audio: The captured audio to transcribe.
    """
    trigger_word = "houston"

    try:
        heard = recognizer.recognize_google(audio)
        print(f"You said: {heard}")

        # Nothing to do unless the trigger word was spoken.
        if trigger_word not in heard.lower():
            return

        print(f"Trigger word '{trigger_word}' detected!")
        command = extract_instruction(heard, trigger_word)
        if not command:
            print("No instruction received.")
            return

        print(f"Received instruction: {command}")
        send_instruction_to_api(command)
    except sr.UnknownValueError:
        print("Could not understand audio")
    except sr.RequestError as err:
        print(f"Could not request results; {err}")
        # Example of the error handled here:
        #   recognition connection failed: [Errno 32] Broken pipe
        if 'Broken pipe' not in str(err):
            return
        print("Broken pipe detected, restarting script...")
        restart_script()

def restart_script():
    """Restart the script by replacing the current process with a fresh one.

    Re-executes the Python interpreter with the original command-line
    arguments via ``os.execv``; on success this call does not return.
    """
    print("Restarting script...")
    # os.execv replaces the process immediately without flushing open file
    # objects, so flush the standard streams first or buffered output
    # (including the message above) may be lost.
    sys.stdout.flush()
    sys.stderr.flush()
    os.execv(sys.executable, [sys.executable] + sys.argv)

def main():
    """Listen on the microphone in the background until interrupted.

    Calibrates the recognizer for ambient noise once, then starts background
    listening with ``handle_audio`` as the per-phrase callback. The main
    thread idles until Ctrl-C, at which point listening is stopped and the
    function returns.
    """
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    # Adjust for ambient noise once at the beginning
    with microphone as source:
        recognizer.adjust_for_ambient_noise(source)

    # Keep this message in sync with the trigger word used by handle_audio.
    print("Listening for trigger word 'houston'...")

    # Use listen_in_background for non-blocking audio capture; the returned
    # callable is used below to stop listening.
    stop_listening = recognizer.listen_in_background(
        microphone, handle_audio, phrase_time_limit=10
    )

    try:
        while True:
            # Keep the main thread alive without spinning a CPU core.
            time.sleep(0.1)
    except KeyboardInterrupt:
        stop_listening(wait_for_stop=False)
        print("Stopped listening")

if __name__ == "__main__":
    # Supervisor loop: re-run main() after any unexpected error, pausing
    # between attempts so a persistent failure does not cause rapid restarts.
    while True:
        try:
            main()
        except Exception as e:
            print(f"Error: {e}")
            if 'Broken pipe' in str(e):
                print("Broken pipe detected, restarting script...")
            else:
                print("Unexpected error, restarting script...")
            time.sleep(5)  # Wait a bit before restarting to avoid rapid restarts
        else:
            # main() returns normally only after it has handled Ctrl-C and
            # stopped listening; exit instead of starting it again.
            break


# import pyaudio
# import speech_recognition as sr

# def extract_instruction(transcription, trigger_word):
#     """Extract the instruction part from the transcription."""
#     trigger_word_position = transcription.lower().find(trigger_word)
#     if trigger_word_position != -1:
#         return transcription[trigger_word_position + len(trigger_word):].strip()
#     return None

# def handle_audio(recognizer, audio):
#     """Handle the audio input and detect trigger word and instructions."""
#     trigger_word = "hello world"

#     try:
#         transcription = recognizer.recognize_google(audio)
#         print(f"You said: {transcription}")

#         if trigger_word in transcription.lower():
#             print(f"Trigger word '{trigger_word}' detected!")
#             instruction = extract_instruction(transcription, trigger_word)
#             if instruction:
#                 print(f"Received instruction: {instruction}")
#             else:
#                 print("No instruction received.")
#     except sr.UnknownValueError:
#         print("Could not understand audio")
#     except sr.RequestError as e:
#         print(f"Could not request results; {e}")

# def main():
#     recognizer = sr.Recognizer()
#     microphone = sr.Microphone()

#     # Adjust for ambient noise once at the beginning
#     with microphone as source:
#         recognizer.adjust_for_ambient_noise(source)

#     print("Listening for trigger word 'Hello world'...")

#     # Use listen_in_background for non-blocking audio capture
#     stop_listening = recognizer.listen_in_background(microphone, handle_audio)

#     try:
#         while True:
#             # Keep the main thread alive
#             pass
#     except KeyboardInterrupt:
#         stop_listening(wait_for_stop=False)
#         print("Stopped listening")

# if __name__ == "__main__":
#     main()