-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvoice_worker.py
More file actions
106 lines (88 loc) · 3.31 KB
/
voice_worker.py
File metadata and controls
106 lines (88 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
"""
Voice Worker - Speech-to-Text using Vosk
Runs in background thread to avoid freezing UI
"""
import json
import pyaudio
from vosk import Model, KaldiRecognizer
from PyQt6.QtCore import QThread, pyqtSignal
import os
# Suppress ALSA error messages (they're harmless warnings).
# NOTE(review): assumes the ALSA library reads ALSA_LOG_LEVEL — TODO confirm.
# The variable is set at import time of this module, after `pyaudio` has been
# imported but before any PyAudio instance is created by the code below.
os.environ['ALSA_LOG_LEVEL'] = '0'
class VoiceWorker(QThread):
    """
    Speech-to-Text worker that runs in a background thread.

    The worker loads a Vosk model, opens the default input device through
    PyAudio and feeds fixed-size audio chunks to a ``KaldiRecognizer``. It
    emits ``text_ready`` with the first non-empty recognized sentence and then
    finishes. Any exception raised during setup or recognition is reported
    through ``error_occurred`` as its string form.
    """

    # Signal emitted when speech is recognized
    text_ready = pyqtSignal(str)
    # Signal emitted when an error occurs
    error_occurred = pyqtSignal(str)

    # Sample rate (Hz) shared by the audio stream and the recognizer.
    SAMPLE_RATE = 16000
    # Frames read per loop iteration. 2000 frames at 16 kHz is 125 ms of
    # audio, which bounds how long the loop takes to notice a stop() request.
    CHUNK_FRAMES = 2000

    def __init__(self, model_path="models/vosk-model-small-en-us-0.15"):
        """
        Create an idle worker; no audio or model resources are acquired here.

        Args:
            model_path: Path passed to ``vosk.Model`` when the thread runs.
        """
        super().__init__()
        self.model_path = model_path
        self.is_running = False
        self.audio = None
        self.stream = None
        self.recognizer = None

    def run(self):
        """Main thread execution. Runs in background.

        Loads the model, opens the microphone and processes audio until one
        non-empty sentence is recognized, ``stop()`` is called, or an error
        occurs. Resources are always released before the method returns.
        """
        # Raise the flag BEFORE the slow setup: previously it was set after
        # the model and stream were ready, so a stop() issued during loading
        # was silently overwritten and the worker kept listening.
        self.is_running = True
        try:
            # Load Vosk model
            model = Model(self.model_path)
            if not self.is_running:
                # stop() arrived while loading; do not open the microphone.
                return

            # Setup audio recording
            self.audio = pyaudio.PyAudio()
            device_index = self._get_default_input_device()
            self.stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.SAMPLE_RATE,
                input=True,
                input_device_index=device_index,
                # Smaller buffer = more responsive stop
                frames_per_buffer=self.CHUNK_FRAMES,
            )
            self.recognizer = KaldiRecognizer(model, self.SAMPLE_RATE)

            # Process audio in small chunks with stop check
            while self.is_running:
                # This read blocks until one chunk is available; the stop
                # flag is re-checked right after it returns.
                data = self.stream.read(
                    self.CHUNK_FRAMES, exception_on_overflow=False
                )
                if not self.is_running:
                    break
                # Feed to recognizer
                if self.recognizer.AcceptWaveform(data):
                    result = json.loads(self.recognizer.Result())
                    if result.get("text"):
                        self.text_ready.emit(result["text"])
                        break  # Stop after one sentence (auto-stop behavior)
        except Exception as e:
            # Thread boundary: report the failure to listeners instead of
            # letting it escape from the worker thread.
            self.error_occurred.emit(str(e))
        finally:
            # Single cleanup point for the success, stop and error paths.
            self._cleanup()

    def _get_default_input_device(self):
        """Find the default input (microphone) device.

        Reuses ``self.audio`` when it is already open; otherwise a temporary
        PyAudio instance is created and always terminated, even if the query
        raises.

        Returns:
            The ``'index'`` entry of PyAudio's default input device info.
        """
        pa = self.audio
        owns_instance = pa is None
        if owns_instance:
            pa = pyaudio.PyAudio()
        try:
            return pa.get_default_input_device_info()['index']
        finally:
            if owns_instance:
                pa.terminate()

    def _cleanup(self):
        """Internal cleanup method.

        Clears the running flag and releases the stream and the PyAudio
        instance. Safe to call when nothing was opened.
        """
        self.is_running = False
        if self.stream is not None:
            try:
                self.stream.stop_stream()
                self.stream.close()
            except Exception:
                # Best-effort teardown: a failure here must not mask the
                # original error or prevent the remaining cleanup.
                pass
            self.stream = None
        if self.audio is not None:
            try:
                self.audio.terminate()
            except Exception:
                # Best-effort teardown, see above.
                pass
            self.audio = None

    def stop(self):
        """Stop listening - called from main thread.

        Only clears the flag; the worker thread notices it after the current
        chunk read returns (or right after model loading) and then exits.
        """
        self.is_running = False