kh_Assistant/voice_worker.py at main · Momokh99/kh_Assistant · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
"""
Voice Worker - Speech-to-Text using Vosk
Runs in background thread to avoid freezing UI
"""

import json
import pyaudio
from vosk import Model, KaldiRecognizer
from PyQt6.QtCore import QThread, pyqtSignal
import os

# Quiet ALSA's diagnostic chatter; the author treats these messages as
# harmless warnings rather than real failures.
os.environ.update(ALSA_LOG_LEVEL='0')

class VoiceWorker(QThread):
    """
    Speech-to-Text worker that runs in a background thread.

    Opens the default microphone through PyAudio, streams the audio into a
    Vosk ``KaldiRecognizer`` and emits the first non-empty recognized
    sentence via ``text_ready``, after which the thread finishes. Any
    exception raised during setup or capture is reported as a string via
    ``error_occurred``.
    """

    # Signal emitted when speech is recognized
    text_ready = pyqtSignal(str)

    # Signal emitted when an error occurs
    error_occurred = pyqtSignal(str)

    # Capture rate in Hz; the same value is given to the audio stream and to
    # the recognizer so the two can never disagree.
    SAMPLE_RATE = 16000

    # Frames fetched per read. Kept small so the loop re-checks the stop flag
    # often (smaller buffer = more responsive stop).
    CHUNK_FRAMES = 2000

    def __init__(self, model_path="models/vosk-model-small-en-us-0.15"):
        """
        Create an idle worker; nothing is opened until the thread runs.

        Args:
            model_path: Directory of the Vosk model to load in ``run()``.
        """
        super().__init__()
        self.model_path = model_path
        self.is_running = False
        self.audio = None
        self.stream = None
        self.recognizer = None

    def run(self):
        """
        Main thread execution. Runs in background.

        Loads the model, opens the microphone and feeds audio chunks to the
        recognizer until one sentence is recognized or ``stop()`` is called.
        Audio resources are always released before returning.
        """
        # Raise the flag BEFORE the slow setup: if it were set afterwards, a
        # stop() issued while the model is loading would be overwritten and
        # the worker would start listening anyway.
        self.is_running = True

        try:
            # Load Vosk model
            model = Model(self.model_path)

            # stop() may have arrived during the load; skip opening the mic.
            if not self.is_running:
                return

            # Setup audio recording
            self.audio = pyaudio.PyAudio()
            device_index = self._get_default_input_device()

            self.stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.SAMPLE_RATE,
                input=True,
                input_device_index=device_index,
                frames_per_buffer=self.CHUNK_FRAMES,
            )

            self.recognizer = KaldiRecognizer(model, self.SAMPLE_RATE)

            # Process audio in small chunks with stop check
            while self.is_running:
                data = self.stream.read(
                    self.CHUNK_FRAMES, exception_on_overflow=False
                )

                # stop() may have been called while read() was blocking.
                if not self.is_running:
                    break

                # Feed to recognizer
                if self.recognizer.AcceptWaveform(data):
                    text = json.loads(self.recognizer.Result()).get("text")
                    if text:
                        self.text_ready.emit(text)
                        break  # Stop after one sentence (auto-stop behavior)

        except Exception as e:
            # Thread boundary: report the failure to the UI instead of
            # letting the exception escape the thread.
            self.error_occurred.emit(str(e))
        finally:
            self._cleanup()

    def _get_default_input_device(self):
        """
        Find the default input (microphone) device.

        Returns:
            The ``'index'`` entry of PyAudio's default input device info.

        Raises:
            Whatever ``get_default_input_device_info()`` raises, e.g. when no
            default input device exists (the error is not handled here).
        """
        # Reuse the session run() already opened instead of starting another.
        if self.audio is not None:
            return self.audio.get_default_input_device_info()['index']

        # Called without an open session: use a temporary one and make sure
        # it is terminated even when the lookup fails.
        p = pyaudio.PyAudio()
        try:
            return p.get_default_input_device_info()['index']
        finally:
            p.terminate()

    def _cleanup(self):
        """
        Internal cleanup method.

        Clears the running flag and releases the stream and the PyAudio
        session. Best effort: release errors are ignored so that every
        resource gets its chance to be freed. Safe to call repeatedly.
        """
        self.is_running = False

        stream, self.stream = self.stream, None
        if stream:
            # Attempt both steps independently so a failing stop_stream()
            # does not prevent close().
            for release in (stream.stop_stream, stream.close):
                try:
                    release()
                except Exception:
                    pass  # nothing useful can be done during teardown

        audio, self.audio = self.audio, None
        if audio:
            try:
                audio.terminate()
            except Exception:
                pass  # nothing useful can be done during teardown

    def stop(self):
        """Stop listening - called from main thread"""
        self.is_running = False
        # Note: stream.read() will return after current chunk, then loop exits