revira/AudioHandler.py at master · ests-masterrace/revira · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import sounddevice as sd
import numpy as np

# Capture settings read by AudioHandler when it opens its input stream.
AUDIO_CONFIG = {
    # Number of input channels requested from the device.
    "CHANNELS": 1,
    # Sample rate in Hz.
    "RATE": 16000,
    # Frames delivered per callback block; reduced for lower latency
    # (512 frames at 16 kHz is 32 ms of audio per block).
    "CHUNK": 512,
}


class AudioHandler:
    """Handles audio recording using sounddevice.

    Audio is captured as int16 blocks through a ``sounddevice.InputStream``
    whose callback stores every block.  ``process_frame`` exposes the most
    recent block and ``stop_recording`` returns the whole take; both return
    flat float32 arrays scaled by 1/32768.
    """

    # Divisor that maps int16 samples into [-1.0, 1.0).
    _INT16_SCALE = 32768.0

    def __init__(self):
        """Initialise an idle handler from ``AUDIO_CONFIG``; no stream is opened."""
        self.frames = []  # every block captured since start_recording()
        self.latest_frame = None  # most recent block, or None before the first
        self.stream = None  # active sd.InputStream, or None when idle
        self.dtype = "int16"
        self.channels = AUDIO_CONFIG["CHANNELS"]
        self.rate = AUDIO_CONFIG["RATE"]
        self.chunk = AUDIO_CONFIG["CHUNK"]

    def callback(self, indata, frames, time, status):
        """Store one captured block; passed to the stream as its callback.

        Args:
            indata: Array holding the block of captured samples.
            frames: Number of frames in ``indata`` (unused).
            time: Timing information supplied by the stream (unused).
            status: Status flags for the block; printed when truthy.
        """
        if status:
            print(status)
        # Copy because the stream's buffer is not expected to stay valid
        # after this call returns.  One copy serves both consumers: neither
        # process_frame() nor stop_recording() modifies stored blocks.
        block = indata.copy()
        self.frames.append(block)
        self.latest_frame = block

    def _close_stream(self):
        """Stop and close the current stream, if any, and forget the handle."""
        # Detach first so the handler never keeps a reference to a closed
        # stream, even if stop() or close() raises.
        stream, self.stream = self.stream, None
        if stream is None:
            return
        try:
            stream.stop()
        finally:
            stream.close()

    def _to_float32(self, samples):
        """Return ``samples`` flattened to 1-D float32, scaled by 1/32768."""
        return samples.flatten().astype(np.float32) / self._INT16_SCALE

    def start_recording(self):
        """Discard any previous take and start capturing from the input device.

        A stream that is still open from an earlier call is closed first, so
        repeated calls do not leak streams that keep feeding ``callback``.
        """
        self._close_stream()
        self.frames = []
        self.latest_frame = None
        self.stream = sd.InputStream(
            samplerate=self.rate,
            channels=self.channels,
            dtype=self.dtype,
            blocksize=self.chunk,
            callback=self.callback,
        )
        self.stream.start()

    def process_frame(self):
        """Return the most recent block as a flat float32 array.

        Returns:
            The latest captured block scaled by 1/32768, or an all-zero
            array of one block's length when nothing has been captured yet.
        """
        # Read the attribute once: callback() and start_recording() may
        # rebind it between a check and a later use.
        latest = self.latest_frame
        if latest is None:
            # Same length a flattened real block would have
            # (equals ``chunk`` for single-channel capture).
            return np.zeros(self.chunk * self.channels, dtype=np.float32)
        return self._to_float32(latest)

    def stop_recording(self):
        """Stop capturing and return everything recorded since the start.

        Returns:
            A flat float32 array of all captured samples scaled by 1/32768;
            an empty float32 array when no block was captured.  The stored
            blocks are cleared afterwards.
        """
        self._close_stream()
        if not self.frames:
            return np.zeros(0, dtype=np.float32)
        audio_data = self._to_float32(np.concatenate(self.frames, axis=0))
        self.frames = []
        return audio_data

    def cleanup(self):
        """Release the input stream if one is open; safe to call repeatedly."""
        self._close_stream()