Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 10 additions & 39 deletions homeassistant/components/assist_pipeline/audio_enhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
import logging
import math

from pysilero_vad import SileroVoiceActivityDetector
from pymicro_vad import MicroVad
from pyspeex_noise import AudioProcessor

from .const import BYTES_PER_CHUNK
Expand Down Expand Up @@ -43,8 +42,8 @@ def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
"""Enhance chunk of PCM audio @ 16Khz with 16-bit mono samples."""


class SileroVadSpeexEnhancer(AudioEnhancer):
"""Audio enhancer that runs Silero VAD and speex."""
class MicroVadSpeexEnhancer(AudioEnhancer):
"""Audio enhancer that runs microVAD and speex."""

def __init__(
self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
Expand All @@ -70,49 +69,21 @@ def __init__(
self.noise_suppression,
)

self.vad: SileroVoiceActivityDetector | None = None

# We get 10ms chunks but Silero works on 32ms chunks, so we have to
# buffer audio. The previous speech probability is used until enough
# audio has been buffered.
self._vad_buffer: bytearray | None = None
self._vad_buffer_chunks = 0
self._vad_buffer_chunk_idx = 0
self._last_speech_probability: float | None = None
self.vad: MicroVad | None = None

if self.is_vad_enabled:
self.vad = SileroVoiceActivityDetector()

# VAD buffer is a multiple of 10ms, but Silero VAD needs 32ms.
self._vad_buffer_chunks = int(
math.ceil(self.vad.chunk_bytes() / BYTES_PER_CHUNK)
)
self._vad_leftover_bytes = self.vad.chunk_bytes() - BYTES_PER_CHUNK
self._vad_buffer = bytearray(self.vad.chunk_bytes())
_LOGGER.debug("Initialized Silero VAD")
self.vad = MicroVad()
_LOGGER.debug("Initialized microVAD")

def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
"""Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples."""
speech_probability: float | None = None

assert len(audio) == BYTES_PER_CHUNK

if self.vad is not None:
# Run VAD
assert self._vad_buffer is not None
start_idx = self._vad_buffer_chunk_idx * BYTES_PER_CHUNK
self._vad_buffer[start_idx : start_idx + BYTES_PER_CHUNK] = audio

self._vad_buffer_chunk_idx += 1
if self._vad_buffer_chunk_idx >= self._vad_buffer_chunks:
# We have enough data to run Silero VAD (32 ms)
self._last_speech_probability = self.vad.process_chunk(
self._vad_buffer[: self.vad.chunk_bytes()]
)

# Copy leftover audio that wasn't processed to start
self._vad_buffer[: self._vad_leftover_bytes] = self._vad_buffer[
-self._vad_leftover_bytes :
]
self._vad_buffer_chunk_idx = 0
speech_probability = self.vad.Process10ms(audio)

if self.audio_processor is not None:
# Run noise suppression and auto gain
Expand All @@ -121,5 +92,5 @@ def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
return EnhancedAudioChunk(
audio=audio,
timestamp_ms=timestamp_ms,
speech_probability=self._last_speech_probability,
speech_probability=speech_probability,
)
2 changes: 1 addition & 1 deletion homeassistant/components/assist_pipeline/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
"integration_type": "system",
"iot_class": "local_push",
"quality_scale": "internal",
"requirements": ["pysilero-vad==3.2.0", "pyspeex-noise==1.0.2"]
"requirements": ["pymicro-vad==1.0.1", "pyspeex-noise==1.0.2"]
}
4 changes: 2 additions & 2 deletions homeassistant/components/assist_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from homeassistant.util.hass_dict import HassKey
from homeassistant.util.limited_size_dict import LimitedSizeDict

from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, SileroVadSpeexEnhancer
from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, MicroVadSpeexEnhancer
from .const import (
ACKNOWLEDGE_PATH,
BYTES_PER_CHUNK,
Expand Down Expand Up @@ -633,7 +633,7 @@ def __post_init__(self) -> None:
# Initialize with audio settings
if self.audio_settings.needs_processor and (self.audio_enhancer is None):
# Default audio enhancer
self.audio_enhancer = SileroVadSpeexEnhancer(
self.audio_enhancer = MicroVadSpeexEnhancer(
self.audio_settings.auto_gain_dbfs,
self.audio_settings.noise_suppression_level,
self.audio_settings.is_vad_enabled,
Expand Down
2 changes: 1 addition & 1 deletion homeassistant/package_constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ Pillow==12.0.0
propcache==0.4.1
psutil-home-assistant==0.0.1
PyJWT==2.10.1
pymicro-vad==1.0.1
PyNaCl==1.6.0
pyOpenSSL==25.3.0
pyserial==3.5
pysilero-vad==3.2.0
pyspeex-noise==1.0.2
python-slugify==8.0.4
PyTurboJPEG==1.8.0
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions requirements_all.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions requirements_test_all.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading