Skip to content

Commit 47e9402

Browse files
committed
feat(语音): 客户端 VAD 门控,播放时过滤回声防止自我打断
- 内联 SimpleVAD(RMS 振幅检测 + 防抖) - 播放时每帧麦克风数据先过 VAD,只有真实人声(RMS > 0.08)才发给服务器 - 播放停止时重置 VAD 状态,恢复正常发送 - 实现说话打断播放功能,同时避免回声触发误打断
1 parent e216862 commit 47e9402

1 file changed

Lines changed: 84 additions & 9 deletions

File tree

packages/project-neko-audio-service/src/native/audioServiceNative.ts

Lines changed: 84 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,67 @@ import { TinyEmitter } from "@project_neko/common";
33
import { SpeechInterruptController } from "../protocol";
44
import type { AudioService, AudioServiceEvents, AudioServiceState, NekoWsIncomingJson, RealtimeClientLike } from "../types";
55

6+
/** 简单 VAD:基于 RMS 振幅 + 防抖,适用于 React Native */
7+
/**
 * Root-mean-square amplitude of a 16-bit PCM frame.
 *
 * Every sample is divided by 32768 before squaring, so the result lies in
 * the range [0, 1].
 *
 * @param int16 - Signed 16-bit PCM samples.
 * @returns The RMS of the scaled samples, or 0 for an empty frame (a plain
 *   `0 / 0` would otherwise yield NaN, which compares false against every
 *   threshold).
 */
function calcRMS(int16: Int16Array): number {
  const sampleCount = int16.length;
  // An empty frame carries no energy; bail out before dividing by zero.
  if (sampleCount === 0) return 0;

  let sumOfSquares = 0;
  for (const sample of int16) {
    const scaled = sample / 32768;
    sumOfSquares += scaled * scaled;
  }
  return Math.sqrt(sumOfSquares / sampleCount);
}
12+
13+
/** Mutable bookkeeping the VAD carries from one frame to the next. */
interface SimpleVADState {
  /** True while the detector considers speech to be in progress. */
  isSpeaking: boolean;
  /** Length of the current unbroken run of frames at or above the speech threshold. */
  consecutiveSpeechFrames: number;
  /** Length of the current unbroken run of frames below the silence threshold. */
  consecutiveSilenceFrames: number;
}

/**
 * Creates a simple voice-activity detector driven by per-frame RMS amplitude,
 * with two thresholds (hysteresis) and frame-count debouncing.
 *
 * A frame is classified as:
 * - speech when its RMS is `>= speechThreshold`,
 * - silence when its RMS is `< silenceThreshold`,
 * - ambiguous otherwise (the dead band between the two thresholds).
 *
 * Speech starts after `minSpeechFrames` consecutive speech frames and ends
 * after `silenceFrames` consecutive silent frames. An ambiguous frame breaks
 * both runs: it cannot count towards either transition, and it prevents
 * non-adjacent loud (or quiet) frames from being added up as if they were
 * consecutive.
 *
 * @param opts - Thresholds, debounce lengths and optional transition callbacks.
 * @returns An object with:
 *   - `processFrame(int16)`: feeds one PCM frame and returns whether speech
 *     is currently in progress;
 *   - `reset()`: clears the speaking flag and both counters (no callback is
 *     fired);
 *   - `getState()`: returns a shallow copy of the internal state.
 */
function createSimpleVAD(opts: {
  speechThreshold?: number; // RMS at or above this counts as speech; default 0.02
  silenceThreshold?: number; // RMS below this counts as silence; default 0.01
  minSpeechFrames?: number; // consecutive speech frames needed to confirm speech start; default 2
  silenceFrames?: number; // consecutive silent frames needed to confirm speech end; default 8
  onSpeechStart?: () => void;
  onSpeechEnd?: () => void;
}) {
  const speechThreshold = opts.speechThreshold ?? 0.02;
  const silenceThreshold = opts.silenceThreshold ?? 0.01;
  const minSpeechFrames = opts.minSpeechFrames ?? 2;
  const silenceFrames = opts.silenceFrames ?? 8;

  const state: SimpleVADState = {
    isSpeaking: false,
    consecutiveSpeechFrames: 0,
    consecutiveSilenceFrames: 0,
  };

  function processFrame(int16: Int16Array): boolean {
    // A frame without samples says nothing about the signal; leave the state
    // untouched rather than classifying it.
    if (int16.length === 0) return state.isSpeaking;

    const rms = calcRMS(int16);
    if (rms >= speechThreshold) {
      state.consecutiveSpeechFrames++;
      state.consecutiveSilenceFrames = 0;
      if (!state.isSpeaking && state.consecutiveSpeechFrames >= minSpeechFrames) {
        state.isSpeaking = true;
        opts.onSpeechStart?.();
      }
    } else if (rms < silenceThreshold) {
      state.consecutiveSilenceFrames++;
      state.consecutiveSpeechFrames = 0;
      if (state.isSpeaking && state.consecutiveSilenceFrames >= silenceFrames) {
        state.isSpeaking = false;
        opts.onSpeechEnd?.();
      }
    } else {
      // Dead band: neither speech nor silence. The speaking flag is held, but
      // both runs are interrupted so the counters stay truly "consecutive".
      state.consecutiveSpeechFrames = 0;
      state.consecutiveSilenceFrames = 0;
    }
    return state.isSpeaking;
  }

  function reset() {
    state.isSpeaking = false;
    state.consecutiveSpeechFrames = 0;
    state.consecutiveSilenceFrames = 0;
  }

  return { processFrame, reset, getState: () => ({ ...state }) };
}
66+
667
function withTimeout<T>(p: Promise<T>, ms: number, message: string): Promise<T> {
768
if (!ms || ms <= 0) return p;
869
let timer: any = null;
@@ -30,6 +91,17 @@ export function createNativeAudioService(args: {
3091
const emitter = new TinyEmitter<AudioServiceEvents>();
3192
const interrupt = new SpeechInterruptController();
3293

94+
// 当前是否正在播放(用于 VAD 门控)
95+
let isPlaying = false;
96+
97+
// 客户端 VAD:过滤回声,只有真正的人声才发给服务器
98+
const vad = createSimpleVAD({
99+
speechThreshold: 0.08,
100+
silenceThreshold: 0.06,
101+
minSpeechFrames: 2,
102+
silenceFrames: 8,
103+
});
104+
33105
let state: AudioServiceState = "idle";
34106
const setState = (next: AudioServiceState) => {
35107
if (state === next) return;
@@ -59,15 +131,15 @@ export function createNativeAudioService(args: {
59131
if (ampSub) return;
60132

61133
ampSub = PCMStream.addListener("onAmplitudeUpdate", (event: any) => {
62-
// 打断后屏蔽一段时间,防止缓冲区余音让 isPlaying 保持 true
63134
if (Date.now() < outputAmpMutedUntil) return;
64135
const amp = typeof event?.amplitude === "number" ? event.amplitude : 0;
65-
// onAmplitudeUpdate 来自 PCMStreamPlayer(播放振幅),应映射为 outputAmplitude
136+
isPlaying = amp > 0.01;
66137
emitter.emit("outputAmplitude", { amplitude: Math.max(0, Math.min(1, amp)) });
67138
});
68139

69140
playbackStopSub = PCMStream.addListener("onPlaybackStop", () => {
70-
// 播放完成:输出 0,方便口型收嘴
141+
isPlaying = false;
142+
vad.reset();
71143
emitter.emit("outputAmplitude", { amplitude: 0 });
72144
});
73145
};
@@ -91,12 +163,16 @@ export function createNativeAudioService(args: {
91163
const pcm: Uint8Array | undefined = event?.pcm;
92164
if (!pcm) return;
93165

94-
// 打断后静音期内丢弃麦克风数据,防止扬声器尾音被当作用户输入
166+
// 打断后静音期内丢弃麦克风数据
95167
if (Date.now() < micMutedUntil) return;
96168

97-
// 与旧版协议一致:stream_data + input_type=audio + data 为 number[]
169+
const int16 = new Int16Array(pcm.buffer.slice(pcm.byteOffset, pcm.byteOffset + pcm.byteLength));
170+
171+
// 客户端 VAD 门控:播放时只有检测到真实人声才发送,过滤回声
172+
const isSpeaking = vad.processFrame(int16);
173+
if (isPlaying && !isSpeaking) return;
174+
98175
try {
99-
const int16 = new Int16Array(pcm.buffer.slice(pcm.byteOffset, pcm.byteOffset + pcm.byteLength));
100176
args.client.sendJson({
101177
action: "stream_data",
102178
data: Array.from(int16 as any),
@@ -302,11 +378,10 @@ export function createNativeAudioService(args: {
302378
try {
303379
PCMStream.stopPlayback();
304380
} catch (_e) {}
305-
// 手动打断:丢弃后续飞来的 binary 音频帧,直到新一轮 audio_chunk 到来
381+
isPlaying = false;
382+
vad.reset();
306383
manualInterruptActive = true;
307-
// 打断后静音麦克风一段时间,避免扬声器尾音被录入
308384
micMutedUntil = Date.now() + MIC_MUTE_AFTER_INTERRUPT_MS;
309-
// 打断后屏蔽播放振幅事件,避免缓冲区余音让按钮消不掉
310385
outputAmpMutedUntil = Date.now() + OUTPUT_AMP_MUTE_AFTER_INTERRUPT_MS;
311386
emitter.emit("outputAmplitude", { amplitude: 0 });
312387
};

0 commit comments

Comments
 (0)