Skip to content

Commit 6751eb5

Browse files
committed
feat(语音): 动态校准回声阈值,适应不同音量
- 每次播放开始时采集前 10 帧(~320ms)测量回声 RMS 均值
- 阈值动态设为回声均值 × 1.8,自动适应手机音量变化
- onSpeechStart 回调中立即停止播放,消除网络往返延迟
- minSpeechFrames=2 保留 64ms 自然延迟感
1 parent 47e9402 commit 6751eb5

1 file changed

Lines changed: 42 additions & 4 deletions

File tree

packages/project-neko-audio-service/src/native/audioServiceNative.ts

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ function createSimpleVAD(opts: {
2424
onSpeechStart?: () => void;
2525
onSpeechEnd?: () => void;
2626
}) {
27-
const speechThreshold = opts.speechThreshold ?? 0.02;
28-
const silenceThreshold = opts.silenceThreshold ?? 0.01;
27+
let speechThreshold = opts.speechThreshold ?? 0.02;
28+
let silenceThreshold = opts.silenceThreshold ?? 0.01;
2929
const minSpeechFrames = opts.minSpeechFrames ?? 2;
3030
const silenceFrames = opts.silenceFrames ?? 8;
3131

@@ -61,7 +61,12 @@ function createSimpleVAD(opts: {
6161
state.consecutiveSilenceFrames = 0;
6262
}
6363

64-
return { processFrame, reset, getState: () => ({ ...state }) };
64+
function updateThreshold(threshold: number) {
65+
speechThreshold = threshold;
66+
silenceThreshold = threshold * 0.75;
67+
}
68+
69+
return { processFrame, reset, updateThreshold, getState: () => ({ ...state }) };
6570
}
6671

6772
function withTimeout<T>(p: Promise<T>, ms: number, message: string): Promise<T> {
@@ -93,13 +98,27 @@ export function createNativeAudioService(args: {
9398

9499
// 当前是否正在播放(用于 VAD 门控)
95100
let isPlaying = false;
101+
// 动态回声校准
102+
const CALIBRATION_FRAMES = 10; // 校准帧数(约 320ms)
103+
const ECHO_GATE_MULTIPLIER = 1.8; // 阈值 = 回声均值 × 此倍数
104+
let calibrationFrames: number[] = [];
105+
let calibratedThreshold = 0.08; // 初始默认值
96106

97107
// 客户端 VAD:过滤回声,只有真正的人声才发给服务器
98108
const vad = createSimpleVAD({
99109
speechThreshold: 0.08,
100110
silenceThreshold: 0.06,
101111
minSpeechFrames: 2,
102-
silenceFrames: 8,
112+
silenceFrames: 6,
113+
onSpeechStart: () => {
114+
// 检测到人声立即停止播放,不等服务器 user_activity,消除网络往返延迟
115+
if (isPlaying) {
116+
try { PCMStream.stopPlayback(); } catch (_e) {}
117+
isPlaying = false;
118+
outputAmpMutedUntil = Date.now() + OUTPUT_AMP_MUTE_AFTER_INTERRUPT_MS;
119+
emitter.emit("outputAmplitude", { amplitude: 0 });
120+
}
121+
},
103122
});
104123

105124
let state: AudioServiceState = "idle";
@@ -133,12 +152,18 @@ export function createNativeAudioService(args: {
133152
ampSub = PCMStream.addListener("onAmplitudeUpdate", (event: any) => {
134153
if (Date.now() < outputAmpMutedUntil) return;
135154
const amp = typeof event?.amplitude === "number" ? event.amplitude : 0;
155+
if (!isPlaying && amp > 0.01) {
156+
// 播放刚开始,重置校准
157+
isPlaying = true;
158+
calibrationFrames = [];
159+
}
136160
isPlaying = amp > 0.01;
137161
emitter.emit("outputAmplitude", { amplitude: Math.max(0, Math.min(1, amp)) });
138162
});
139163

140164
playbackStopSub = PCMStream.addListener("onPlaybackStop", () => {
141165
isPlaying = false;
166+
calibrationFrames = [];
142167
vad.reset();
143168
emitter.emit("outputAmplitude", { amplitude: 0 });
144169
});
@@ -168,6 +193,18 @@ export function createNativeAudioService(args: {
168193

169194
const int16 = new Int16Array(pcm.buffer.slice(pcm.byteOffset, pcm.byteOffset + pcm.byteLength));
170195

196+
// 播放时前 N 帧用于校准回声阈值,不发送
197+
if (isPlaying && calibrationFrames.length < CALIBRATION_FRAMES) {
198+
calibrationFrames.push(calcRMS(int16));
199+
if (calibrationFrames.length === CALIBRATION_FRAMES) {
200+
const avg = calibrationFrames.reduce((a, b) => a + b, 0) / CALIBRATION_FRAMES;
201+
calibratedThreshold = Math.max(0.04, avg * ECHO_GATE_MULTIPLIER);
202+
vad.updateThreshold(calibratedThreshold);
203+
console.log(`🎚️ 回声校准完成: 均值=${avg.toFixed(4)} 阈值=${calibratedThreshold.toFixed(4)}`);
204+
}
205+
return;
206+
}
207+
171208
// 客户端 VAD 门控:播放时只有检测到真实人声才发送,过滤回声
172209
const isSpeaking = vad.processFrame(int16);
173210
if (isPlaying && !isSpeaking) return;
@@ -379,6 +416,7 @@ export function createNativeAudioService(args: {
379416
PCMStream.stopPlayback();
380417
} catch (_e) {}
381418
isPlaying = false;
419+
calibrationFrames = [];
382420
vad.reset();
383421
manualInterruptActive = true;
384422
micMutedUntil = Date.now() + MIC_MUTE_AFTER_INTERRUPT_MS;

0 commit comments

Comments
 (0)