|
1 | | -use speexdsp::preprocess::*; |
| 1 | +use std::sync::{LazyLock, Mutex}; |
| 2 | + |
| 3 | +use speexdsp::preprocess::{SpeexPreprocess, SpeexPreprocessConst}; |
| 4 | + |
| 5 | +use crate::audio::AudioProcessParams; |
2 | 6 |
|
// TODO: confirm whether the sample rate really needs to be changed at all.
// speexdsp is reportedly tuned for low sample rates (8000 / 16000 Hz) -- this is unverified --
// but 16000 Hz did not work in the original author's testing, so 48000 Hz is used.
pub const DENOISE_SPEEX_SAMPLE_RATE: u32 = 48000;
// Samples per 20 ms frame. Integer arithmetic avoids the f32 round-trip and its truncating cast.
const FRAME_SIZE: usize = DENOISE_SPEEX_SAMPLE_RATE as usize * 20 / 1000;
7 | 12 |
|
8 | | -pub struct DenoiseSpeexCache { |
| 13 | +struct DenoiseSpeexCache { |
| 14 | + sample_buffer: Vec<Vec<i16>>, |
9 | 15 | denoisers: Vec<SpeexPreprocess>, |
| 16 | + config_noise_suppress: i32, |
| 17 | + config_vad_enabled: bool, |
| 18 | + config_vad_threshold: u32, |
| 19 | + config_agc_enabled: bool, |
| 20 | + config_agc_target: u32, |
| 21 | + config_dereverb_enabled: bool, |
| 22 | + config_dereverb_level: f32, |
| 23 | +} |
| 24 | + |
| 25 | +impl DenoiseSpeexCache { |
| 26 | + fn is_config_changed(&self, config: &AudioProcessParams) -> bool { |
| 27 | + self.config_noise_suppress != config.speex_noise_suppress |
| 28 | + || self.config_vad_enabled != config.speex_vad_enabled |
| 29 | + || self.config_vad_threshold != config.speex_vad_threshold |
| 30 | + || self.config_agc_enabled != config.speex_agc_enabled |
| 31 | + || self.config_agc_target != config.speex_agc_target |
| 32 | + || self.config_dereverb_enabled != config.speex_dereverb_enabled |
| 33 | + || self.config_dereverb_level != config.speex_dereverb_level |
| 34 | + } |
10 | 35 | } |
11 | 36 |
|
12 | 37 | // SAFETY: relies on the author's stated invariant that packets are processed in order and never concurrently. NOTE(review): `Send` allows moving the value to another thread -- confirm `SpeexPreprocess` has no thread affinity.
13 | 38 | unsafe impl Send for DenoiseSpeexCache {}
14 | 39 |
|
| 40 | +// Process-wide denoiser state used by `denoise_speex_f32_stream`; lazily created holding `None`, and every access goes through the `Mutex`.
| 41 | +static DENOISE_CACHE: LazyLock<Mutex<Option<DenoiseSpeexCache>>> =
| 42 | + LazyLock::new(|| Mutex::new(None));
| 43 | + |
15 | 44 | pub fn denoise_speex_f32_stream( |
16 | | - data: &mut [Vec<i16>], |
17 | | - cache: &mut Option<DenoiseSpeexCache>, |
18 | | - noise_suppress: i32, |
19 | | -) -> anyhow::Result<()> { |
20 | | - const FRAME_SIZE: usize = (DENOISE_SPEEX_SAMPLE_RATE as f32 * 0.02) as usize; // 20 ms frame |
21 | | - |
22 | | - if cache.is_none() { |
23 | | - *cache = Some(DenoiseSpeexCache { |
| 45 | + data: &[Vec<f32>], |
| 46 | + config: &AudioProcessParams, |
| 47 | +) -> anyhow::Result<Vec<Vec<f32>>> { |
| 48 | + let mut denoise_cache = DENOISE_CACHE.lock().unwrap(); |
| 49 | + |
| 50 | + if denoise_cache.is_none() |
| 51 | + || data.len() != denoise_cache.as_ref().unwrap().denoisers.len() |
| 52 | + || denoise_cache.as_ref().unwrap().is_config_changed(config) |
| 53 | + { |
| 54 | + *denoise_cache = Some(DenoiseSpeexCache { |
| 55 | + sample_buffer: vec![Vec::with_capacity(FRAME_SIZE); data.len()], |
24 | 56 | denoisers: data |
25 | 57 | .iter() |
26 | 58 | .map(|_| { |
27 | 59 | let mut st = |
28 | 60 | SpeexPreprocess::new(FRAME_SIZE, DENOISE_SPEEX_SAMPLE_RATE as usize) |
29 | 61 | .unwrap(); |
30 | | - st.set_denoise(true); |
31 | | - st.set_noise_suppress(noise_suppress); |
| 62 | + st.preprocess_ctl(SpeexPreprocessConst::SPEEX_PREPROCESS_SET_DENOISE, 1) |
| 63 | + .unwrap(); |
| 64 | + st.set_noise_suppress(config.speex_noise_suppress); |
| 65 | + st.preprocess_ctl( |
| 66 | + SpeexPreprocessConst::SPEEX_PREPROCESS_SET_VAD, |
| 67 | + if config.speex_vad_enabled { 1 } else { 0 }, |
| 68 | + ) |
| 69 | + .unwrap(); |
| 70 | + st.preprocess_ctl( |
| 71 | + SpeexPreprocessConst::SPEEX_PREPROCESS_SET_PROB_START, |
| 72 | + config.speex_vad_threshold, |
| 73 | + ) |
| 74 | + .unwrap(); |
| 75 | + st.preprocess_ctl( |
| 76 | + SpeexPreprocessConst::SPEEX_PREPROCESS_SET_AGC, |
| 77 | + if config.speex_agc_enabled { 1 } else { 0 }, |
| 78 | + ) |
| 79 | + .unwrap(); |
| 80 | + st.preprocess_ctl( |
| 81 | + SpeexPreprocessConst::SPEEX_PREPROCESS_SET_AGC_TARGET, |
| 82 | + config.speex_agc_target, |
| 83 | + ) |
| 84 | + .unwrap(); |
| 85 | + st.preprocess_ctl( |
| 86 | + SpeexPreprocessConst::SPEEX_PREPROCESS_SET_DEREVERB, |
| 87 | + if config.speex_dereverb_enabled { 1 } else { 0 }, |
| 88 | + ) |
| 89 | + .unwrap(); |
| 90 | + st.preprocess_ctl( |
| 91 | + SpeexPreprocessConst::SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, |
| 92 | + config.speex_dereverb_level, |
| 93 | + ) |
| 94 | + .unwrap(); |
32 | 95 | st |
33 | 96 | }) |
34 | 97 | .collect(), |
| 98 | + config_noise_suppress: config.speex_noise_suppress, |
| 99 | + config_vad_enabled: config.speex_vad_enabled, |
| 100 | + config_vad_threshold: config.speex_vad_threshold, |
| 101 | + config_agc_enabled: config.speex_agc_enabled, |
| 102 | + config_agc_target: config.speex_agc_target, |
| 103 | + config_dereverb_enabled: config.speex_dereverb_enabled, |
| 104 | + config_dereverb_level: config.speex_dereverb_level, |
35 | 105 | }); |
36 | 106 | } |
37 | 107 |
|
38 | | - for (channel, st) in data |
39 | | - .iter_mut() |
40 | | - .zip(cache.as_mut().unwrap().denoisers.iter_mut()) |
41 | | - { |
42 | | - for frame in channel.chunks_exact_mut(FRAME_SIZE) { |
43 | | - match st.preprocess_run(frame) { |
| 108 | + let cache = denoise_cache.as_mut().unwrap(); |
| 109 | + let mut output: Vec<Vec<f32>> = vec![Vec::new(); data.len()]; |
| 110 | + |
| 111 | + // Convert f32 to i16 |
| 112 | + let data_i16: Vec<Vec<i16>> = data |
| 113 | + .iter() |
| 114 | + .map(|channel| { |
| 115 | + channel |
| 116 | + .iter() |
| 117 | + .map(|&x| (x * i16::MAX as f32).clamp(i16::MIN as f32, i16::MAX as f32) as i16) |
| 118 | + .collect() |
| 119 | + }) |
| 120 | + .collect(); |
| 121 | + |
| 122 | + // Append new data into the cache |
| 123 | + for channel_idx in 0..data_i16.len() { |
| 124 | + cache.sample_buffer[channel_idx].extend_from_slice(&data_i16[channel_idx]); |
| 125 | + } |
| 126 | + |
| 127 | + while cache.sample_buffer[0].len() >= FRAME_SIZE { |
| 128 | + for channel_idx in 0..data.len() { |
| 129 | + match cache.denoisers[channel_idx] |
| 130 | + .preprocess_run(&mut cache.sample_buffer[channel_idx][0..FRAME_SIZE]) |
| 131 | + { |
44 | 132 | 0 => { |
45 | | - frame.fill(0); |
| 133 | + cache.sample_buffer[channel_idx][0..FRAME_SIZE].fill(0); |
46 | 134 | } |
47 | 135 | 1 => {} |
48 | 136 | _ => panic!(), |
49 | 137 | } |
| 138 | + |
| 139 | + // Scale back to -1.0 to 1.0 range |
| 140 | + output[channel_idx].extend_from_slice( |
| 141 | + &cache.sample_buffer[channel_idx][0..FRAME_SIZE] |
| 142 | + .iter() |
| 143 | + .map(|&x| x as f32 / i16::MAX as f32) |
| 144 | + .collect::<Vec<f32>>(), |
| 145 | + ); |
| 146 | + } |
| 147 | + |
| 148 | + // Clear the sample buffer for the next round |
| 149 | + for channel in &mut cache.sample_buffer { |
| 150 | + channel.drain(0..FRAME_SIZE); |
50 | 151 | } |
51 | 152 | } |
52 | 153 |
|
53 | | - Ok(()) |
| 154 | + Ok(output) |
54 | 155 | } |
0 commit comments