Skip to content

Commit f122703

Browse files
authored
Merge pull request #95 from teamclouday/speexdsp
Speexdsp improvements
2 parents 24e5e79 + 164a22d commit f122703

File tree

11 files changed

+444
-198
lines changed

11 files changed

+444
-198
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use std::sync::{LazyLock, Mutex};
22

33
use nnnoiseless::DenoiseState;
44

5+
pub const DENOISE_RNNOISE_SAMPLE_RATE: u32 = 48000;
6+
57
struct DenoiseCache {
68
sample_buffer: Vec<Vec<f32>>,
79
denoisers: Vec<Box<DenoiseState<'static>>>,

RustApp/src/audio/denoise_speex.rs

Lines changed: 121 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,155 @@
1-
use speexdsp::preprocess::*;
1+
use std::sync::{LazyLock, Mutex};
2+
3+
use speexdsp::preprocess::{SpeexPreprocess, SpeexPreprocessConst};
4+
5+
use crate::audio::AudioProcessParams;
26

37
// XXX: do we really need to change the sample rate?
48
// apparently, speexdsp is optimized for low sample rates (8000, 16000), according to ChatGPT,
59
// but 16000 just doesn't work on my end
610
pub const DENOISE_SPEEX_SAMPLE_RATE: u32 = 48000;
11+
const FRAME_SIZE: usize = (DENOISE_SPEEX_SAMPLE_RATE as f32 * 0.02) as usize; // 20 ms frame
712

8-
pub struct DenoiseSpeexCache {
13+
struct DenoiseSpeexCache {
14+
sample_buffer: Vec<Vec<i16>>,
915
denoisers: Vec<SpeexPreprocess>,
16+
config_noise_suppress: i32,
17+
config_vad_enabled: bool,
18+
config_vad_threshold: u32,
19+
config_agc_enabled: bool,
20+
config_agc_target: u32,
21+
config_dereverb_enabled: bool,
22+
config_dereverb_level: f32,
23+
}
24+
25+
impl DenoiseSpeexCache {
26+
fn is_config_changed(&self, config: &AudioProcessParams) -> bool {
27+
self.config_noise_suppress != config.speex_noise_suppress
28+
|| self.config_vad_enabled != config.speex_vad_enabled
29+
|| self.config_vad_threshold != config.speex_vad_threshold
30+
|| self.config_agc_enabled != config.speex_agc_enabled
31+
|| self.config_agc_target != config.speex_agc_target
32+
|| self.config_dereverb_enabled != config.speex_dereverb_enabled
33+
|| self.config_dereverb_level != config.speex_dereverb_level
34+
}
1035
}
1136

1237
// safe because packets are processed in order, and not concurrently
1338
unsafe impl Send for DenoiseSpeexCache {}
1439

40+
// global denoiser cache guarded by a Mutex; packets are processed in order, and not concurrently
41+
static DENOISE_CACHE: LazyLock<Mutex<Option<DenoiseSpeexCache>>> =
42+
LazyLock::new(|| Mutex::new(None));
43+
1544
pub fn denoise_speex_f32_stream(
16-
data: &mut [Vec<i16>],
17-
cache: &mut Option<DenoiseSpeexCache>,
18-
noise_suppress: i32,
19-
) -> anyhow::Result<()> {
20-
const FRAME_SIZE: usize = (DENOISE_SPEEX_SAMPLE_RATE as f32 * 0.02) as usize; // 20 ms frame
21-
22-
if cache.is_none() {
23-
*cache = Some(DenoiseSpeexCache {
45+
data: &[Vec<f32>],
46+
config: &AudioProcessParams,
47+
) -> anyhow::Result<Vec<Vec<f32>>> {
48+
let mut denoise_cache = DENOISE_CACHE.lock().unwrap();
49+
50+
if denoise_cache.is_none()
51+
|| data.len() != denoise_cache.as_ref().unwrap().denoisers.len()
52+
|| denoise_cache.as_ref().unwrap().is_config_changed(config)
53+
{
54+
*denoise_cache = Some(DenoiseSpeexCache {
55+
sample_buffer: vec![Vec::with_capacity(FRAME_SIZE); data.len()],
2456
denoisers: data
2557
.iter()
2658
.map(|_| {
2759
let mut st =
2860
SpeexPreprocess::new(FRAME_SIZE, DENOISE_SPEEX_SAMPLE_RATE as usize)
2961
.unwrap();
30-
st.set_denoise(true);
31-
st.set_noise_suppress(noise_suppress);
62+
st.preprocess_ctl(SpeexPreprocessConst::SPEEX_PREPROCESS_SET_DENOISE, 1)
63+
.unwrap();
64+
st.set_noise_suppress(config.speex_noise_suppress);
65+
st.preprocess_ctl(
66+
SpeexPreprocessConst::SPEEX_PREPROCESS_SET_VAD,
67+
if config.speex_vad_enabled { 1 } else { 0 },
68+
)
69+
.unwrap();
70+
st.preprocess_ctl(
71+
SpeexPreprocessConst::SPEEX_PREPROCESS_SET_PROB_START,
72+
config.speex_vad_threshold,
73+
)
74+
.unwrap();
75+
st.preprocess_ctl(
76+
SpeexPreprocessConst::SPEEX_PREPROCESS_SET_AGC,
77+
if config.speex_agc_enabled { 1 } else { 0 },
78+
)
79+
.unwrap();
80+
st.preprocess_ctl(
81+
SpeexPreprocessConst::SPEEX_PREPROCESS_SET_AGC_TARGET,
82+
config.speex_agc_target,
83+
)
84+
.unwrap();
85+
st.preprocess_ctl(
86+
SpeexPreprocessConst::SPEEX_PREPROCESS_SET_DEREVERB,
87+
if config.speex_dereverb_enabled { 1 } else { 0 },
88+
)
89+
.unwrap();
90+
st.preprocess_ctl(
91+
SpeexPreprocessConst::SPEEX_PREPROCESS_SET_DEREVERB_LEVEL,
92+
config.speex_dereverb_level,
93+
)
94+
.unwrap();
3295
st
3396
})
3497
.collect(),
98+
config_noise_suppress: config.speex_noise_suppress,
99+
config_vad_enabled: config.speex_vad_enabled,
100+
config_vad_threshold: config.speex_vad_threshold,
101+
config_agc_enabled: config.speex_agc_enabled,
102+
config_agc_target: config.speex_agc_target,
103+
config_dereverb_enabled: config.speex_dereverb_enabled,
104+
config_dereverb_level: config.speex_dereverb_level,
35105
});
36106
}
37107

38-
for (channel, st) in data
39-
.iter_mut()
40-
.zip(cache.as_mut().unwrap().denoisers.iter_mut())
41-
{
42-
for frame in channel.chunks_exact_mut(FRAME_SIZE) {
43-
match st.preprocess_run(frame) {
108+
let cache = denoise_cache.as_mut().unwrap();
109+
let mut output: Vec<Vec<f32>> = vec![Vec::new(); data.len()];
110+
111+
// Convert f32 to i16
112+
let data_i16: Vec<Vec<i16>> = data
113+
.iter()
114+
.map(|channel| {
115+
channel
116+
.iter()
117+
.map(|&x| (x * i16::MAX as f32).clamp(i16::MIN as f32, i16::MAX as f32) as i16)
118+
.collect()
119+
})
120+
.collect();
121+
122+
// Append new data into the cache
123+
for channel_idx in 0..data_i16.len() {
124+
cache.sample_buffer[channel_idx].extend_from_slice(&data_i16[channel_idx]);
125+
}
126+
127+
while cache.sample_buffer[0].len() >= FRAME_SIZE {
128+
for channel_idx in 0..data.len() {
129+
match cache.denoisers[channel_idx]
130+
.preprocess_run(&mut cache.sample_buffer[channel_idx][0..FRAME_SIZE])
131+
{
44132
0 => {
45-
frame.fill(0);
133+
cache.sample_buffer[channel_idx][0..FRAME_SIZE].fill(0);
46134
}
47135
1 => {}
48136
_ => panic!(),
49137
}
138+
139+
// Scale back to -1.0 to 1.0 range
140+
output[channel_idx].extend_from_slice(
141+
&cache.sample_buffer[channel_idx][0..FRAME_SIZE]
142+
.iter()
143+
.map(|&x| x as f32 / i16::MAX as f32)
144+
.collect::<Vec<f32>>(),
145+
);
146+
}
147+
148+
// Remove the frame that was just processed from the front of each channel's buffer
149+
for channel in &mut cache.sample_buffer {
150+
channel.drain(0..FRAME_SIZE);
50151
}
51152
}
52153

53-
Ok(())
154+
Ok(output)
54155
}

RustApp/src/audio/mod.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,47 @@ use byteorder::{ByteOrder, NativeEndian, WriteBytesExt};
33
use rtrb::Consumer;
44

55
use crate::{
6-
config::{AudioFormat, ChannelCount, SampleRate},
6+
config::{AudioFormat, ChannelCount, Config, DenoiseKind, SampleRate},
77
ui::app::{AppState, Stream},
88
};
99

10-
mod denoise;
10+
mod denoise_rnnoise;
1111
mod denoise_speex;
1212
mod player;
1313
pub mod process;
1414
mod resampler;
15-
pub use denoise_speex::DenoiseSpeexCache;
15+
16+
/// Audio processing parameters
17+
#[derive(Clone, Debug)]
18+
pub struct AudioProcessParams {
19+
pub target_format: AudioPacketFormat,
20+
pub denoise: Option<DenoiseKind>,
21+
pub amplify: Option<f32>,
22+
pub speex_noise_suppress: i32,
23+
pub speex_vad_enabled: bool,
24+
pub speex_vad_threshold: u32,
25+
pub speex_agc_enabled: bool,
26+
pub speex_agc_target: u32,
27+
pub speex_dereverb_enabled: bool,
28+
pub speex_dereverb_level: f32,
29+
}
30+
31+
impl AudioProcessParams {
32+
pub fn new(target_format: AudioPacketFormat, config: Config) -> Self {
33+
Self {
34+
target_format,
35+
denoise: config.denoise.then_some(config.denoise_kind),
36+
amplify: config.amplify.then_some(config.amplify_value),
37+
speex_noise_suppress: config.speex_noise_suppress,
38+
speex_vad_enabled: config.speex_vad_enabled,
39+
speex_vad_threshold: config.speex_vad_threshold,
40+
speex_agc_enabled: config.speex_agc_enabled,
41+
speex_agc_target: config.speex_agc_target,
42+
speex_dereverb_enabled: config.speex_dereverb_enabled,
43+
speex_dereverb_level: config.speex_dereverb_level,
44+
}
45+
}
46+
}
1647

1748
impl AppState {
1849
pub fn start_audio_stream(

RustApp/src/audio/player.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ pub fn start_audio_stream(
3939
}
4040

4141
if !supported {
42-
bail!("unsupported output audio format or sample rate.");
42+
bail!(
43+
"Unsupported output audio format or sample rate. Please apply recommended format from settings page."
44+
);
4345
}
4446

4547
let config = cpal::StreamConfig {

RustApp/src/audio/process.rs

Lines changed: 14 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,15 @@
11
use std::borrow::Cow;
22

33
use crate::{
4-
audio::denoise_speex::{DENOISE_SPEEX_SAMPLE_RATE, denoise_speex_f32_stream},
5-
config::{AudioFormat, Config, DenoiseKind},
4+
audio::{
5+
denoise_rnnoise::DENOISE_RNNOISE_SAMPLE_RATE,
6+
denoise_speex::{DENOISE_SPEEX_SAMPLE_RATE, denoise_speex_f32_stream},
7+
},
8+
config::{AudioFormat, DenoiseKind},
69
streamer::{AudioPacketMessage, AudioStream},
710
};
811

9-
use super::{
10-
AudioBytes, AudioPacketFormat, denoise::denoise_f32_stream, resampler::resample_f32_stream,
11-
};
12-
13-
/// Audio processing parameters
14-
#[derive(Clone, Debug)]
15-
pub struct AudioProcessParams {
16-
pub target_format: AudioPacketFormat,
17-
pub denoise: Option<DenoiseKind>,
18-
pub amplify: Option<f32>,
19-
pub speex_noise_suppress: i32,
20-
}
21-
22-
impl AudioProcessParams {
23-
pub fn new(target_format: AudioPacketFormat, config: Config) -> Self {
24-
Self {
25-
target_format,
26-
denoise: config.denoise.then_some(config.denoise_kind),
27-
amplify: config.amplify.then_some(config.amplify_value),
28-
speex_noise_suppress: config.speex_noise_suppress,
29-
}
30-
}
31-
}
12+
use super::{AudioBytes, denoise_rnnoise::denoise_f32_stream, resampler::resample_f32_stream};
3213

3314
impl AudioStream {
3415
/// This function converts an audio stream from packet into producer
@@ -65,14 +46,15 @@ impl AudioStream {
6546
if let Some(denoise) = &config.denoise {
6647
match denoise {
6748
DenoiseKind::Rnnoise => {
68-
const DENOISE_SAMPLE_RATE: u32 = 48000;
69-
70-
let prepared_buffer = if current_sample_rate == DENOISE_SAMPLE_RATE {
49+
let prepared_buffer = if current_sample_rate == DENOISE_RNNOISE_SAMPLE_RATE {
7150
Cow::Borrowed(&buffer)
7251
} else {
73-
let tmp =
74-
resample_f32_stream(&buffer, current_sample_rate, DENOISE_SAMPLE_RATE)?;
75-
current_sample_rate = DENOISE_SAMPLE_RATE;
52+
let tmp = resample_f32_stream(
53+
&buffer,
54+
current_sample_rate,
55+
DENOISE_RNNOISE_SAMPLE_RATE,
56+
)?;
57+
current_sample_rate = DENOISE_RNNOISE_SAMPLE_RATE;
7658
Cow::Owned(tmp)
7759
};
7860

@@ -92,21 +74,7 @@ impl AudioStream {
9274
Cow::Owned(tmp)
9375
};
9476

95-
let mut prepared_buffer: Vec<Vec<i16>> = prepared_buffer
96-
.iter()
97-
.map(|v| v.iter().map(|v| AudioBytes::from_f32(*v)).collect())
98-
.collect();
99-
100-
denoise_speex_f32_stream(
101-
&mut prepared_buffer,
102-
&mut self.denoise_speex_cache,
103-
config.speex_noise_suppress,
104-
)?;
105-
106-
buffer = prepared_buffer
107-
.into_iter()
108-
.map(|v| v.into_iter().map(|v| AudioBytes::to_f32(&v)).collect())
109-
.collect();
77+
buffer = denoise_speex_f32_stream(&prepared_buffer, &config)?;
11078
}
11179
}
11280
}

RustApp/src/config.rs

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ pub struct Config {
2323
pub denoise: bool,
2424
pub denoise_kind: DenoiseKind,
2525
pub speex_noise_suppress: i32,
26+
pub speex_vad_enabled: bool,
27+
pub speex_vad_threshold: u32,
28+
pub speex_agc_enabled: bool,
29+
pub speex_agc_target: u32,
30+
pub speex_dereverb_enabled: bool,
31+
pub speex_dereverb_level: f32,
2632
pub theme: AppTheme,
2733
pub amplify: bool,
2834
pub amplify_value: f32,
@@ -62,30 +68,26 @@ impl Default for Config {
6268
theme: Default::default(),
6369
amplify: false,
6470
amplify_value: 2.0,
65-
speex_noise_suppress: -30,
71+
speex_noise_suppress: -30, // range: [-100, 0]
72+
speex_vad_enabled: false,
73+
speex_vad_threshold: 80, // range: [0, 100]
74+
speex_agc_enabled: false,
75+
speex_agc_target: 8000, // range: [8000, 65535]
76+
speex_dereverb_enabled: false,
77+
speex_dereverb_level: 0.5, // range: [0.0, 1.0]
6678
}
6779
}
6880
}
6981

70-
pub struct ConfigCache {
71-
pub amplify_value: String,
72-
pub speex_noise_suppress: String,
73-
}
74-
75-
impl ConfigCache {
76-
pub fn new(config: &Config) -> Self {
77-
Self {
78-
amplify_value: config.amplify_value.to_string(),
79-
speex_noise_suppress: config.speex_noise_suppress.to_string(),
80-
}
81-
}
82-
83-
pub fn parse_amplify_value(&self) -> Option<f32> {
84-
self.amplify_value.replace(',', ".").parse().ok()
85-
}
86-
87-
pub fn parse_speex_noise_suppress(&self) -> Option<i32> {
88-
self.speex_noise_suppress.parse().ok()
82+
impl Config {
83+
pub fn reset_denoise_settings(&mut self) {
84+
self.speex_noise_suppress = -30;
85+
self.speex_vad_enabled = false;
86+
self.speex_vad_threshold = 80;
87+
self.speex_agc_enabled = false;
88+
self.speex_agc_target = 8000;
89+
self.speex_dereverb_enabled = false;
90+
self.speex_dereverb_level = 0.5;
8991
}
9092
}
9193

0 commit comments

Comments
 (0)