Skip to content

Commit 5f772c4

Browse files
committed
Add Dolphin ASR support and examples
1 parent d34a4f7 commit 5f772c4

File tree

15 files changed

+249
-27
lines changed

15 files changed

+249
-27
lines changed

crates/sherpa-rs-sys/sherpa-onnx

Submodule sherpa-onnx updated 666 files

crates/sherpa-rs/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,4 +117,8 @@ path = "../../examples/transducer.rs"
117117

118118
[[example]]
119119
name = "transducer_vosk"
120-
path = "../../examples/transducer_vosk.rs"
120+
path = "../../examples/transducer_vosk.rs"
121+
122+
[[example]]
123+
name = "dolphin"
124+
path = "../../examples/dolphin.rs"

crates/sherpa-rs/src/dolphin.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
use crate::{get_default_provider, utils::cstring_from_str};
2+
use eyre::{bail, Result};
3+
use std::mem;
4+
5+
#[derive(Debug)]
6+
pub struct DolphinRecognizer {
7+
recognizer: *const sherpa_rs_sys::SherpaOnnxOfflineRecognizer,
8+
}
9+
10+
/// Value returned by `DolphinRecognizer::transcribe`; an alias for the
/// crate-level `OfflineRecognizerResult`.
pub type DolphinRecognizerResult = super::OfflineRecognizerResult;
11+
12+
/// Settings consumed by `DolphinRecognizer::new`.
#[derive(Debug, Clone)]
pub struct DolphinConfig {
    /// Forwarded as the `model` field of the Dolphin model config.
    // NOTE(review): presumably a filesystem path to the Dolphin ONNX model — confirm.
    pub model: String,
    /// Forwarded as the `tokens` field of the offline model config.
    // NOTE(review): presumably a path to the tokens file — confirm.
    pub tokens: String,
    /// Decoding method name handed to the recognizer config.
    pub decoding_method: String,

    /// Execution provider name; `None` makes `new` call `get_default_provider()`.
    pub provider: Option<String>,
    /// Thread count handed to the model config; `None` makes `new` use `2`.
    pub num_threads: Option<i32>,
    /// Converted with `.into()` and forwarded as the model config `debug` flag.
    pub debug: bool,
}
22+
23+
impl Default for DolphinConfig {
24+
fn default() -> Self {
25+
Self {
26+
model: String::new(),
27+
tokens: String::new(),
28+
decoding_method: String::from("greedy_search"),
29+
debug: false,
30+
provider: None,
31+
num_threads: Some(1),
32+
}
33+
}
34+
}
35+
36+
impl DolphinRecognizer {
37+
pub fn new(config: DolphinConfig) -> Result<Self> {
38+
let debug = config.debug.into();
39+
let provider = config.provider.unwrap_or(get_default_provider());
40+
41+
let provider_ptr = cstring_from_str(&provider);
42+
let num_threads = config.num_threads.unwrap_or(2);
43+
let model_ptr = cstring_from_str(&config.model);
44+
let tokens_ptr = cstring_from_str(&config.tokens);
45+
let decoding_method_ptr = cstring_from_str(&config.decoding_method);
46+
47+
let model_config = unsafe {
48+
sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
49+
debug,
50+
num_threads,
51+
provider: provider_ptr.as_ptr(),
52+
dolphin: sherpa_rs_sys::SherpaOnnxOfflineDolphinModelConfig {
53+
model: model_ptr.as_ptr(),
54+
},
55+
tokens: tokens_ptr.as_ptr(),
56+
57+
// Zeros
58+
nemo_ctc: mem::zeroed::<_>(),
59+
paraformer: mem::zeroed::<_>(),
60+
tdnn: mem::zeroed::<_>(),
61+
telespeech_ctc: mem::zeroed::<_>(),
62+
fire_red_asr: mem::zeroed::<_>(),
63+
transducer: mem::zeroed::<_>(),
64+
whisper: mem::zeroed::<_>(),
65+
sense_voice: mem::zeroed::<_>(),
66+
moonshine: mem::zeroed::<_>(),
67+
bpe_vocab: mem::zeroed::<_>(),
68+
model_type: mem::zeroed::<_>(),
69+
modeling_unit: mem::zeroed::<_>(),
70+
}
71+
};
72+
73+
let config = unsafe {
74+
sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {
75+
decoding_method: decoding_method_ptr.as_ptr(),
76+
model_config,
77+
feat_config: sherpa_rs_sys::SherpaOnnxFeatureConfig {
78+
sample_rate: 16000,
79+
feature_dim: 80,
80+
},
81+
hotwords_file: mem::zeroed::<_>(),
82+
hotwords_score: mem::zeroed::<_>(),
83+
lm_config: mem::zeroed::<_>(),
84+
max_active_paths: mem::zeroed::<_>(),
85+
rule_fars: mem::zeroed::<_>(),
86+
rule_fsts: mem::zeroed::<_>(),
87+
blank_penalty: mem::zeroed::<_>(),
88+
hr: mem::zeroed::<_>(),
89+
}
90+
};
91+
92+
let recognizer = unsafe { sherpa_rs_sys::SherpaOnnxCreateOfflineRecognizer(&config) };
93+
94+
if recognizer.is_null() {
95+
bail!("Failed to create recognizer");
96+
}
97+
98+
Ok(Self { recognizer })
99+
}
100+
101+
pub fn transcribe(&mut self, sample_rate: u32, samples: &[f32]) -> DolphinRecognizerResult {
102+
unsafe {
103+
let stream = sherpa_rs_sys::SherpaOnnxCreateOfflineStream(self.recognizer);
104+
sherpa_rs_sys::SherpaOnnxAcceptWaveformOffline(
105+
stream,
106+
sample_rate as i32,
107+
samples.as_ptr(),
108+
samples.len().try_into().unwrap(),
109+
);
110+
sherpa_rs_sys::SherpaOnnxDecodeOfflineStream(self.recognizer, stream);
111+
let result_ptr = sherpa_rs_sys::SherpaOnnxGetOfflineStreamResult(stream);
112+
let raw_result = result_ptr.read();
113+
let result = DolphinRecognizerResult::new(&raw_result);
114+
// Free
115+
sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizerResult(result_ptr);
116+
sherpa_rs_sys::SherpaOnnxDestroyOfflineStream(stream);
117+
result
118+
}
119+
}
120+
}
121+
122+
// NOTE(review): assumes the native recognizer handle may be moved to another
// thread — confirm against the sherpa-onnx C API guarantees.
unsafe impl Send for DolphinRecognizer {}
123+
// NOTE(review): assumes the native recognizer handle may be used through
// shared references from several threads — confirm against the sherpa-onnx
// C API guarantees.
unsafe impl Sync for DolphinRecognizer {}
124+
125+
impl Drop for DolphinRecognizer {
126+
fn drop(&mut self) {
127+
unsafe {
128+
sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizer(self.recognizer);
129+
}
130+
}
131+
}

crates/sherpa-rs/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
pub mod audio_tag;
22
pub mod diarize;
3+
pub mod dolphin;
34
pub mod embedding_manager;
45
pub mod keyword_spot;
56
pub mod language_id;
@@ -97,8 +98,8 @@ pub struct OfflineRecognizerResult {
9798

9899
impl OfflineRecognizerResult {
99100
fn new(result: &sherpa_rs_sys::SherpaOnnxOfflineRecognizerResult) -> Self {
100-
let lang = unsafe { cstr_to_string(result.lang) };
101-
let text = unsafe { cstr_to_string(result.text) };
101+
let lang = cstr_to_string(result.lang);
102+
let text = cstr_to_string(result.text);
102103
let count = result.count.try_into().unwrap();
103104
let timestamps = if result.timestamps.is_null() {
104105
Vec::new()

crates/sherpa-rs/src/moonshine.rs

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ impl MoonshineRecognizer {
7171

7272
model_type: mem::zeroed::<_>(),
7373
modeling_unit: mem::zeroed::<_>(),
74+
dolphin: mem::zeroed::<_>(),
7475
bpe_vocab: mem::zeroed::<_>(),
7576
nemo_ctc: mem::zeroed::<_>(),
7677
paraformer: mem::zeroed::<_>(),
@@ -83,23 +84,26 @@ impl MoonshineRecognizer {
8384
}
8485
};
8586

86-
let config = sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {
87-
decoding_method: null(),
88-
feat_config: sherpa_rs_sys::SherpaOnnxFeatureConfig {
89-
sample_rate: 16000,
90-
feature_dim: 512,
91-
},
92-
hotwords_file: null(),
93-
hotwords_score: 0.0,
94-
lm_config: sherpa_rs_sys::SherpaOnnxOfflineLMConfig {
95-
model: null(),
96-
scale: 0.0,
97-
},
98-
max_active_paths: 0,
99-
model_config,
100-
rule_fars: null(),
101-
rule_fsts: null(),
102-
blank_penalty: 0.0,
87+
let config = unsafe {
88+
sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {
89+
decoding_method: null(),
90+
feat_config: sherpa_rs_sys::SherpaOnnxFeatureConfig {
91+
sample_rate: 16000,
92+
feature_dim: 512,
93+
},
94+
hotwords_file: null(),
95+
hotwords_score: 0.0,
96+
lm_config: sherpa_rs_sys::SherpaOnnxOfflineLMConfig {
97+
model: null(),
98+
scale: 0.0,
99+
},
100+
max_active_paths: 0,
101+
model_config,
102+
rule_fars: null(),
103+
rule_fsts: null(),
104+
blank_penalty: 0.0,
105+
hr: mem::zeroed::<_>(),
106+
}
103107
};
104108

105109
let recognizer = unsafe { sherpa_rs_sys::SherpaOnnxCreateOfflineRecognizer(&config) };

crates/sherpa-rs/src/paraformer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ impl ParaformerRecognizer {
6969
whisper: mem::zeroed::<_>(),
7070
sense_voice: mem::zeroed::<_>(),
7171
moonshine: mem::zeroed::<_>(),
72+
dolphin: mem::zeroed::<_>(),
7273
}
7374
};
7475

@@ -88,6 +89,7 @@ impl ParaformerRecognizer {
8889
rule_fars: null(),
8990
rule_fsts: null(),
9091
blank_penalty: 0.0,
92+
hr: mem::zeroed::<_>(),
9193
}
9294
};
9395

crates/sherpa-rs/src/sense_voice.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ impl SenseVoiceRecognizer {
7373
transducer: mem::zeroed::<_>(),
7474
whisper: mem::zeroed::<_>(),
7575
moonshine: mem::zeroed::<_>(),
76+
dolphin: mem::zeroed::<_>(),
7677
}
7778
};
7879

@@ -95,6 +96,7 @@ impl SenseVoiceRecognizer {
9596
rule_fars: mem::zeroed::<_>(),
9697
rule_fsts: mem::zeroed::<_>(),
9798
blank_penalty: 0.0,
99+
hr: mem::zeroed::<_>(),
98100
}
99101
};
100102

crates/sherpa-rs/src/transducer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ impl TransducerRecognizer {
8888
sense_voice: mem::zeroed::<_>(),
8989
moonshine: mem::zeroed::<_>(),
9090
fire_red_asr: mem::zeroed::<_>(),
91+
dolphin: mem::zeroed::<_>(),
9192
};
9293

9394
let recognizer_config = sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {
@@ -106,6 +107,7 @@ impl TransducerRecognizer {
106107
rule_fsts: mem::zeroed::<_>(),
107108
rule_fars: mem::zeroed::<_>(),
108109
max_active_paths: mem::zeroed::<_>(),
110+
hr: mem::zeroed::<_>(),
109111
};
110112

111113
let recognizer = sherpa_rs_sys::SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

crates/sherpa-rs/src/tts/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ mod kokoro;
22
mod matcha;
33
mod vits;
44

5-
use std::ffi::CString;
6-
75
use eyre::{bail, Result};
86

97
pub use kokoro::{KokoroTts, KokoroTtsConfig};

crates/sherpa-rs/src/vad.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use eyre::Result;
33

44
#[derive(Debug)]
55
pub struct Vad {
6-
pub(crate) vad: *mut sherpa_rs_sys::SherpaOnnxVoiceActivityDetector,
6+
pub(crate) vad: *const sherpa_rs_sys::SherpaOnnxVoiceActivityDetector,
77
}
88

99
#[derive(Debug)]

0 commit comments

Comments
 (0)