Skip to content

Commit aa086ae

Browse files
committed
feat: add dolphin
1 parent 9bc0a69 commit aa086ae

File tree

13 files changed

+466
-250
lines changed

13 files changed

+466
-250
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,5 @@ build/
2828
kokoro-en-*/
2929
matcha-*
3030
crates/sherpa-rs-sys/sherpa-onnx/scripts/
31-
kokoro-multi-lang-v1_0/
31+
kokoro-multi-lang-v1_0/
32+
sherpa-onnx-dolphin*

crates/sherpa-rs-sys/checksum.txt

Lines changed: 81 additions & 65 deletions
Large diffs are not rendered by default.

crates/sherpa-rs-sys/dist.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"tag": "v1.10.45",
2+
"tag": "v1.11.3",
33
"url": "https://github.com/k2-fsa/sherpa-onnx/releases/download/{tag}/{archive}",
44
"targets": {
55
"x86_64-pc-windows-msvc": {

crates/sherpa-rs/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,8 @@ path = "../../examples/sense_voice.rs"
109109

110110
[[example]]
111111
name = "paraformer"
112-
path = "../../examples/paraformer.rs"
112+
path = "../../examples/paraformer.rs"
113+
114+
[[example]]
115+
name = "dolphin"
116+
path = "../../examples/dolphin.rs"

crates/sherpa-rs/src/dolphin.rs

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
use crate::{get_default_provider, utils::cstring_from_str};
2+
use eyre::{bail, Result};
3+
use std::mem;
4+
5+
#[derive(Debug)]
6+
pub struct DolphinRecognizer {
7+
recognizer: *const sherpa_rs_sys::SherpaOnnxOfflineRecognizer,
8+
}
9+
10+
/// Result type returned by `DolphinRecognizer::transcribe`; an alias for the
/// `OfflineRecognizerResult` defined in the parent module.
pub type DolphinRecognizerResult = super::OfflineRecognizerResult;
11+
12+
/// Settings consumed by `DolphinRecognizer::new`.
#[derive(Debug, Clone)]
pub struct DolphinConfig {
    /// Forwarded as the `dolphin.model` field of the offline model config
    /// (presumably a path to the model file — confirm against sherpa-onnx).
    pub model: String,
    /// Forwarded as the `tokens` field of the offline model config
    /// (presumably a path to the tokens file — confirm against sherpa-onnx).
    pub tokens: String,
    /// Forwarded as the recognizer's `decoding_method`.
    pub decoding_method: String,

    /// Execution provider name; `None` lets the recognizer pick the crate default.
    pub provider: Option<String>,
    /// Thread count handed to the model config; `None` lets the recognizer
    /// apply its own fallback.
    pub num_threads: Option<i32>,
    /// Forwarded as the model config's `debug` flag.
    pub debug: bool,
}

impl Default for DolphinConfig {
    /// Empty model/tokens, `"greedy_search"` decoding, one thread,
    /// no explicit provider, debug off.
    fn default() -> Self {
        let decoding_method = "greedy_search".to_owned();
        Self {
            model: String::new(),
            tokens: String::new(),
            decoding_method,
            provider: None,
            num_threads: Some(1),
            debug: false,
        }
    }
}
35+
36+
impl DolphinRecognizer {
37+
pub fn new(config: DolphinConfig) -> Result<Self> {
38+
let debug = config.debug.into();
39+
let provider = config.provider.unwrap_or(get_default_provider());
40+
41+
let provider_ptr = cstring_from_str(&provider);
42+
let num_threads = config.num_threads.unwrap_or(2);
43+
let model_ptr = cstring_from_str(&config.model);
44+
let tokens_ptr = cstring_from_str(&config.tokens);
45+
let decoding_method_ptr = cstring_from_str(&config.decoding_method);
46+
47+
let model_config = unsafe {
48+
sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
49+
debug,
50+
num_threads,
51+
provider: provider_ptr.as_ptr(),
52+
dolphin: sherpa_rs_sys::SherpaOnnxOfflineDolphinModelConfig {
53+
model: model_ptr.as_ptr(),
54+
},
55+
tokens: tokens_ptr.as_ptr(),
56+
57+
// Zeros
58+
nemo_ctc: mem::zeroed::<_>(),
59+
paraformer: mem::zeroed::<_>(),
60+
tdnn: mem::zeroed::<_>(),
61+
telespeech_ctc: mem::zeroed::<_>(),
62+
fire_red_asr: mem::zeroed::<_>(),
63+
transducer: mem::zeroed::<_>(),
64+
whisper: mem::zeroed::<_>(),
65+
sense_voice: mem::zeroed::<_>(),
66+
moonshine: mem::zeroed::<_>(),
67+
bpe_vocab: mem::zeroed::<_>(),
68+
model_type: mem::zeroed::<_>(),
69+
modeling_unit: mem::zeroed::<_>(),
70+
}
71+
};
72+
73+
let config = unsafe {
74+
sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {
75+
decoding_method: decoding_method_ptr.as_ptr(),
76+
model_config,
77+
feat_config: sherpa_rs_sys::SherpaOnnxFeatureConfig {
78+
sample_rate: 16000,
79+
feature_dim: 80,
80+
},
81+
hotwords_file: mem::zeroed::<_>(),
82+
hotwords_score: mem::zeroed::<_>(),
83+
lm_config: mem::zeroed::<_>(),
84+
max_active_paths: mem::zeroed::<_>(),
85+
rule_fars: mem::zeroed::<_>(),
86+
rule_fsts: mem::zeroed::<_>(),
87+
blank_penalty: mem::zeroed::<_>(),
88+
}
89+
};
90+
91+
let recognizer = unsafe { sherpa_rs_sys::SherpaOnnxCreateOfflineRecognizer(&config) };
92+
93+
if recognizer.is_null() {
94+
bail!("Failed to create recognizer");
95+
}
96+
97+
Ok(Self { recognizer })
98+
}
99+
100+
pub fn transcribe(&mut self, sample_rate: u32, samples: &[f32]) -> DolphinRecognizerResult {
101+
unsafe {
102+
let stream = sherpa_rs_sys::SherpaOnnxCreateOfflineStream(self.recognizer);
103+
sherpa_rs_sys::SherpaOnnxAcceptWaveformOffline(
104+
stream,
105+
sample_rate as i32,
106+
samples.as_ptr(),
107+
samples.len().try_into().unwrap(),
108+
);
109+
sherpa_rs_sys::SherpaOnnxDecodeOfflineStream(self.recognizer, stream);
110+
let result_ptr = sherpa_rs_sys::SherpaOnnxGetOfflineStreamResult(stream);
111+
let raw_result = result_ptr.read();
112+
let result = DolphinRecognizerResult::new(&raw_result);
113+
// Free
114+
sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizerResult(result_ptr);
115+
sherpa_rs_sys::SherpaOnnxDestroyOfflineStream(stream);
116+
result
117+
}
118+
}
119+
}
120+
121+
// SAFETY: NOTE(review) — this asserts that the native recognizer handle may be
// moved to another thread; nothing in this file proves it, confirm against
// the sherpa-onnx threading guarantees.
unsafe impl Send for DolphinRecognizer {}
122+
// SAFETY: NOTE(review) — this asserts that `&DolphinRecognizer` may be shared
// between threads; `transcribe` takes `&mut self`, but the soundness of this
// impl still depends on sherpa-onnx's own guarantees — confirm upstream.
unsafe impl Sync for DolphinRecognizer {}
123+
124+
impl Drop for DolphinRecognizer {
125+
fn drop(&mut self) {
126+
unsafe {
127+
sherpa_rs_sys::SherpaOnnxDestroyOfflineRecognizer(self.recognizer);
128+
}
129+
}
130+
}

crates/sherpa-rs/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
pub mod audio_tag;
22
pub mod diarize;
3+
pub mod dolphin;
34
pub mod embedding_manager;
45
pub mod keyword_spot;
56
pub mod language_id;

crates/sherpa-rs/src/moonshine.rs

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::{get_default_provider, utils::cstring_from_str};
22
use eyre::{bail, Result};
3+
use std::mem;
34
use std::ptr::null;
45

56
#[derive(Debug)]
@@ -56,45 +57,50 @@ impl MoonshineRecognizer {
5657
let uncached_decoder_ptr = cstring_from_str(&config.uncached_decoder);
5758
let tokens_ptr = cstring_from_str(&config.tokens);
5859

59-
let model_config = sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
60-
bpe_vocab: null(),
61-
debug,
62-
model_type: null(),
63-
modeling_unit: null(),
64-
nemo_ctc: sherpa_rs_sys::SherpaOnnxOfflineNemoEncDecCtcModelConfig { model: null() },
65-
num_threads,
66-
paraformer: sherpa_rs_sys::SherpaOnnxOfflineParaformerModelConfig { model: null() },
67-
provider: provider_ptr.as_ptr(),
68-
tdnn: sherpa_rs_sys::SherpaOnnxOfflineTdnnModelConfig { model: null() },
69-
telespeech_ctc: null(),
70-
tokens: tokens_ptr.as_ptr(),
71-
fire_red_asr: sherpa_rs_sys::SherpaOnnxOfflineFireRedAsrModelConfig {
72-
encoder: null(),
73-
decoder: null(),
74-
},
75-
transducer: sherpa_rs_sys::SherpaOnnxOfflineTransducerModelConfig {
76-
encoder: null(),
77-
decoder: null(),
78-
joiner: null(),
79-
},
80-
whisper: sherpa_rs_sys::SherpaOnnxOfflineWhisperModelConfig {
81-
encoder: null(),
82-
decoder: null(),
83-
language: null(),
84-
task: null(),
85-
tail_paddings: 0,
86-
},
87-
sense_voice: sherpa_rs_sys::SherpaOnnxOfflineSenseVoiceModelConfig {
88-
model: null(),
89-
language: null(),
90-
use_itn: 0,
91-
},
92-
moonshine: sherpa_rs_sys::SherpaOnnxOfflineMoonshineModelConfig {
93-
preprocessor: preprocessor_ptr.as_ptr(),
94-
encoder: encoder_ptr.as_ptr(),
95-
uncached_decoder: uncached_decoder_ptr.as_ptr(),
96-
cached_decoder: cached_decoder_ptr.as_ptr(),
97-
},
60+
let model_config = unsafe {
61+
sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
62+
bpe_vocab: null(),
63+
debug,
64+
model_type: null(),
65+
modeling_unit: null(),
66+
dolphin: mem::zeroed::<_>(),
67+
nemo_ctc: sherpa_rs_sys::SherpaOnnxOfflineNemoEncDecCtcModelConfig {
68+
model: null(),
69+
},
70+
num_threads,
71+
paraformer: sherpa_rs_sys::SherpaOnnxOfflineParaformerModelConfig { model: null() },
72+
provider: provider_ptr.as_ptr(),
73+
tdnn: sherpa_rs_sys::SherpaOnnxOfflineTdnnModelConfig { model: null() },
74+
telespeech_ctc: null(),
75+
tokens: tokens_ptr.as_ptr(),
76+
fire_red_asr: sherpa_rs_sys::SherpaOnnxOfflineFireRedAsrModelConfig {
77+
encoder: null(),
78+
decoder: null(),
79+
},
80+
transducer: sherpa_rs_sys::SherpaOnnxOfflineTransducerModelConfig {
81+
encoder: null(),
82+
decoder: null(),
83+
joiner: null(),
84+
},
85+
whisper: sherpa_rs_sys::SherpaOnnxOfflineWhisperModelConfig {
86+
encoder: null(),
87+
decoder: null(),
88+
language: null(),
89+
task: null(),
90+
tail_paddings: 0,
91+
},
92+
sense_voice: sherpa_rs_sys::SherpaOnnxOfflineSenseVoiceModelConfig {
93+
model: null(),
94+
language: null(),
95+
use_itn: 0,
96+
},
97+
moonshine: sherpa_rs_sys::SherpaOnnxOfflineMoonshineModelConfig {
98+
preprocessor: preprocessor_ptr.as_ptr(),
99+
encoder: encoder_ptr.as_ptr(),
100+
uncached_decoder: uncached_decoder_ptr.as_ptr(),
101+
cached_decoder: cached_decoder_ptr.as_ptr(),
102+
},
103+
}
98104
};
99105

100106
let config = sherpa_rs_sys::SherpaOnnxOfflineRecognizerConfig {

crates/sherpa-rs/src/paraformer.rs

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::{get_default_provider, utils::cstring_from_str};
22
use eyre::{bail, Result};
3+
use std::mem;
34
use std::ptr::null;
45

56
#[derive(Debug)]
@@ -49,47 +50,52 @@ impl ParaformerRecognizer {
4950
};
5051

5152
// Offline model config
52-
let model_config = sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
53-
debug,
54-
num_threads: config.num_threads.unwrap_or(1),
55-
provider: provider_ptr.as_ptr(),
56-
tokens: tokens_ptr.as_ptr(),
57-
paraformer: paraformer_config,
58-
59-
// Null other model types
60-
bpe_vocab: null(),
61-
model_type: null(),
62-
modeling_unit: null(),
63-
nemo_ctc: sherpa_rs_sys::SherpaOnnxOfflineNemoEncDecCtcModelConfig { model: null() },
64-
tdnn: sherpa_rs_sys::SherpaOnnxOfflineTdnnModelConfig { model: null() },
65-
telespeech_ctc: null(),
66-
fire_red_asr: sherpa_rs_sys::SherpaOnnxOfflineFireRedAsrModelConfig {
67-
encoder: null(),
68-
decoder: null(),
69-
},
70-
transducer: sherpa_rs_sys::SherpaOnnxOfflineTransducerModelConfig {
71-
encoder: null(),
72-
decoder: null(),
73-
joiner: null(),
74-
},
75-
whisper: sherpa_rs_sys::SherpaOnnxOfflineWhisperModelConfig {
76-
encoder: null(),
77-
decoder: null(),
78-
language: null(),
79-
task: null(),
80-
tail_paddings: 0,
81-
},
82-
sense_voice: sherpa_rs_sys::SherpaOnnxOfflineSenseVoiceModelConfig {
83-
model: null(),
84-
language: null(),
85-
use_itn: 0,
86-
},
87-
moonshine: sherpa_rs_sys::SherpaOnnxOfflineMoonshineModelConfig {
88-
preprocessor: null(),
89-
encoder: null(),
90-
uncached_decoder: null(),
91-
cached_decoder: null(),
92-
},
53+
let model_config = unsafe {
54+
sherpa_rs_sys::SherpaOnnxOfflineModelConfig {
55+
debug,
56+
num_threads: config.num_threads.unwrap_or(1),
57+
provider: provider_ptr.as_ptr(),
58+
tokens: tokens_ptr.as_ptr(),
59+
paraformer: paraformer_config,
60+
61+
// Null other model types
62+
dolphin: mem::zeroed::<_>(),
63+
bpe_vocab: null(),
64+
model_type: null(),
65+
modeling_unit: null(),
66+
nemo_ctc: sherpa_rs_sys::SherpaOnnxOfflineNemoEncDecCtcModelConfig {
67+
model: null(),
68+
},
69+
tdnn: sherpa_rs_sys::SherpaOnnxOfflineTdnnModelConfig { model: null() },
70+
telespeech_ctc: null(),
71+
fire_red_asr: sherpa_rs_sys::SherpaOnnxOfflineFireRedAsrModelConfig {
72+
encoder: null(),
73+
decoder: null(),
74+
},
75+
transducer: sherpa_rs_sys::SherpaOnnxOfflineTransducerModelConfig {
76+
encoder: null(),
77+
decoder: null(),
78+
joiner: null(),
79+
},
80+
whisper: sherpa_rs_sys::SherpaOnnxOfflineWhisperModelConfig {
81+
encoder: null(),
82+
decoder: null(),
83+
language: null(),
84+
task: null(),
85+
tail_paddings: 0,
86+
},
87+
sense_voice: sherpa_rs_sys::SherpaOnnxOfflineSenseVoiceModelConfig {
88+
model: null(),
89+
language: null(),
90+
use_itn: 0,
91+
},
92+
moonshine: sherpa_rs_sys::SherpaOnnxOfflineMoonshineModelConfig {
93+
preprocessor: null(),
94+
encoder: null(),
95+
uncached_decoder: null(),
96+
cached_decoder: null(),
97+
},
98+
}
9399
};
94100

95101
// Recognizer config

0 commit comments

Comments
 (0)