Skip to content

Commit 37461b4

Browse files
fix(stt): add batch language routing fallback (#5293)
Route Char Pro multi-language batch requests through Deepgram language detection fallback when Soniox is unavailable, and allow longer Soniox async polling.
1 parent d86fefa commit 37461b4

6 files changed

Lines changed: 131 additions & 7 deletions

File tree

crates/owhisper-client/src/adapter/deepgram/batch.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,30 @@ mod tests {
259259
assert!(query.contains("multichannel=true"));
260260
}
261261

262+
/// For an English + Polish batch request, the built URL must carry one
/// `detect_language` value per language and must contain neither
/// `detect_language=true` nor `language=multi`.
#[test]
fn batch_url_restricts_detect_language_for_unsupported_multi_language() {
    let languages = vec![
        hypr_language::ISO639::En.into(),
        hypr_language::ISO639::Pl.into(),
    ];
    let params = ListenParams {
        languages,
        ..Default::default()
    };

    let url = build_batch_url(
        "https://api.deepgram.com/v1",
        &params,
        &DeepgramLanguageStrategy,
        &DeepgramKeywordStrategy,
    );
    let query = url.query().unwrap_or_default();

    // Pairs that must be present in the query string.
    for expected in ["detect_language=en", "detect_language=pl"] {
        assert!(query.contains(expected));
    }
    // Pairs that must be absent from the query string.
    for forbidden in ["detect_language=true", "language=multi"] {
        assert!(!query.contains(forbidden));
    }
}
285+
262286
#[tokio::test]
263287
#[ignore]
264288
async fn test_deepgram_batch_transcription() {

crates/owhisper-client/src/adapter/deepgram/language.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ use crate::adapter::deepgram_compat::{
77

88
const NOVA2_MULTI_LANGS: &[&str] = &["en", "es"];
99
const NOVA3_MULTI_LANGS: &[&str] = &["en", "es", "fr", "de", "hi", "ru", "pt", "ja", "it", "nl"];
10+
/// ISO 639 codes for which `supports_language_detection` returns `true`.
// NOTE(review): presumably mirrors the provider's published language-detection list — confirm against upstream docs.
const LANGUAGE_DETECTION_LANGS: &[&str] = &[
11+
"bg", "ca", "cs", "da", "de", "el", "en", "es", "et", "fi", "fr", "hi", "hu", "id", "it", "ja",
12+
"ko", "lt", "lv", "ms", "nl", "no", "pl", "pt", "ro", "ru", "sk", "sv", "th", "tr", "uk", "vi",
13+
"zh",
14+
];
1015

1116
pub fn can_use_multi(model: &str, languages: &[hypr_language::Language]) -> bool {
1217
if languages.len() < 2 {
@@ -55,7 +60,7 @@ impl LanguageQueryStrategy for DeepgramLanguageStrategy {
5560
if can_use_multi(model, &params.languages) {
5661
query_pairs.append_pair("language", "multi");
5762
} else if mode == TranscriptionMode::Batch {
58-
query_pairs.append_pair("detect_language", "true");
63+
append_detect_language_query(query_pairs, &params.languages);
5964
} else if let Some(language) = params.languages.first() {
6065
let code = single_language_query_code(params, language);
6166
query_pairs.append_pair("language", &code);
@@ -82,6 +87,23 @@ fn single_language_query_code(params: &ListenParams, language: &hypr_language::L
8287
}
8388
}
8489

90+
fn append_detect_language_query<'a>(
91+
query_pairs: &mut Serializer<'a, UrlQuery>,
92+
languages: &[hypr_language::Language],
93+
) {
94+
if languages.iter().all(supports_language_detection) {
95+
for language in languages {
96+
query_pairs.append_pair("detect_language", language.iso639().code());
97+
}
98+
} else {
99+
query_pairs.append_pair("detect_language", "true");
100+
}
101+
}
102+
103+
pub(super) fn supports_language_detection(language: &hypr_language::Language) -> bool {
104+
LANGUAGE_DETECTION_LANGS.contains(&language.iso639().code())
105+
}
106+
85107
fn effective_model(params: &ListenParams) -> Option<DeepgramModel> {
86108
params
87109
.model

crates/owhisper-client/src/adapter/deepgram/mod.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ impl DeepgramAdapter {
162162
Self::language_support_batch(languages, model).is_supported()
163163
}
164164

165+
pub fn supports_batch_language_detection(languages: &[hypr_language::Language]) -> bool {
166+
!languages.is_empty() && languages.iter().all(language::supports_language_detection)
167+
}
168+
165169
fn can_use_multi(languages: &[hypr_language::Language]) -> bool {
166170
language::can_use_multi(DeepgramModel::Nova3General.as_ref(), languages)
167171
|| language::can_use_multi(DeepgramModel::Nova2General.as_ref(), languages)
@@ -398,6 +402,15 @@ mod tests {
398402
}
399403
}
400404

405+
/// English + Polish is accepted for batch language detection; English + Arabic is not.
#[test]
fn test_supports_batch_language_detection() {
    let cases: [(Vec<Language>, bool); 2] = [
        (vec![ISO639::En.into(), ISO639::Pl.into()], true),
        (vec![ISO639::En.into(), ISO639::Ar.into()], false),
    ];

    for (languages, expected) in &cases {
        assert_eq!(
            DeepgramAdapter::supports_batch_language_detection(languages),
            *expected
        );
    }
}
413+
401414
#[test]
402415
fn test_can_use_multi() {
403416
let cases: &[(&str, &[ISO639], bool)] = &[

crates/soniox/src/lib.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
use serde::{Deserialize, Serialize};
2+
use std::time::Duration;
23

34
pub const API_HOST: &str = "https://api.soniox.com";
5+
/// Pause between status checks while a transcription is still `queued` or `processing`.
const TRANSCRIPTION_POLL_INTERVAL: Duration = Duration::from_secs(2);
6+
/// Upper bound on the number of status checks `wait_for_completion` makes before it reports a timeout.
const TRANSCRIPTION_MAX_POLLS: usize = 1800;
47

58
#[derive(Debug)]
69
pub struct Error {
@@ -319,8 +322,6 @@ pub async fn wait_for_completion(
319322
transcription_id: &str,
320323
api_key: &str,
321324
) -> Result<(), Error> {
322-
use std::time::Duration;
323-
324325
#[derive(Deserialize)]
325326
struct StatusResponse {
326327
status: String,
@@ -330,7 +331,7 @@ pub async fn wait_for_completion(
330331

331332
let url = format!("{API_HOST}/v1/transcriptions/{transcription_id}");
332333

333-
for _ in 0..300 {
334+
for _ in 0..TRANSCRIPTION_MAX_POLLS {
334335
let response = client
335336
.get(&url)
336337
.header("Authorization", format!("Bearer {api_key}"))
@@ -369,7 +370,7 @@ pub async fn wait_for_completion(
369370
});
370371
}
371372
"queued" | "processing" => {
372-
tokio::time::sleep(Duration::from_secs(2)).await;
373+
tokio::time::sleep(TRANSCRIPTION_POLL_INTERVAL).await;
373374
}
374375
unknown => {
375376
return Err(Error {
@@ -381,7 +382,23 @@ pub async fn wait_for_completion(
381382
}
382383

383384
Err(Error {
384-
message: "transcription timed out".to_string(),
385+
message: format!(
386+
"transcription timed out after {} seconds",
387+
TRANSCRIPTION_POLL_INTERVAL.as_secs() * TRANSCRIPTION_MAX_POLLS as u64
388+
),
385389
is_retryable: false,
386390
})
387391
}
392+
393+
#[cfg(test)]
mod tests {
    use super::*;

    /// The poll interval multiplied by the poll cap must equal 3600 seconds.
    #[test]
    fn soniox_polling_allows_long_batch_jobs() {
        let budget_secs = TRANSCRIPTION_POLL_INTERVAL.as_secs() * TRANSCRIPTION_MAX_POLLS as u64;
        assert_eq!(budget_secs, 3600);
    }
}

crates/transcribe-proxy/src/routes/batch/sync.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,9 @@ pub(super) async fn handle_hyprnote_batch(
105105
body: Bytes,
106106
content_type: &str,
107107
) -> Response {
108-
let provider_chain = state.resolve_hyprnote_provider_chain_for_mode(RoutingMode::Batch, params);
108+
let mut provider_chain =
109+
state.resolve_hyprnote_provider_chain_for_mode(RoutingMode::Batch, params);
110+
append_deepgram_batch_detection_fallback(state, &mut provider_chain, &listen_params);
109111

110112
if provider_chain.is_empty() {
111113
return (
@@ -213,6 +215,25 @@ pub(super) async fn handle_hyprnote_batch(
213215
.into_response()
214216
}
215217

218+
fn append_deepgram_batch_detection_fallback(
219+
state: &AppState,
220+
provider_chain: &mut Vec<SelectedProvider>,
221+
listen_params: &ListenParams,
222+
) {
223+
if listen_params.languages.len() <= 1
224+
|| provider_chain
225+
.iter()
226+
.any(|selected| selected.provider() == Provider::Deepgram)
227+
|| !DeepgramAdapter::supports_batch_language_detection(&listen_params.languages)
228+
{
229+
return;
230+
}
231+
232+
if let Ok(selected) = state.selector.select(Some(Provider::Deepgram)) {
233+
provider_chain.push(selected);
234+
}
235+
}
236+
216237
pub(super) async fn transcribe_with_retry(
217238
selected: &SelectedProvider,
218239
params: ListenParams,

crates/transcribe-proxy/tests/hyprnote_routing_mock.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,33 @@ async fn batch_cloud_model_resolved_for_deepgram() {
9898
);
9999
}
100100

101+
#[tokio::test]
102+
async fn batch_en_pl_uses_deepgram_detection_fallback_when_soniox_unavailable() {
103+
let batch = start_mock_batch_upstream().await;
104+
let upstream_url = batch_upstream_url(batch.addr);
105+
let proxy = start_proxy(Some(&upstream_url), None).await;
106+
107+
send_batch(proxy, "model=cloud&language=en&language=pl").await;
108+
let query = wait_for_first_batch_query(&batch, TIMEOUT).await;
109+
110+
assert!(
111+
query.contains("detect_language=en"),
112+
"should restrict Deepgram detection to English: {query}"
113+
);
114+
assert!(
115+
query.contains("detect_language=pl"),
116+
"should restrict Deepgram detection to Polish: {query}"
117+
);
118+
assert!(
119+
!query.contains("detect_language=true"),
120+
"should not fall back to unrestricted language detection: {query}"
121+
);
122+
assert!(
123+
!query.contains("language=multi"),
124+
"Polish is not in Deepgram's multi-language model set: {query}"
125+
);
126+
}
127+
101128
#[tokio::test]
102129
async fn batch_explicit_model_preserved_for_deepgram() {
103130
let batch = start_mock_batch_upstream().await;

0 commit comments

Comments
 (0)