Skip to content

Commit f40b3d5

Browse files
committed
Add: Berget AI provider support
1 parent ce69784 commit f40b3d5

File tree

6 files changed

+156
-1
lines changed

6 files changed

+156
-1
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- **Berget provider** - Added support for Berget transcription service with Swedish-optimized KB Whisper Large model
13+
1014
## 0.0.7 - 2026-02-05
1115

1216
### Added

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ ostt supports multiple AI transcription providers. Bring your own API key and ch
5050
- **groq-whisper-large-v3** - High accuracy processing
5151
- **groq-whisper-large-v3-turbo** - Fastest transcription speed
5252

53+
### Berget
54+
- **berget-whisper-kb-large** - KB Whisper Large (Swedish optimized)
55+
5356
Configure your preferred provider and model using `ostt auth`.
5457

5558
## Installation

src/transcription/api/berget.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
//! Berget API implementation.
2+
//!
3+
//! Handles transcription requests to Berget's OpenAI-compatible Whisper API using multipart form data.
4+
5+
use std::path::Path;
6+
use serde::Deserialize;
7+
8+
use super::TranscriptionConfig;
9+
10+
/// Berget API response wrapper
11+
#[derive(Debug, Deserialize)]
12+
struct BergetResponse {
13+
text: String,
14+
}
15+
16+
/// Berget API error response
17+
#[derive(Debug, Deserialize)]
18+
#[allow(dead_code)]
19+
struct BergetErrorResponse {
20+
code: String,
21+
error: String,
22+
#[serde(default)]
23+
details: Option<String>,
24+
}
25+
26+
/// Transcribes an audio file using Berget's Whisper API.
27+
///
28+
/// Uses multipart form data with bearer token authentication.
29+
/// Berget provides an OpenAI-compatible API endpoint.
30+
///
31+
/// Keywords are passed as the `prompt` parameter to guide transcription context.
32+
pub async fn transcribe(
33+
config: &TranscriptionConfig,
34+
audio_path: &Path,
35+
) -> anyhow::Result<String> {
36+
let audio_data = std::fs::read(audio_path).map_err(|e| {
37+
anyhow::anyhow!("Failed to read audio file: {e}")
38+
})?;
39+
40+
let client = reqwest::Client::new();
41+
42+
let file_name = audio_path
43+
.file_name()
44+
.unwrap_or_default()
45+
.to_string_lossy()
46+
.to_string();
47+
48+
let file_part = reqwest::multipart::Part::bytes(audio_data)
49+
.file_name(file_name.clone())
50+
.mime_str("audio/mpeg")
51+
.map_err(|e| anyhow::anyhow!("Failed to create file part for upload: {e}"))?;
52+
53+
let mut form = reqwest::multipart::Form::new()
54+
.part("file", file_part)
55+
.text("model", config.model.api_model_name().to_string());
56+
57+
// Debug log: Log the API call details (without the audio data)
58+
let mut debug_params = vec![
59+
format!("model={}", config.model.api_model_name()),
60+
];
61+
62+
// Add keywords as prompt for better transcription context
63+
if !config.keywords.is_empty() {
64+
let prompt = config.keywords.join(", ");
65+
form = form.text("prompt", prompt.clone());
66+
debug_params.push(format!("prompt={prompt}"));
67+
tracing::debug!("Keywords used as prompt for Berget model: {:?}", config.keywords);
68+
}
69+
70+
let endpoint = config.model.endpoint();
71+
72+
tracing::debug!(
73+
"Berget API Call:\n URL: {}\n Method: POST\n Headers:\n Authorization: Bearer <redacted>\n Content-Type: multipart/form-data\n Body parameters: {}",
74+
endpoint,
75+
debug_params.join("\n ")
76+
);
77+
78+
let response = match client
79+
.post(endpoint)
80+
.bearer_auth(&config.api_key)
81+
.multipart(form)
82+
.send()
83+
.await
84+
{
85+
Ok(resp) => resp,
86+
Err(e) => {
87+
let error_msg = if e.is_connect() {
88+
"Failed to connect to Berget API server. Check your internet connection.".to_string()
89+
} else if e.is_timeout() {
90+
"Request to Berget timed out. The API server is not responding.".to_string()
91+
} else if e.to_string().contains("builder") {
92+
format!("Failed to build Berget API request: {e}. This may be a configuration error.")
93+
} else {
94+
format!("Berget network error: {e}")
95+
};
96+
return Err(anyhow::anyhow!(error_msg));
97+
}
98+
};
99+
100+
if !response.status().is_success() {
101+
let status = response.status();
102+
103+
// Parse the JSON error response - all errors follow the same structure
104+
let error_message = response
105+
.json::<BergetErrorResponse>()
106+
.await
107+
.map(|e| e.error)
108+
.unwrap_or_else(|_| format!("HTTP {status}"));
109+
110+
return Err(anyhow::anyhow!("Berget API error: {error_message}"));
111+
}
112+
113+
let berget_response: BergetResponse = response
114+
.json()
115+
.await
116+
.map_err(|e| anyhow::anyhow!("Failed to parse Berget response: {e}"))?;
117+
118+
// Debug log: Log the full response for debugging
119+
tracing::debug!(
120+
"Berget API Response:\n Status: Success\n Transcription length: {} characters\n Full response: {:#?}",
121+
berget_response.text.len(),
122+
berget_response
123+
);
124+
125+
Ok(berget_response.text.trim().to_string())
126+
}

src/transcription/api/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod openai;
88
mod deepgram;
99
mod deepinfra;
1010
mod groq;
11+
mod berget;
1112

1213
use serde::Deserialize;
1314
use std::path::Path;
@@ -86,6 +87,9 @@ pub async fn transcribe(
8687
TranscriptionProvider::Groq => {
8788
groq::transcribe(config, audio_path).await
8889
}
90+
TranscriptionProvider::Berget => {
91+
berget::transcribe(config, audio_path).await
92+
}
8993
}?;
9094

9195
Ok(result)

src/transcription/model.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ pub enum TranscriptionModel {
2828
GroqWhisperLargeV3,
2929
/// Groq Whisper Large V3 Turbo model (faster)
3030
GroqWhisperLargeV3Turbo,
31+
/// Berget Whisper KB Large model (Swedish optimized)
32+
BergetWhisperKBLarge,
3133
}
3234

3335
impl TranscriptionModel {
@@ -44,6 +46,7 @@ impl TranscriptionModel {
4446
| TranscriptionModel::DeepInfraWhisperBase => TranscriptionProvider::DeepInfra,
4547
TranscriptionModel::GroqWhisperLargeV3
4648
| TranscriptionModel::GroqWhisperLargeV3Turbo => TranscriptionProvider::Groq,
49+
TranscriptionModel::BergetWhisperKBLarge => TranscriptionProvider::Berget,
4750
}
4851
}
4952

@@ -59,6 +62,7 @@ impl TranscriptionModel {
5962
TranscriptionModel::DeepInfraWhisperBase => "deepinfra-whisper-base",
6063
TranscriptionModel::GroqWhisperLargeV3 => "groq-whisper-large-v3",
6164
TranscriptionModel::GroqWhisperLargeV3Turbo => "groq-whisper-large-v3-turbo",
65+
TranscriptionModel::BergetWhisperKBLarge => "berget-whisper-kb-large",
6266
}
6367
}
6468

@@ -74,6 +78,7 @@ impl TranscriptionModel {
7478
TranscriptionModel::DeepInfraWhisperBase => "Whisper Base (fast, lightweight)",
7579
TranscriptionModel::GroqWhisperLargeV3 => "Whisper Large V3 (high accuracy)",
7680
TranscriptionModel::GroqWhisperLargeV3Turbo => "Whisper Large V3 Turbo (fastest)",
81+
TranscriptionModel::BergetWhisperKBLarge => "KB Whisper Large (Swedish optimized)",
7782
}
7883
}
7984

@@ -89,7 +94,12 @@ impl TranscriptionModel {
8994
TranscriptionModel::DeepInfraWhisperLargeV3
9095
| TranscriptionModel::DeepInfraWhisperBase => "https://api.deepinfra.com/v1/inference",
9196
TranscriptionModel::GroqWhisperLargeV3
92-
| TranscriptionModel::GroqWhisperLargeV3Turbo => "https://api.groq.com/openai/v1/audio/transcriptions",
97+
| TranscriptionModel::GroqWhisperLargeV3Turbo => {
98+
"https://api.groq.com/openai/v1/audio/transcriptions"
99+
}
100+
TranscriptionModel::BergetWhisperKBLarge => {
101+
"https://api.berget.ai/v1/audio/transcriptions"
102+
}
93103
}
94104
}
95105

@@ -105,6 +115,7 @@ impl TranscriptionModel {
105115
TranscriptionModel::DeepInfraWhisperBase => "openai/whisper-base",
106116
TranscriptionModel::GroqWhisperLargeV3 => "whisper-large-v3",
107117
TranscriptionModel::GroqWhisperLargeV3Turbo => "whisper-large-v3-turbo",
118+
TranscriptionModel::BergetWhisperKBLarge => "KBLab/kb-whisper-large",
108119
}
109120
}
110121

@@ -120,6 +131,7 @@ impl TranscriptionModel {
120131
"deepinfra-whisper-base" => Some(TranscriptionModel::DeepInfraWhisperBase),
121132
"groq-whisper-large-v3" => Some(TranscriptionModel::GroqWhisperLargeV3),
122133
"groq-whisper-large-v3-turbo" => Some(TranscriptionModel::GroqWhisperLargeV3Turbo),
134+
"berget-whisper-kb-large" => Some(TranscriptionModel::BergetWhisperKBLarge),
123135
_ => None,
124136
}
125137
}
@@ -136,6 +148,7 @@ impl TranscriptionModel {
136148
TranscriptionModel::DeepInfraWhisperBase,
137149
TranscriptionModel::GroqWhisperLargeV3,
138150
TranscriptionModel::GroqWhisperLargeV3Turbo,
151+
TranscriptionModel::BergetWhisperKBLarge,
139152
]
140153
}
141154

src/transcription/provider.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ pub enum TranscriptionProvider {
1212
Deepgram,
1313
DeepInfra,
1414
Groq,
15+
Berget,
1516
}
1617

1718
impl TranscriptionProvider {
@@ -21,6 +22,7 @@ impl TranscriptionProvider {
2122
TranscriptionProvider::Deepgram => "deepgram",
2223
TranscriptionProvider::DeepInfra => "deepinfra",
2324
TranscriptionProvider::Groq => "groq",
25+
TranscriptionProvider::Berget => "berget",
2426
}
2527
}
2628

@@ -30,6 +32,7 @@ impl TranscriptionProvider {
3032
TranscriptionProvider::Deepgram => "Deepgram",
3133
TranscriptionProvider::DeepInfra => "DeepInfra",
3234
TranscriptionProvider::Groq => "Groq",
35+
TranscriptionProvider::Berget => "Berget",
3336
}
3437
}
3538

@@ -39,6 +42,7 @@ impl TranscriptionProvider {
3942
"deepgram" => Some(TranscriptionProvider::Deepgram),
4043
"deepinfra" => Some(TranscriptionProvider::DeepInfra),
4144
"groq" => Some(TranscriptionProvider::Groq),
45+
"berget" => Some(TranscriptionProvider::Berget),
4246
_ => None,
4347
}
4448
}
@@ -49,6 +53,7 @@ impl TranscriptionProvider {
4953
TranscriptionProvider::Deepgram,
5054
TranscriptionProvider::DeepInfra,
5155
TranscriptionProvider::Groq,
56+
TranscriptionProvider::Berget,
5257
]
5358
}
5459
}

0 commit comments

Comments
 (0)