
Commit 593ca24

Merge pull request #754 from epicenter-so/fix-metal-linking
fix(whisper): resolve GPU acceleration build issues
2 parents: 5d4dc14 + e64eb48

File tree: 8 files changed, +52 −139 lines


apps/whispering/src-tauri/Cargo.lock

Lines changed: 10 additions & 37 deletions
Some generated files are not rendered by default.

apps/whispering/src-tauri/Cargo.toml

Lines changed: 8 additions & 4 deletions
@@ -42,7 +42,8 @@ enigo = "0.5.0"
 cpal = "0.16.0"
 tracing = "0.1.41"
 thiserror = "2.0.12"
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log"] }
+# Base whisper-rs dependency
+whisper-rs = { version = "0.15.0" }
 hound = "3.5"
 lazy_static = "1.4"
 tempfile = "3.8"
@@ -58,18 +59,21 @@ windows-sys = { version = "0.59", features = ["Win32_Foundation", "Win32_System_
 [target.'cfg(target_os = "macos")'.dependencies]
 accessibility-sys = "0.1.3"
 core-foundation-sys = "0.8.7"
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log", "metal", "coreml"] }
+# macOS: CoreML for Apple Silicon Neural Engine (Metal disabled due to linking issues)
+whisper-rs = { version = "0.15.0", features = ["coreml"] }
 
 [target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
 tauri-plugin-global-shortcut = "2"
 tauri-plugin-single-instance = "2"
 tauri-plugin-updater = "2"
 
 [target.'cfg(target_os = "windows")'.dependencies]
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log", "cuda"] }
+# Windows: CUDA for NVIDIA GPUs, Vulkan for AMD/Intel GPUs
+whisper-rs = { version = "0.15.0", features = ["cuda", "vulkan"] }
 
 [target.'cfg(all(unix, not(target_os = "macos")))'.dependencies]
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log", "cuda"] }
+# Linux: CUDA for NVIDIA, Vulkan for AMD/Intel, HipBLAS for AMD ROCm
+whisper-rs = { version = "0.15.0", features = ["cuda", "vulkan", "hipblas"] }
 
 [profile.dev]
 incremental = true # Compile your binary in smaller steps.
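
Note: with this change, GPU backend selection happens entirely at compile time through the per-target whisper-rs features above; there is no longer a runtime useGpu setting. As a rough illustration (not part of this commit; the function name and backend strings are invented for the example), the expected backend per platform could still be reported with cfg!(target_os = ...) checks, much like the get_gpu_info command that src/whisper_cpp/mod.rs removes below:

/// Illustrative sketch only: report the GPU backend this build is expected to
/// use, based on the per-target whisper-rs features declared in Cargo.toml.
fn expected_backend() -> &'static str {
    if cfg!(target_os = "macos") {
        // macOS builds enable "coreml" (Metal is disabled due to linking issues)
        "CoreML (Apple Neural Engine)"
    } else if cfg!(target_os = "windows") {
        // Windows builds enable "cuda" and "vulkan"
        "CUDA (NVIDIA) or Vulkan (AMD/Intel)"
    } else if cfg!(all(unix, not(target_os = "macos"))) {
        // Linux builds enable "cuda", "vulkan", and "hipblas"
        "CUDA, Vulkan, or HipBLAS (ROCm)"
    } else {
        "CPU only"
    }
}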

apps/whispering/src-tauri/src/recorder/commands.rs

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 use crate::recorder::recorder::{AudioRecording, RecorderState, Result};
 use std::path::PathBuf;
 use std::sync::Mutex;
-use tauri::{Manager, State};
+use tauri::State;
 use tracing::{debug, info};
 
 /// Application state containing the recorder
@@ -34,7 +34,7 @@ pub async fn init_recording_session(
     output_folder: String,
     sample_rate: Option<u32>,
     state: State<'_, AppData>,
-    app_handle: tauri::AppHandle,
+    _app_handle: tauri::AppHandle,
 ) -> Result<()> {
     info!(
         "Initializing recording session: device={}, id={}, folder={}, sample_rate={:?}",
apps/whispering/src-tauri/src/whisper_cpp/mod.rs

Lines changed: 23 additions & 73 deletions
@@ -3,34 +3,6 @@ mod error;
 use error::WhisperCppError;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
 use std::io::Write;
-use serde::Serialize;
-
-#[derive(Serialize)]
-pub struct GpuInfo {
-    pub platform: String,
-    pub expected_backend: String,
-    pub gpu_enabled_in_settings: bool,
-}
-
-/// Get information about the expected GPU backend for the current platform
-#[tauri::command]
-pub fn get_gpu_info(use_gpu: bool) -> GpuInfo {
-    let (platform, expected_backend) = if cfg!(target_os = "windows") {
-        ("Windows", "CUDA (NVIDIA GPU)")
-    } else if cfg!(target_os = "macos") {
-        ("macOS", "Metal/CoreML (Apple Silicon)")
-    } else if cfg!(target_os = "linux") {
-        ("Linux", "CUDA (NVIDIA GPU)")
-    } else {
-        ("Unknown", "CPU only")
-    };
-
-    GpuInfo {
-        platform: platform.to_string(),
-        expected_backend: expected_backend.to_string(),
-        gpu_enabled_in_settings: use_gpu,
-    }
-}
 
 /// Check if audio is already in whisper-compatible format (16kHz, mono, 16-bit PCM)
 fn is_valid_wav_format(audio_data: &[u8]) -> bool {
@@ -113,48 +85,25 @@ fn convert_audio_for_whisper(audio_data: Vec<u8>) -> Result<Vec<u8>, WhisperCppE
     })
 }
 
-/// Load Whisper model with automatic GPU fallback to CPU if needed
-fn load_whisper_model(model_path: &str, use_gpu: bool) -> Result<WhisperContext, WhisperCppError> {
-    let mut params = WhisperContextParameters::default();
-    params.use_gpu = use_gpu;
-
-    // Try loading with requested settings
-    match WhisperContext::new_with_params(model_path, params) {
-        Ok(context) => {
-            let backend = if use_gpu { "GPU" } else { "CPU" };
-            tracing::info!("Whisper model loaded with {} backend", backend);
-            Ok(context)
-        }
-        Err(first_error) => {
-            // If GPU was requested and failed, try CPU fallback
-            if use_gpu {
-                tracing::warn!("GPU failed, trying CPU fallback");
-                params.use_gpu = false;
-
-                WhisperContext::new_with_params(model_path, params)
-                    .map(|ctx| {
-                        tracing::info!("Successfully fell back to CPU");
-                        ctx
-                    })
-                    .map_err(|_| WhisperCppError::ModelLoadError {
-                        message: format!("Failed to load model (GPU and CPU both failed)")
-                    })
-            } else {
-                // CPU failed with no fallback option
-                Err(WhisperCppError::ModelLoadError {
-                    message: first_error.to_string()
-                })
-            }
-        }
-    }
+/// Load Whisper model with automatic GPU support based on compiled features
+fn load_whisper_model(model_path: &str) -> Result<WhisperContext, WhisperCppError> {
+    // GPU acceleration is automatically enabled based on compile-time features:
+    // - macOS: Metal + CoreML
+    // - Windows: CUDA + Vulkan
+    // - Linux: CUDA + Vulkan + HipBLAS
+    // The whisper-rs library automatically selects the best available backend
+
+    WhisperContext::new_with_params(model_path, WhisperContextParameters::default())
+        .map_err(|e| WhisperCppError::ModelLoadError {
+            message: format!("Failed to load model: {}", e)
+        })
 }
 
 #[tauri::command]
 pub async fn transcribe_with_whisper_cpp(
     audio_data: Vec<u8>,
     model_path: String,
     language: Option<String>,
-    use_gpu: bool,
     prompt: String,
     temperature: f32,
 ) -> Result<String, WhisperCppError> {
@@ -182,8 +131,8 @@ pub async fn transcribe_with_whisper_cpp(
         return Ok(String::new());
     }
 
-    // Load model with automatic GPU fallback
-    let context = load_whisper_model(&model_path, use_gpu)?;
+    // Load model with automatic GPU acceleration based on compiled features
+    let context = load_whisper_model(&model_path)?;
 
     // Create state and configure parameters
     let mut state = context
@@ -197,7 +146,7 @@ pub async fn transcribe_with_whisper_cpp(
     params.set_no_timestamps(true);
     params.set_temperature(temperature);
     params.set_no_speech_thold(0.2); // Better silence detection
-    params.set_suppress_non_speech_tokens(true); // Prevent hallucinations
+    params.set_suppress_nst(true); // Prevent hallucinations (non-speech tokens)
 
     // Set language if specified
     if let Some(ref lang) = language {
@@ -218,18 +167,19 @@ pub async fn transcribe_with_whisper_cpp(
         })?;
 
     // Collect transcribed text from all segments
-    let num_segments = state.full_n_segments()
-        .map_err(|e| WhisperCppError::TranscriptionError {
-            message: format!("Failed to get segments: {}", e),
-        })?;
+    let num_segments = state.full_n_segments();
 
     let mut text = String::new();
    for i in 0..num_segments {
-        let segment = state.full_get_segment_text(i)
+        let segment = state.get_segment(i)
+            .ok_or_else(|| WhisperCppError::TranscriptionError {
+                message: format!("Failed to get segment {}", i),
+            })?;
+        let segment_text = segment.to_str()
            .map_err(|e| WhisperCppError::TranscriptionError {
-                message: format!("Failed to get segment {}: {}", i, e),
+                message: format!("Failed to get segment {} text: {}", i, e),
            })?;
-        text.push_str(&segment);
+        text.push_str(segment_text);
    }
 
    Ok(text.trim().to_string())
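
Note: the segment-collection change above tracks the whisper-rs 0.15 API, where full_n_segments() returns the count directly, get_segment(i) returns an Option, and the segment text is read with to_str(). A condensed sketch of that flow in isolation (an assumption-laden example, not code from this commit: state is taken to be a WhisperState on which full() has already succeeded, and errors are reduced to plain Strings):

use whisper_rs::WhisperState;

/// Collect the transcript from every segment of a completed whisper run.
/// Sketch only: error handling is simplified to String.
fn collect_text(state: &WhisperState) -> Result<String, String> {
    let mut text = String::new();
    for i in 0..state.full_n_segments() {
        // get_segment returns None when the index is out of range
        let segment = state
            .get_segment(i)
            .ok_or_else(|| format!("missing segment {}", i))?;
        // to_str fails if the segment bytes are not valid UTF-8
        let segment_text = segment
            .to_str()
            .map_err(|e| format!("segment {} text: {}", i, e))?;
        text.push_str(segment_text);
    }
    Ok(text.trim().to_string())
}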

apps/whispering/src/lib/query/transcription.ts

Lines changed: 0 additions & 1 deletion
@@ -227,7 +227,6 @@ async function transcribeBlob(
 				prompt: settings.value['transcription.prompt'],
 				temperature: settings.value['transcription.temperature'],
 				modelPath: settings.value['transcription.whispercpp.modelPath'],
-				useGpu: settings.value['transcription.whispercpp.useGpu'],
 			},
 		);
 		default:

apps/whispering/src/lib/services/transcription/whispercpp.ts

Lines changed: 0 additions & 2 deletions
@@ -25,7 +25,6 @@ export function createWhisperCppTranscriptionService() {
 			temperature: string;
 			outputLanguage: Settings['transcription.outputLanguage'];
 			modelPath: string;
-			useGpu: boolean;
 		},
 	): Promise<Result<string, WhisperingError>> {
 		// Pre-validation
@@ -87,7 +86,6 @@ export function createWhisperCppTranscriptionService() {
 			modelPath: options.modelPath,
 			language:
 				options.outputLanguage === 'auto' ? null : options.outputLanguage,
-			useGpu: options.useGpu,
 			prompt: options.prompt,
 			temperature: Number.parseFloat(options.temperature),
 		}),

apps/whispering/src/lib/settings/settings.ts

Lines changed: 0 additions & 1 deletion
@@ -198,7 +198,6 @@ export const settingsSchema = z.object({
 		.string()
 		.default('Systran/faster-distil-whisper-small.en'),
 	'transcription.whispercpp.modelPath': z.string().default(''),
-	'transcription.whispercpp.useGpu': z.boolean().default(true),
 
 	'transformations.selectedTransformationId': z
 		.string()

apps/whispering/src/routes/(config)/settings/transcription/+page.svelte

Lines changed: 9 additions & 19 deletions
@@ -59,10 +59,11 @@
 		items={TRANSCRIPTION_SERVICE_OPTIONS}
 		bind:selected={
 			() => settings.value['transcription.selectedTranscriptionService'],
-			(selected) => settings.updateKey(
-				'transcription.selectedTranscriptionService',
-				selected
-			)
+			(selected) =>
+				settings.updateKey(
+					'transcription.selectedTranscriptionService',
+					selected,
+				)
 		}
 		placeholder="Select a transcription service"
 	/>
@@ -130,7 +131,8 @@
 		}))}
 		bind:selected={
 			() => settings.value['transcription.deepgram.model'],
-			(selected) => settings.updateKey('transcription.deepgram.model', selected)
+			(selected) =>
+				settings.updateKey('transcription.deepgram.model', selected)
 		}
 		renderOption={renderModelOption}
 	/>
@@ -146,7 +148,8 @@
 		}))}
 		bind:selected={
 			() => settings.value['transcription.elevenlabs.model'],
-			(selected) => settings.updateKey('transcription.elevenlabs.model', selected)
+			(selected) =>
+				settings.updateKey('transcription.elevenlabs.model', selected)
 		}
 		renderOption={renderModelOption}
 	>
@@ -362,19 +365,6 @@
 		</Alert.Root>
 	{/if}
 </div>
-
-<div class="flex items-center space-x-2">
-	<Checkbox
-		id="whispercpp-use-gpu"
-		bind:checked={
-			() => settings.value['transcription.whispercpp.useGpu'],
-			(checked) => settings.updateKey('transcription.whispercpp.useGpu', checked)
-		}
-	/>
-	<label for="whispercpp-use-gpu" class="text-sm font-medium">
-		Use GPU acceleration (if available)
-	</label>
-</div>
 {/if}
 
 <!-- Audio Compression Settings -->
