
Commit 593ca24

Merge pull request #754 from epicenter-so/fix-metal-linking
fix(whisper): resolve GPU acceleration build issues
2 parents: 5d4dc14 + e64eb48

File tree: 8 files changed, +52 −139 lines


apps/whispering/src-tauri/Cargo.lock

Lines changed: 10 additions & 37 deletions
Some generated files are not rendered by default.

apps/whispering/src-tauri/Cargo.toml

Lines changed: 8 additions & 4 deletions
@@ -42,7 +42,8 @@ enigo = "0.5.0"
 cpal = "0.16.0"
 tracing = "0.1.41"
 thiserror = "2.0.12"
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log"] }
+# Base whisper-rs dependency
+whisper-rs = { version = "0.15.0" }
 hound = "3.5"
 lazy_static = "1.4"
 tempfile = "3.8"
@@ -58,18 +59,21 @@ windows-sys = { version = "0.59", features = ["Win32_Foundation", "Win32_System_
 [target.'cfg(target_os = "macos")'.dependencies]
 accessibility-sys = "0.1.3"
 core-foundation-sys = "0.8.7"
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log", "metal", "coreml"] }
+# macOS: CoreML for Apple Silicon Neural Engine (Metal disabled due to linking issues)
+whisper-rs = { version = "0.15.0", features = ["coreml"] }
 
 [target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
 tauri-plugin-global-shortcut = "2"
 tauri-plugin-single-instance = "2"
 tauri-plugin-updater = "2"
 
 [target.'cfg(target_os = "windows")'.dependencies]
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log", "cuda"] }
+# Windows: CUDA for NVIDIA GPUs, Vulkan for AMD/Intel GPUs
+whisper-rs = { version = "0.15.0", features = ["cuda", "vulkan"] }
 
 [target.'cfg(all(unix, not(target_os = "macos")))'.dependencies]
-whisper-rs = { version = "0.15.0", features = ["whisper-cpp-log", "cuda"] }
+# Linux: CUDA for NVIDIA, Vulkan for AMD/Intel, HipBLAS for AMD ROCm
+whisper-rs = { version = "0.15.0", features = ["cuda", "vulkan", "hipblas"] }
 
 [profile.dev]
 incremental = true # Compile your binary in smaller steps.
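
Note: with this change, GPU backend selection happens entirely at compile time through the per-target whisper-rs features above; there is no longer a runtime useGpu setting. As a rough illustration (not part of this commit; the function name and backend strings are invented for the example), the expected backend per platform could still be reported with cfg!(target_os = ...) checks, much like the get_gpu_info command that src/whisper_cpp/mod.rs removes below:

/// Illustrative sketch only: report the GPU backend this build is expected to
/// use, based on the per-target whisper-rs features declared in Cargo.toml.
fn expected_backend() -> &'static str {
    if cfg!(target_os = "macos") {
        // macOS builds enable "coreml" (Metal is disabled due to linking issues)
        "CoreML (Apple Neural Engine)"
    } else if cfg!(target_os = "windows") {
        // Windows builds enable "cuda" and "vulkan"
        "CUDA (NVIDIA) or Vulkan (AMD/Intel)"
    } else if cfg!(all(unix, not(target_os = "macos"))) {
        // Linux builds enable "cuda", "vulkan", and "hipblas"
        "CUDA, Vulkan, or HipBLAS (ROCm)"
    } else {
        "CPU only"
    }
}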

apps/whispering/src-tauri/src/recorder/commands.rs

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 use crate::recorder::recorder::{AudioRecording, RecorderState, Result};
 use std::path::PathBuf;
 use std::sync::Mutex;
-use tauri::{Manager, State};
+use tauri::State;
 use tracing::{debug, info};
 
 /// Application state containing the recorder
@@ -34,7 +34,7 @@ pub async fn init_recording_session(
     output_folder: String,
     sample_rate: Option<u32>,
     state: State<'_, AppData>,
-    app_handle: tauri::AppHandle,
+    _app_handle: tauri::AppHandle,
 ) -> Result<()> {
     info!(
         "Initializing recording session: device={}, id={}, folder={}, sample_rate={:?}",
apps/whispering/src-tauri/src/whisper_cpp/mod.rs

Lines changed: 23 additions & 73 deletions
@@ -3,34 +3,6 @@ mod error;
 use error::WhisperCppError;
 use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
 use std::io::Write;
-use serde::Serialize;
-
-#[derive(Serialize)]
-pub struct GpuInfo {
-    pub platform: String,
-    pub expected_backend: String,
-    pub gpu_enabled_in_settings: bool,
-}
-
-/// Get information about the expected GPU backend for the current platform
-#[tauri::command]
-pub fn get_gpu_info(use_gpu: bool) -> GpuInfo {
-    let (platform, expected_backend) = if cfg!(target_os = "windows") {
-        ("Windows", "CUDA (NVIDIA GPU)")
-    } else if cfg!(target_os = "macos") {
-        ("macOS", "Metal/CoreML (Apple Silicon)")
-    } else if cfg!(target_os = "linux") {
-        ("Linux", "CUDA (NVIDIA GPU)")
-    } else {
-        ("Unknown", "CPU only")
-    };
-
-    GpuInfo {
-        platform: platform.to_string(),
-        expected_backend: expected_backend.to_string(),
-        gpu_enabled_in_settings: use_gpu,
-    }
-}
 
 /// Check if audio is already in whisper-compatible format (16kHz, mono, 16-bit PCM)
 fn is_valid_wav_format(audio_data: &[u8]) -> bool {
@@ -113,48 +85,25 @@ fn convert_audio_for_whisper(audio_data: Vec<u8>) -> Result<Vec<u8>, WhisperCppE
     })
 }
 
-/// Load Whisper model with automatic GPU fallback to CPU if needed
-fn load_whisper_model(model_path: &str, use_gpu: bool) -> Result<WhisperContext, WhisperCppError> {
-    let mut params = WhisperContextParameters::default();
-    params.use_gpu = use_gpu;
-
-    // Try loading with requested settings
-    match WhisperContext::new_with_params(model_path, params) {
-        Ok(context) => {
-            let backend = if use_gpu { "GPU" } else { "CPU" };
-            tracing::info!("Whisper model loaded with {} backend", backend);
-            Ok(context)
-        }
-        Err(first_error) => {
-            // If GPU was requested and failed, try CPU fallback
-            if use_gpu {
-                tracing::warn!("GPU failed, trying CPU fallback");
-                params.use_gpu = false;
-
-                WhisperContext::new_with_params(model_path, params)
-                    .map(|ctx| {
-                        tracing::info!("Successfully fell back to CPU");
-                        ctx
-                    })
-                    .map_err(|_| WhisperCppError::ModelLoadError {
-                        message: format!("Failed to load model (GPU and CPU both failed)")
-                    })
-            } else {
-                // CPU failed with no fallback option
-                Err(WhisperCppError::ModelLoadError {
-                    message: first_error.to_string()
-                })
-            }
-        }
-    }
+/// Load Whisper model with automatic GPU support based on compiled features
+fn load_whisper_model(model_path: &str) -> Result<WhisperContext, WhisperCppError> {
+    // GPU acceleration is automatically enabled based on compile-time features:
+    // - macOS: Metal + CoreML
+    // - Windows: CUDA + Vulkan
+    // - Linux: CUDA + Vulkan + HipBLAS
+    // The whisper-rs library automatically selects the best available backend
+
+    WhisperContext::new_with_params(model_path, WhisperContextParameters::default())
+        .map_err(|e| WhisperCppError::ModelLoadError {
+            message: format!("Failed to load model: {}", e)
+        })
 }
 
 #[tauri::command]
 pub async fn transcribe_with_whisper_cpp(
     audio_data: Vec<u8>,
     model_path: String,
     language: Option<String>,
-    use_gpu: bool,
     prompt: String,
     temperature: f32,
 ) -> Result<String, WhisperCppError> {
@@ -182,8 +131,8 @@ pub async fn transcribe_with_whisper_cpp(
         return Ok(String::new());
     }
 
-    // Load model with automatic GPU fallback
-    let context = load_whisper_model(&model_path, use_gpu)?;
+    // Load model with automatic GPU acceleration based on compiled features
+    let context = load_whisper_model(&model_path)?;
 
     // Create state and configure parameters
     let mut state = context
@@ -197,7 +146,7 @@ pub async fn transcribe_with_whisper_cpp(
     params.set_no_timestamps(true);
     params.set_temperature(temperature);
     params.set_no_speech_thold(0.2); // Better silence detection
-    params.set_suppress_non_speech_tokens(true); // Prevent hallucinations
+    params.set_suppress_nst(true); // Prevent hallucinations (non-speech tokens)
 
     // Set language if specified
     if let Some(ref lang) = language {
@@ -218,18 +167,19 @@ pub async fn transcribe_with_whisper_cpp(
         })?;
 
     // Collect transcribed text from all segments
-    let num_segments = state.full_n_segments()
-        .map_err(|e| WhisperCppError::TranscriptionError {
-            message: format!("Failed to get segments: {}", e),
-        })?;
+    let num_segments = state.full_n_segments();
 
     let mut text = String::new();
    for i in 0..num_segments {
-        let segment = state.full_get_segment_text(i)
+        let segment = state.get_segment(i)
+            .ok_or_else(|| WhisperCppError::TranscriptionError {
+                message: format!("Failed to get segment {}", i),
+            })?;
+        let segment_text = segment.to_str()
            .map_err(|e| WhisperCppError::TranscriptionError {
-                message: format!("Failed to get segment {}: {}", i, e),
+                message: format!("Failed to get segment {} text: {}", i, e),
            })?;
-        text.push_str(&segment);
+        text.push_str(segment_text);
    }
 
    Ok(text.trim().to_string())
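
Note: the segment-collection change above tracks the whisper-rs 0.15 API, where full_n_segments() returns the count directly, get_segment(i) returns an Option, and the segment text is read with to_str(). A condensed sketch of that flow in isolation (an assumption-laden example, not code from this commit: state is taken to be a WhisperState on which full() has already succeeded, and errors are reduced to plain Strings):

use whisper_rs::WhisperState;

/// Collect the transcript from every segment of a completed whisper run.
/// Sketch only: error handling is simplified to String.
fn collect_text(state: &WhisperState) -> Result<String, String> {
    let mut text = String::new();
    for i in 0..state.full_n_segments() {
        // get_segment returns None when the index is out of range
        let segment = state
            .get_segment(i)
            .ok_or_else(|| format!("missing segment {}", i))?;
        // to_str fails if the segment bytes are not valid UTF-8
        let segment_text = segment
            .to_str()
            .map_err(|e| format!("segment {} text: {}", i, e))?;
        text.push_str(segment_text);
    }
    Ok(text.trim().to_string())
}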

apps/whispering/src/lib/query/transcription.ts

Lines changed: 0 additions & 1 deletion
@@ -227,7 +227,6 @@ async function transcribeBlob(
 				prompt: settings.value['transcription.prompt'],
 				temperature: settings.value['transcription.temperature'],
 				modelPath: settings.value['transcription.whispercpp.modelPath'],
-				useGpu: settings.value['transcription.whispercpp.useGpu'],
 			},
 		);
 		default:

apps/whispering/src/lib/services/transcription/whispercpp.ts

Lines changed: 0 additions & 2 deletions
@@ -25,7 +25,6 @@ export function createWhisperCppTranscriptionService() {
 			temperature: string;
 			outputLanguage: Settings['transcription.outputLanguage'];
 			modelPath: string;
-			useGpu: boolean;
 		},
 	): Promise<Result<string, WhisperingError>> {
 		// Pre-validation
@@ -87,7 +86,6 @@ export function createWhisperCppTranscriptionService() {
 			modelPath: options.modelPath,
 			language:
 				options.outputLanguage === 'auto' ? null : options.outputLanguage,
-			useGpu: options.useGpu,
 			prompt: options.prompt,
 			temperature: Number.parseFloat(options.temperature),
 		}),

apps/whispering/src/lib/settings/settings.ts

Lines changed: 0 additions & 1 deletion
@@ -198,7 +198,6 @@ export const settingsSchema = z.object({
 		.string()
 		.default('Systran/faster-distil-whisper-small.en'),
 	'transcription.whispercpp.modelPath': z.string().default(''),
-	'transcription.whispercpp.useGpu': z.boolean().default(true),
 
 	'transformations.selectedTransformationId': z
 		.string()

apps/whispering/src/routes/(config)/settings/transcription/+page.svelte

Lines changed: 9 additions & 19 deletions
@@ -59,10 +59,11 @@
 		items={TRANSCRIPTION_SERVICE_OPTIONS}
 		bind:selected={
 			() => settings.value['transcription.selectedTranscriptionService'],
-			(selected) => settings.updateKey(
-				'transcription.selectedTranscriptionService',
-				selected
-			)
+			(selected) =>
+				settings.updateKey(
+					'transcription.selectedTranscriptionService',
+					selected,
+				)
 		}
 		placeholder="Select a transcription service"
 	/>
@@ -130,7 +131,8 @@
 		}))}
 		bind:selected={
 			() => settings.value['transcription.deepgram.model'],
-			(selected) => settings.updateKey('transcription.deepgram.model', selected)
+			(selected) =>
+				settings.updateKey('transcription.deepgram.model', selected)
 		}
 		renderOption={renderModelOption}
 	/>
@@ -146,7 +148,8 @@
 		}))}
 		bind:selected={
 			() => settings.value['transcription.elevenlabs.model'],
-			(selected) => settings.updateKey('transcription.elevenlabs.model', selected)
+			(selected) =>
+				settings.updateKey('transcription.elevenlabs.model', selected)
 		}
 		renderOption={renderModelOption}
 	>
@@ -362,19 +365,6 @@
 		</Alert.Root>
 	{/if}
 </div>
-
-<div class="flex items-center space-x-2">
-	<Checkbox
-		id="whispercpp-use-gpu"
-		bind:checked={
-			() => settings.value['transcription.whispercpp.useGpu'],
-			(checked) => settings.updateKey('transcription.whispercpp.useGpu', checked)
-		}
-	/>
-	<label for="whispercpp-use-gpu" class="text-sm font-medium">
-		Use GPU acceleration (if available)
-	</label>
-</div>
 {/if}
 
 <!-- Audio Compression Settings -->
