feat(ai): Add automatic Ollama fallback when osvm.ai is unavailable

0xrinegade · claude · 0xrinegade · commit 0f40985aeee1 · 2025-11-27T07:19:06.000+03:00
- Add fallback mechanism to AiService that tries local Ollama when primary osvm.ai endpoint fails or times out - Preserve system prompt when falling back (critical for agentic behavior) - Add user notification in chat UI: "⚡ Using local AI (model) - primary service unavailable" - Add `did_use_fallback()` and `get_fallback_model()` public methods for UI integration - Use atomic flag for thread-safe fallback state tracking - Default fallback model: qwen3-coder:30b (configurable via OLLAMA_MODEL env) - Reduced retry attempts from 4 to 2 for faster fallback response This fixes the chat hanging forever when osvm.ai is down by automatically switching to local Ollama while preserving the full system prompt for proper agentic execution. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/src/services/ai_service.rs b/src/services/ai_service.rs
@@ -77,6 +77,8 @@ pub struct AiService {
     /// Fallback to Ollama if primary API fails
     fallback_url: Option<String>,
     fallback_model: Option<String>,
+    /// Track if last request used fallback (for UI notification)
+    last_used_fallback: std::sync::Arc<std::sync::atomic::AtomicBool>,
 }
 
 impl AiService {
@@ -257,9 +259,20 @@ impl AiService {
             conversation_history: std::sync::Arc::new(std::sync::Mutex::new(Vec::new())),
             fallback_url,
             fallback_model,
+            last_used_fallback: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
         }
     }
 
+    /// Check if the last AI request used fallback (and reset the flag)
+    pub fn did_use_fallback(&self) -> bool {
+        self.last_used_fallback.swap(false, std::sync::atomic::Ordering::SeqCst)
+    }
+
+    /// Get the fallback model name if available
+    pub fn get_fallback_model(&self) -> Option<&str> {
+        self.fallback_model.as_deref()
+    }
+
     /// Add a message to the conversation history
     pub fn add_to_history(&self, role: &str, content: &str) {
         if let Ok(mut history) = self.conversation_history.lock() {
@@ -574,7 +587,9 @@ impl AiService {
 
                 // Check if Ollama is available before trying
                 if self.is_ollama_available().await {
-                    debug_print!("🔄 Falling back to local Ollama (system prompt preserved)");
+                    // Set fallback flag for UI notification
+                    self.last_used_fallback.store(true, std::sync::atomic::Ordering::SeqCst);
+                    log::info!("AI fallback: using local Ollama instead of osvm.ai");
 
                     // Use Ollama with the SAME system prompt
                     match self.query_ollama_fallback(question, system_prompt.clone(), debug_mode).await {
diff --git a/src/utils/agent_chat_v2/agent/execution.rs b/src/utils/agent_chat_v2/agent/execution.rs
@@ -143,6 +143,18 @@ impl AdvancedChatState {
         // Determine if any MCP servers/tools are configured
         let no_configured_tools = available_tools.is_empty();
 
+        // Check if AI service used Ollama fallback and notify user
+        if self.ai_service.did_use_fallback() {
+            let fallback_model = self.ai_service.get_fallback_model().unwrap_or("local model");
+            let _ = self.add_message_to_session(
+                session_id,
+                ChatMessage::System(format!(
+                    "⚡ Using local AI ({}) - primary service unavailable",
+                    fallback_model
+                )),
+            );
+        }
+
         match tool_plan_result {
             Err(_) => {
                 warn!("AI planning timed out");