Skip to content

Commit 621b521

Browse files
PurpleDoubleD and claude committed
fix: 5 critical bugs — CLIP fallback, BM25, agent image gen, whisper, chat history
1. CLIP/VAE fallback: throw descriptive error with download instructions instead of silently using wrong model (e.g. qwen LLM as FLUX encoder) 2. RAG BM25: proper IDF calculation using document frequency across all chunks instead of hardcoded Math.log(2) 3. Agent image_generate: actually calls ComfyUI via dynamic workflow builder instead of returning stub string 4. Whisper check: isSpeechRecognitionSupported() now checks if Whisper is actually running instead of hardcoded true 5. Chat history: filter empty assistant messages before sending to LLM (empty placeholder was confusing the model) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c742c6a commit 621b521

5 files changed

Lines changed: 76 additions & 29 deletions

File tree

src/api/agents.ts

Lines changed: 31 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -295,7 +295,37 @@ export async function executeTool(
295295
}
296296

297297
case "image_generate": {
298-
return "Image generation delegated to Create tab";
298+
const prompt = args.prompt || args.description || ''
299+
if (!prompt) return "Error: No prompt provided for image generation."
300+
try {
301+
const { buildDynamicWorkflow } = await import('./dynamic-workflow')
302+
const { submitWorkflow, getHistory, classifyModel, getImageModels } = await import('./comfyui')
303+
const models = await getImageModels()
304+
if (models.length === 0) return "Error: No image models available in ComfyUI."
305+
const model = models[0]
306+
const workflow = await buildDynamicWorkflow({
307+
prompt, negativePrompt: '', model: model.name,
308+
sampler: 'euler', scheduler: 'normal', steps: 20, cfgScale: 7,
309+
width: 1024, height: 1024, seed: -1, batchSize: 1,
310+
}, classifyModel(model.name))
311+
const promptId = await submitWorkflow(workflow)
312+
for (let i = 0; i < 300; i++) {
313+
await new Promise(r => setTimeout(r, 1000))
314+
const history = await getHistory(promptId)
315+
if (history?.status?.completed) {
316+
const outputs = history.outputs ?? {}
317+
for (const nodeId of Object.keys(outputs)) {
318+
const files = [...(outputs[nodeId].images ?? []), ...(outputs[nodeId].gifs ?? [])]
319+
if (files.length > 0) return `Image generated: ${files[0].filename} (prompt: "${prompt}")`
320+
}
321+
return "Generation completed but no output produced."
322+
}
323+
if (history?.status?.status_str === 'error') return `Generation failed: ${history.status.messages?.[0]?.[1]?.message || 'Unknown error'}`
324+
}
325+
return "Generation timed out after 5 minutes."
326+
} catch (err) {
327+
return `Generation failed: ${err instanceof Error ? err.message : String(err)}`
328+
}
299329
}
300330

301331
default:

src/api/comfyui.ts

Lines changed: 17 additions & 20 deletions
Original file line number · Diff line number · Diff line change
@@ -229,44 +229,41 @@ export async function detectVideoBackend(): Promise<VideoBackend> {
229229

230230
export async function findMatchingVAE(modelType: ModelType): Promise<string> {
231231
const vaes = await getVAEModels()
232-
if (vaes.length === 0) throw new Error('No VAE models found in ComfyUI. Add a VAE to models/vae/')
232+
if (vaes.length === 0) throw new Error('No VAE models found. Download a VAE for your model type from the Model Manager.')
233233
const lower = (s: string) => s.toLowerCase()
234234

235-
// Try to find a matching VAE by model type keywords
236235
if (modelType === 'flux' || modelType === 'flux2') {
237-
const match = vaes.find(v => lower(v).includes('flux'))
236+
const match = vaes.find(v => lower(v).includes('flux') || lower(v).includes('ae'))
238237
if (match) return match
238+
throw new Error(`No FLUX VAE found. Download "ae.safetensors" from the Model Manager (FLUX bundles include it).`)
239239
}
240-
if (modelType === 'wan') {
241-
const match = vaes.find(v => lower(v).includes('wan'))
242-
if (match) return match
243-
}
244-
if (modelType === 'hunyuan') {
245-
const match = vaes.find(v => lower(v).includes('hunyuan') || lower(v).includes('wan'))
240+
if (modelType === 'wan' || modelType === 'hunyuan') {
241+
const match = vaes.find(v => lower(v).includes('wan') || lower(v).includes('hunyuan'))
246242
if (match) return match
243+
throw new Error(`No Wan/Hunyuan VAE found. Download "wan_2.1_vae.safetensors" from the Model Manager.`)
247244
}
248-
249-
// Fallback: first available VAE
250-
console.warn(`[ComfyUI] No ${modelType}-specific VAE found, using fallback: ${vaes[0]}`)
245+
// SDXL/SD1.5 checkpoints include VAE — any VAE works as fallback
251246
return vaes[0]
252247
}
253248

254249
export async function findMatchingCLIP(modelType: ModelType): Promise<string> {
255250
const clips = await getCLIPModels()
256-
if (clips.length === 0) throw new Error('No CLIP/text encoder models found. Add one to models/text_encoders/')
251+
if (clips.length === 0) throw new Error('No text encoder models found. Download a CLIP/T5 model for your model type from the Model Manager.')
257252
const lower = (s: string) => s.toLowerCase()
258253

259-
// Try to find a matching CLIP by model type
260-
if (modelType === 'wan' || modelType === 'hunyuan') {
261-
const match = clips.find(c => lower(c).includes('umt5') || lower(c).includes('wan') || lower(c).includes('t5'))
254+
if (modelType === 'flux' || modelType === 'flux2') {
255+
const match = clips.find(c => lower(c).includes('t5') && !lower(c).includes('umt5'))
256+
|| clips.find(c => lower(c).includes('clip_l'))
262257
if (match) return match
258+
throw new Error(`No FLUX text encoder (T5) found. Download "t5xxl_fp8_e4m3fn.safetensors" from the Model Manager.`)
263259
}
264-
if (modelType === 'flux' || modelType === 'flux2') {
265-
const match = clips.find(c => lower(c).includes('t5') || lower(c).includes('clip'))
260+
if (modelType === 'wan' || modelType === 'hunyuan') {
261+
const match = clips.find(c => lower(c).includes('umt5') || lower(c).includes('wan'))
262+
|| clips.find(c => lower(c).includes('t5'))
266263
if (match) return match
264+
throw new Error(`No Wan/Hunyuan text encoder found. Download "umt5_xxl_fp8_e4m3fn_scaled.safetensors" from the Model Manager.`)
267265
}
268-
269-
console.warn(`[ComfyUI] No ${modelType}-specific CLIP found, using fallback: ${clips[0]}`)
266+
// SDXL/SD1.5 checkpoints include CLIP — any works
270267
return clips[0]
271268
}
272269

src/api/rag.ts

Lines changed: 8 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -80,18 +80,20 @@ function cosineSimilarity(a: number[], b: number[]): number {
8080
return dot / (Math.sqrt(magA) * Math.sqrt(magB) || 1)
8181
}
8282

83-
function bm25Score(query: string, document: string): number {
83+
function bm25Score(query: string, document: string, allDocs: string[]): number {
8484
const queryTerms = query.toLowerCase().split(/\s+/)
8585
const docTerms = document.toLowerCase().split(/\s+/)
8686
const docLen = docTerms.length
87-
const avgDl = 200
87+
const numDocs = allDocs.length || 1
88+
const avgDl = allDocs.reduce((sum, d) => sum + d.split(/\s+/).length, 0) / numDocs || 200
8889
const k1 = 1.2
8990
const b = 0.75
9091

9192
let score = 0
9293
for (const term of queryTerms) {
9394
const tf = docTerms.filter((t) => t === term).length
94-
const idf = Math.log(1 + 1)
95+
const docsWithTerm = allDocs.filter(d => d.toLowerCase().includes(term)).length
96+
const idf = Math.log((numDocs - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1)
9597
score += idf * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + (b * docLen) / avgDl)))
9698
}
9799
return score
@@ -109,10 +111,11 @@ function hybridSearch(
109111
vectorScore: cosineSimilarity(queryEmbedding, chunk.embedding),
110112
}))
111113

112-
// Get BM25 scores
114+
// Get BM25 scores (pass all docs for proper IDF calculation)
115+
const allDocTexts = chunks.map(c => c.content)
113116
const bm25Results = chunks.map((chunk) => ({
114117
chunk,
115-
bm25Score: bm25Score(query, chunk.content),
118+
bm25Score: bm25Score(query, chunk.content, allDocTexts),
116119
}))
117120

118121
// Normalize both score sets to 0-1

src/api/voice.ts

Lines changed: 17 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -6,9 +6,24 @@ import { backendCall, isTauri } from "./backend";
66
* TTS: Browser SpeechSynthesis (runs locally in the browser, no cloud)
77
*/
88

9+
let whisperChecked = false
10+
let whisperAvailable = false
11+
912
export function isSpeechRecognitionSupported(): boolean {
10-
// Always true — we use local Whisper, not browser SpeechRecognition API
11-
return true;
13+
return whisperAvailable
14+
}
15+
16+
// Call once at startup to check if Whisper is actually running
17+
export async function initWhisperCheck(): Promise<boolean> {
18+
if (whisperChecked) return whisperAvailable
19+
try {
20+
const result = await checkWhisperAvailable()
21+
whisperAvailable = result.available
22+
} catch {
23+
whisperAvailable = false
24+
}
25+
whisperChecked = true
26+
return whisperAvailable
1227
}
1328

1429
export function isSpeechSynthesisSupported(): boolean {

src/hooks/useChat.ts

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -86,7 +86,9 @@ export function useChat() {
8686

8787
const messages = [
8888
...(systemPrompt ? [{ role: "system", content: systemPrompt }] : []),
89-
...conv.messages.map((m) => ({ role: m.role, content: m.content })),
89+
...conv.messages
90+
.filter((m) => m.content.trim() !== '')
91+
.map((m) => ({ role: m.role, content: m.content })),
9092
]
9193

9294
const abort = new AbortController()

0 commit comments

Comments (0)