Skip to content

Commit 621b521

Browse files
PurpleDoubleD and claude committed
fix: 5 critical bugs — CLIP fallback, BM25, agent image gen, whisper, chat history
1. CLIP/VAE fallback: throw descriptive error with download instructions instead of silently using wrong model (e.g. qwen LLM as FLUX encoder) 2. RAG BM25: proper IDF calculation using document frequency across all chunks instead of hardcoded Math.log(2) 3. Agent image_generate: actually calls ComfyUI via dynamic workflow builder instead of returning stub string 4. Whisper check: isSpeechRecognitionSupported() now checks if Whisper is actually running instead of hardcoded true 5. Chat history: filter empty assistant messages before sending to LLM (empty placeholder was confusing the model) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c742c6a commit 621b521

5 files changed

Lines changed: 76 additions & 29 deletions

File tree

src/api/agents.ts

Lines changed: 31 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -295,7 +295,37 @@ export async function executeTool(
295295
}
296296

297297
case "image_generate": {
298-
return "Image generation delegated to Create tab";
298+
const prompt = args.prompt || args.description || ''
299+
if (!prompt) return "Error: No prompt provided for image generation."
300+
try {
301+
const { buildDynamicWorkflow } = await import('./dynamic-workflow')
302+
const { submitWorkflow, getHistory, classifyModel, getImageModels } = await import('./comfyui')
303+
const models = await getImageModels()
304+
if (models.length === 0) return "Error: No image models available in ComfyUI."
305+
const model = models[0]
306+
const workflow = await buildDynamicWorkflow({
307+
prompt, negativePrompt: '', model: model.name,
308+
sampler: 'euler', scheduler: 'normal', steps: 20, cfgScale: 7,
309+
width: 1024, height: 1024, seed: -1, batchSize: 1,
310+
}, classifyModel(model.name))
311+
const promptId = await submitWorkflow(workflow)
312+
for (let i = 0; i < 300; i++) {
313+
await new Promise(r => setTimeout(r, 1000))
314+
const history = await getHistory(promptId)
315+
if (history?.status?.completed) {
316+
const outputs = history.outputs ?? {}
317+
for (const nodeId of Object.keys(outputs)) {
318+
const files = [...(outputs[nodeId].images ?? []), ...(outputs[nodeId].gifs ?? [])]
319+
if (files.length > 0) return `Image generated: ${files[0].filename} (prompt: "${prompt}")`
320+
}
321+
return "Generation completed but no output produced."
322+
}
323+
if (history?.status?.status_str === 'error') return `Generation failed: ${history.status.messages?.[0]?.[1]?.message || 'Unknown error'}`
324+
}
325+
return "Generation timed out after 5 minutes."
326+
} catch (err) {
327+
return `Generation failed: ${err instanceof Error ? err.message : String(err)}`
328+
}
299329
}
300330

301331
default:

src/api/comfyui.ts

Lines changed: 17 additions & 20 deletions
Original file line number · Diff line number · Diff line change
@@ -229,44 +229,41 @@ export async function detectVideoBackend(): Promise<VideoBackend> {
229229

230230
export async function findMatchingVAE(modelType: ModelType): Promise<string> {
231231
const vaes = await getVAEModels()
232-
if (vaes.length === 0) throw new Error('No VAE models found in ComfyUI. Add a VAE to models/vae/')
232+
if (vaes.length === 0) throw new Error('No VAE models found. Download a VAE for your model type from the Model Manager.')
233233
const lower = (s: string) => s.toLowerCase()
234234

235-
// Try to find a matching VAE by model type keywords
236235
if (modelType === 'flux' || modelType === 'flux2') {
237-
const match = vaes.find(v => lower(v).includes('flux'))
236+
const match = vaes.find(v => lower(v).includes('flux') || lower(v).includes('ae'))
238237
if (match) return match
238+
throw new Error(`No FLUX VAE found. Download "ae.safetensors" from the Model Manager (FLUX bundles include it).`)
239239
}
240-
if (modelType === 'wan') {
241-
const match = vaes.find(v => lower(v).includes('wan'))
242-
if (match) return match
243-
}
244-
if (modelType === 'hunyuan') {
245-
const match = vaes.find(v => lower(v).includes('hunyuan') || lower(v).includes('wan'))
240+
if (modelType === 'wan' || modelType === 'hunyuan') {
241+
const match = vaes.find(v => lower(v).includes('wan') || lower(v).includes('hunyuan'))
246242
if (match) return match
243+
throw new Error(`No Wan/Hunyuan VAE found. Download "wan_2.1_vae.safetensors" from the Model Manager.`)
247244
}
248-
249-
// Fallback: first available VAE
250-
console.warn(`[ComfyUI] No ${modelType}-specific VAE found, using fallback: ${vaes[0]}`)
245+
// SDXL/SD1.5 checkpoints include VAE — any VAE works as fallback
251246
return vaes[0]
252247
}
253248

254249
export async function findMatchingCLIP(modelType: ModelType): Promise<string> {
255250
const clips = await getCLIPModels()
256-
if (clips.length === 0) throw new Error('No CLIP/text encoder models found. Add one to models/text_encoders/')
251+
if (clips.length === 0) throw new Error('No text encoder models found. Download a CLIP/T5 model for your model type from the Model Manager.')
257252
const lower = (s: string) => s.toLowerCase()
258253

259-
// Try to find a matching CLIP by model type
260-
if (modelType === 'wan' || modelType === 'hunyuan') {
261-
const match = clips.find(c => lower(c).includes('umt5') || lower(c).includes('wan') || lower(c).includes('t5'))
254+
if (modelType === 'flux' || modelType === 'flux2') {
255+
const match = clips.find(c => lower(c).includes('t5') && !lower(c).includes('umt5'))
256+
|| clips.find(c => lower(c).includes('clip_l'))
262257
if (match) return match
258+
throw new Error(`No FLUX text encoder (T5) found. Download "t5xxl_fp8_e4m3fn.safetensors" from the Model Manager.`)
263259
}
264-
if (modelType === 'flux' || modelType === 'flux2') {
265-
const match = clips.find(c => lower(c).includes('t5') || lower(c).includes('clip'))
260+
if (modelType === 'wan' || modelType === 'hunyuan') {
261+
const match = clips.find(c => lower(c).includes('umt5') || lower(c).includes('wan'))
262+
|| clips.find(c => lower(c).includes('t5'))
266263
if (match) return match
264+
throw new Error(`No Wan/Hunyuan text encoder found. Download "umt5_xxl_fp8_e4m3fn_scaled.safetensors" from the Model Manager.`)
267265
}
268-
269-
console.warn(`[ComfyUI] No ${modelType}-specific CLIP found, using fallback: ${clips[0]}`)
266+
// SDXL/SD1.5 checkpoints include CLIP — any works
270267
return clips[0]
271268
}
272269

src/api/rag.ts

Lines changed: 8 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -80,18 +80,20 @@ function cosineSimilarity(a: number[], b: number[]): number {
8080
return dot / (Math.sqrt(magA) * Math.sqrt(magB) || 1)
8181
}
8282

83-
function bm25Score(query: string, document: string): number {
83+
function bm25Score(query: string, document: string, allDocs: string[]): number {
8484
const queryTerms = query.toLowerCase().split(/\s+/)
8585
const docTerms = document.toLowerCase().split(/\s+/)
8686
const docLen = docTerms.length
87-
const avgDl = 200
87+
const numDocs = allDocs.length || 1
88+
const avgDl = allDocs.reduce((sum, d) => sum + d.split(/\s+/).length, 0) / numDocs || 200
8889
const k1 = 1.2
8990
const b = 0.75
9091

9192
let score = 0
9293
for (const term of queryTerms) {
9394
const tf = docTerms.filter((t) => t === term).length
94-
const idf = Math.log(1 + 1)
95+
const docsWithTerm = allDocs.filter(d => d.toLowerCase().includes(term)).length
96+
const idf = Math.log((numDocs - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1)
9597
score += idf * ((tf * (k1 + 1)) / (tf + k1 * (1 - b + (b * docLen) / avgDl)))
9698
}
9799
return score
@@ -109,10 +111,11 @@ function hybridSearch(
109111
vectorScore: cosineSimilarity(queryEmbedding, chunk.embedding),
110112
}))
111113

112-
// Get BM25 scores
114+
// Get BM25 scores (pass all docs for proper IDF calculation)
115+
const allDocTexts = chunks.map(c => c.content)
113116
const bm25Results = chunks.map((chunk) => ({
114117
chunk,
115-
bm25Score: bm25Score(query, chunk.content),
118+
bm25Score: bm25Score(query, chunk.content, allDocTexts),
116119
}))
117120

118121
// Normalize both score sets to 0-1

src/api/voice.ts

Lines changed: 17 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -6,9 +6,24 @@ import { backendCall, isTauri } from "./backend";
66
* TTS: Browser SpeechSynthesis (runs locally in the browser, no cloud)
77
*/
88

9+
let whisperChecked = false
10+
let whisperAvailable = false
11+
912
export function isSpeechRecognitionSupported(): boolean {
10-
// Always true — we use local Whisper, not browser SpeechRecognition API
11-
return true;
13+
return whisperAvailable
14+
}
15+
16+
// Call once at startup to check if Whisper is actually running
17+
export async function initWhisperCheck(): Promise<boolean> {
18+
if (whisperChecked) return whisperAvailable
19+
try {
20+
const result = await checkWhisperAvailable()
21+
whisperAvailable = result.available
22+
} catch {
23+
whisperAvailable = false
24+
}
25+
whisperChecked = true
26+
return whisperAvailable
1227
}
1328

1429
export function isSpeechSynthesisSupported(): boolean {

src/hooks/useChat.ts

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -86,7 +86,9 @@ export function useChat() {
8686

8787
const messages = [
8888
...(systemPrompt ? [{ role: "system", content: systemPrompt }] : []),
89-
...conv.messages.map((m) => ({ role: m.role, content: m.content })),
89+
...conv.messages
90+
.filter((m) => m.content.trim() !== '')
91+
.map((m) => ({ role: m.role, content: m.content })),
9092
]
9193

9294
const abort = new AbortController()

0 commit comments

Comments (0)