Skip to content

Commit b1d14dc

Browse files
committed
Fix Svelte warning (label not associated with its input); fix YouTube transcription and uploaded-image handling for MCP tools
1 parent 8c324e8 commit b1d14dc

3 files changed

Lines changed: 58 additions & 6 deletions

File tree

src/lib/backend/mcp-tools.ts

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -206,14 +206,14 @@ export const mcpToolDefinitions: ChatCompletionTool[] = [
206206
},
207207
image_url: {
208208
type: 'string',
209-
description: 'URL of the image to analyze'
209+
description: 'URL of the image to analyze. If omitted, the most recent image in the chat will be used.'
210210
},
211211
model: {
212212
type: 'string',
213213
description: 'Optional vision model to use (default: glm-4.6v)'
214214
}
215215
},
216-
required: ['prompt', 'image_url']
216+
required: ['prompt']
217217
}
218218
}
219219
}
@@ -387,7 +387,7 @@ async function executeYoutubeTranscribe(
387387
'Authorization': `Bearer ${apiKey}`,
388388
'Content-Type': 'application/json'
389389
},
390-
body: JSON.stringify({ url })
390+
body: JSON.stringify({ urls: [url] })
391391
});
392392

393393
if (!response.ok) {
@@ -399,7 +399,21 @@ async function executeYoutubeTranscribe(
399399

400400
let resultText = `YouTube transcript for: ${url}\n\n`;
401401

402-
if (data.transcript) {
402+
// Handle new API response format (array of transcripts)
403+
if (data.transcripts && Array.isArray(data.transcripts) && data.transcripts.length > 0) {
404+
const result = data.transcripts[0];
405+
if (result.success && result.transcript) {
406+
const transcript = result.transcript.slice(0, 8000);
407+
resultText += transcript;
408+
if (result.transcript.length > 8000) {
409+
resultText += `\n... (transcript truncated, ${result.transcript.length} chars total)`;
410+
}
411+
} else {
412+
return { success: false, result: '', error: result.error || 'Failed to retrieve transcript' };
413+
}
414+
}
415+
// Fallback for potential legacy/singular response format
416+
else if (data.transcript) {
403417
const transcript = data.transcript.slice(0, 8000);
404418
resultText += transcript;
405419
if (data.transcript.length > 8000) {
@@ -411,6 +425,8 @@ async function executeYoutubeTranscribe(
411425
if (data.text.length > 8000) {
412426
resultText += `\n... (transcript truncated, ${data.text.length} chars total)`;
413427
}
428+
} else {
429+
return { success: false, result: '', error: 'No transcript found in response' };
414430
}
415431

416432
return { success: true, result: resultText };

src/routes/account/+page.svelte

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -654,10 +654,11 @@
654654

655655
<div class="flex flex-col gap-2">
656656
<div class="flex justify-between">
657-
<label class="text-sm leading-none font-medium">Speed</label>
657+
<label for="tts-speed" class="text-sm leading-none font-medium">Speed</label>
658658
<span class="text-muted-foreground text-xs">{localSettings.ttsSpeed}x</span>
659659
</div>
660660
<input
661+
id="tts-speed"
661662
type="range"
662663
min="0.25"
663664
max="4.0"

src/routes/api/generate-message/+server.ts

Lines changed: 36 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -908,7 +908,42 @@ ${attachedRules.map((r) => `- ${r.name}: ${r.rule}`).join('\n')}`;
908908
if (!tc.id || !tc.function.name) continue;
909909

910910
try {
911-
const args = JSON.parse(tc.function.arguments || '{}');
911+
let args = JSON.parse(tc.function.arguments || '{}');
912+
913+
// Special handling for vision tool: inject image from context if missing
914+
if (tc.function.name === 'nanogpt_vision') {
915+
const imgUrl = args.image_url as string | undefined;
916+
// If no URL provided, or it's not a valid URL/data-uri, try to find one in context
917+
if (!imgUrl || (!imgUrl.startsWith('http') && !imgUrl.startsWith('data:'))) {
918+
log('Background: Vision tool called without valid URL, searching context for images', startTime);
919+
920+
// Search backwards for the most recent image
921+
let foundImage: string | null = null;
922+
// Iterate formattedMessages in reverse
923+
for (let i = formattedMessages.length - 1; i >= 0; i--) {
924+
const msg = formattedMessages[i];
925+
if (!msg) continue;
926+
if (typeof msg.content !== 'string' && Array.isArray(msg.content)) {
927+
for (const part of msg.content) {
928+
if (part && typeof part === 'object' && 'type' in part && part.type === 'image_url' && 'image_url' in part) {
929+
// @ts-ignore - TS might not infer the type perfectly here
930+
foundImage = part.image_url.url;
931+
break;
932+
}
933+
}
934+
}
935+
if (foundImage) break;
936+
}
937+
938+
if (foundImage) {
939+
log('Background: Injected image from context into vision tool', startTime);
940+
args.image_url = foundImage;
941+
} else {
942+
log('Background: No image found in context for vision tool', startTime);
943+
}
944+
}
945+
}
946+
912947
log(`Background: Executing tool ${tc.function.name}`, startTime);
913948

914949
const result = await executeMcpTool(tc.function.name, args, apiKey);

0 commit comments

Comments
 (0)