Skip to content

Commit cd729ce

Browse files
fix: detect Ollama vision capability from capabilities array
Newer Ollama versions (v0.6.4+) report multimodal support via a top-level `capabilities` array (e.g. ["vision", "tools"]) rather than via model_info keys. The old code only checked model_info, so models like Gemma 4 were always detected as non-vision. The code now checks the `capabilities` array first, then falls back to the model_info key scan, and finally to projector_info keys. It also wires supportsToolCalling from the `capabilities` array.

Co-Authored-By: Dishit Karia <hanmadishit74@gmail.com>
1 parent f8c5719 commit cd729ce

1 file changed

Lines changed: 17 additions & 2 deletions

File tree

src/stores/remoteModelCapabilities.ts

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,25 @@ function extractOllamaCapabilities(data: Record<string, unknown>): RemoteModelIn
4343
let contextLength = 4096;
4444
let supportsVision = false;
4545

46+
// Newer Ollama versions expose a top-level `capabilities` array (e.g. ["vision", "tools"]).
47+
// Gemma 4 and similar models use this field instead of model_info keys.
48+
let supportsToolCalling: boolean | undefined;
49+
if (Array.isArray(data.capabilities)) {
50+
const caps = data.capabilities as unknown[];
51+
supportsVision = caps.includes('vision');
52+
supportsToolCalling = caps.includes('tools');
53+
}
54+
4655
if (data.model_info && typeof data.model_info === 'object') {
4756
const parsed = parseModelInfoKeys(data.model_info as Record<string, unknown>);
4857
if (parsed.contextLength > 0) contextLength = parsed.contextLength;
49-
supportsVision = parsed.supportsVision;
58+
if (!supportsVision) supportsVision = parsed.supportsVision;
59+
}
60+
61+
// Last resort: if vision has not been detected yet, look for vision/clip keys in projector_info.
62+
if (!supportsVision && data.projector_info && typeof data.projector_info === 'object') {
63+
const projectorKeys = Object.keys(data.projector_info as Record<string, unknown>);
64+
supportsVision = projectorKeys.some(k => k.includes('vision') || k.includes('clip'));
5065
}
5166

5267
if (contextLength === 4096 && typeof data.parameters === 'string') {
@@ -63,7 +78,7 @@ function extractOllamaCapabilities(data: Record<string, unknown>): RemoteModelIn
6378
/\.Think|\.Thinking|\.IsThinkSet/.test(template) ||
6479
/^RENDERER\s/m.test(modelfile);
6580

66-
return { contextLength, supportsVision, supportsThinking };
81+
return { contextLength, supportsVision, supportsToolCalling, supportsThinking };
6782
}
6883

6984
/**

0 commit comments

Comments
 (0)