Skip to content

Commit 9a8609f

Browse files
authored
Multimodal Support: Image and Screenshot Handling for Vision Models (#24)
* fix: enhance OpenAI chat message structure to support multimodal content
* test: add integration and unit tests for multimodal support in OpenRouter API
* chore: update version to 0.4.2 and add multimodal support improvements in changelog
1 parent 914b3c0 commit 9a8609f

File tree

16 files changed

+1404
-38
lines changed

16 files changed

+1404
-38
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ All notable changes to the OpenRouter IntelliJ Plugin will be documented in this
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.4.2] - 2026-01-20
9+
10+
### Improvements
11+
12+
- **Multimodal Support** - Added support for image, audio, and video content with AI models
13+
- **Test Coverage** - Added automated integration tests for multimodal capabilities (images, audio, video)
14+
- **Test Infrastructure** - Added TestMediaGenerator utility for automated test media generation
15+
816
## [0.4.1] - 2025-12-23
917

1018
### Bug Fixes

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# OpenRouter IntelliJ Plugin
22

33
[![JetBrains Plugin](https://img.shields.io/badge/JetBrains-Plugin-orange.svg)](https://plugins.jetbrains.com/plugin/28520)
4-
[![Version](https://img.shields.io/badge/version-0.4.1-blue.svg)](https://github.com/DimazzzZ/openrouter-intellij-plugin/releases)
4+
[![Version](https://img.shields.io/badge/version-0.4.2-blue.svg)](https://github.com/DimazzzZ/openrouter-intellij-plugin/releases)
55
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
66

77
An IntelliJ IDEA plugin for integrating with [OpenRouter.ai](https://openrouter.ai), providing access to 400+ AI models with usage monitoring, quota tracking, and seamless JetBrains AI Assistant integration.

gradle.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
pluginGroup = org.zhavoronkov
44
pluginName = OpenRouter
55
pluginRepositoryUrl = https://github.com/DimazzzZ/openrouter-intellij-plugin
6-
pluginVersion = 0.4.1
6+
pluginVersion = 0.4.2
77

88
# Plugin metadata
99
pluginId = org.zhavoronkov.openrouter

src/main/kotlin/org/zhavoronkov/openrouter/models/OpenRouterModels.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,15 @@ data class ChatCompletionRequest(
238238
val stream: Boolean? = false
239239
)
240240

241+
/**
242+
* Chat message that supports both text-only and multimodal content
243+
* Content can be:
244+
* - A simple string for text-only messages
245+
* - An array of content parts for multimodal messages (text + images/files)
246+
*/
241247
data class ChatMessage(
242248
val role: String, // "system", "user", "assistant"
243-
val content: String,
249+
val content: com.google.gson.JsonElement, // Can be String or Array of content parts
244250
val name: String? = null
245251
)
246252

src/main/kotlin/org/zhavoronkov/openrouter/proxy/models/OpenAIModels.kt

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.zhavoronkov.openrouter.proxy.models
22

3+
import com.google.gson.JsonElement
34
import com.google.gson.annotations.SerializedName
45

56
/**
@@ -21,12 +22,45 @@ data class OpenAIChatCompletionRequest(
2122
val user: String? = null
2223
)
2324

25+
/**
26+
* OpenAI chat message that supports both text-only and multimodal content
27+
* Content can be:
28+
* - A simple string for text-only messages
29+
* - An array of content parts for multimodal messages (text + images/files)
30+
*/
2431
data class OpenAIChatMessage(
2532
val role: String, // "system", "user", "assistant"
26-
val content: String,
33+
val content: JsonElement, // Can be String or Array of ContentPart
2734
val name: String? = null
2835
)
2936

37+
/**
38+
* Content part for multimodal messages
39+
* Used when content is an array
40+
*/
41+
data class OpenAIContentPart(
42+
val type: String, // "text", "image_url", "input_audio", etc.
43+
val text: String? = null, // For type="text"
44+
@SerializedName("image_url") val imageUrl: OpenAIImageUrl? = null, // For type="image_url"
45+
@SerializedName("input_audio") val inputAudio: OpenAIInputAudio? = null // For type="input_audio"
46+
)
47+
48+
/**
49+
* Image URL content for vision models
50+
*/
51+
data class OpenAIImageUrl(
52+
val url: String, // URL or base64 data URI
53+
val detail: String? = null // "auto", "low", "high"
54+
)
55+
56+
/**
57+
* Audio input content for audio models
58+
*/
59+
data class OpenAIInputAudio(
60+
val data: String, // Base64 encoded audio data
61+
val format: String // "wav", "mp3", etc.
62+
)
63+
3064
// Chat Completion Response Models
3165
data class OpenAIChatCompletionResponse(
3266
val id: String,

src/main/kotlin/org/zhavoronkov/openrouter/proxy/translation/RequestTranslator.kt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,9 @@ object RequestTranslator {
7878
// Basic validation
7979
request.model.isNotBlank() &&
8080
request.messages.isNotEmpty() &&
81-
request.messages.all { it.role.isNotBlank() && it.content.isNotBlank() } &&
81+
request.messages.all { message ->
82+
message.role.isNotBlank() && isValidContent(message.content)
83+
} &&
8284
(request.temperature == null || request.temperature in 0.0..2.0) &&
8385
(request.maxTokens == null || request.maxTokens > 0) &&
8486
(request.topP == null || request.topP in 0.0..1.0)
@@ -91,6 +93,19 @@ object RequestTranslator {
9193
}
9294
}
9395

96+
/**
97+
* Validates that content is either a non-blank string or a non-empty array
98+
*/
99+
private fun isValidContent(content: com.google.gson.JsonElement): Boolean {
100+
return when {
101+
content.isJsonPrimitive && content.asJsonPrimitive.isString ->
102+
content.asString.isNotBlank()
103+
content.isJsonArray ->
104+
content.asJsonArray.size() > 0
105+
else -> false
106+
}
107+
}
108+
94109
/**
95110
* Gets available model mappings for documentation/debugging
96111
*/

src/main/kotlin/org/zhavoronkov/openrouter/proxy/translation/ResponseTranslator.kt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.zhavoronkov.openrouter.proxy.translation
22

3+
import com.google.gson.JsonPrimitive
34
import org.zhavoronkov.openrouter.models.ChatCompletionResponse
45
import org.zhavoronkov.openrouter.models.ProvidersResponse
56
import org.zhavoronkov.openrouter.proxy.models.OpenAIChatChoice
@@ -43,7 +44,7 @@ object ResponseTranslator {
4344
index = index,
4445
message = OpenAIChatMessage(
4546
role = choice.message?.role ?: "assistant",
46-
content = choice.message?.content ?: ""
47+
content = choice.message?.content ?: JsonPrimitive("")
4748
),
4849
finishReason = choice.finishReason
4950
)
@@ -192,7 +193,9 @@ object ResponseTranslator {
192193
response.model.isNotBlank() &&
193194
response.choices.isNotEmpty() &&
194195
response.choices.all { choice ->
195-
choice.message.role.isNotBlank() && choice.message.content.isNotBlank()
196+
choice.message.role.isNotBlank() &&
197+
(choice.message.content.isJsonPrimitive &&
198+
choice.message.content.asString.isNotBlank())
196199
}
197200
} catch (e: NullPointerException) {
198201
PluginLogger.Service.error("Response validation failed: null value encountered", e)

src/main/kotlin/org/zhavoronkov/openrouter/startup/WhatsNewNotificationActivity.kt

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import java.io.IOException
2626
class WhatsNewNotificationActivity : ProjectActivity {
2727

2828
companion object {
29-
private const val CURRENT_VERSION = "0.4.1"
29+
private const val CURRENT_VERSION = "0.4.2"
3030
private const val CHANGELOG_URL =
3131
"https://github.com/DimazzzZ/openrouter-intellij-plugin/blob/main/CHANGELOG.md"
3232
}
@@ -68,16 +68,9 @@ class WhatsNewNotificationActivity : ProjectActivity {
6868
.createNotification(
6969
"OpenRouter Plugin Updated to v$CURRENT_VERSION",
7070
"""
71-
<b>🔧 Bug Fixes:</b><br/>
72-
• <b>Fixed AI Assistant Streaming</b> - SSE format now complies with specification<br/>
73-
• <b>Stream Termination</b> - All streams properly end with [DONE] marker<br/>
74-
• <b>Error Handling</b> - Fixed error response SSE format<br/>
75-
<br/>
76-
<b>📝 Logging Improvements:</b><br/>
77-
• 78% reduction in log noise • Request duration metrics • Standardized request IDs<br/>
78-
<br/>
79-
<b>🧪 Testing:</b><br/>
80-
• 11 new SSE format compliance tests • Regression tests for streaming bug
71+
<b>Improvements:</b><br/>
72+
• <b>Multimodal Support</b> - Added support for image, audio, and video content with AI models<br/>
73+
• <b>Test Coverage</b> - Added automated tests for multimodal capabilities
8174
""".trimIndent(),
8275
NotificationType.INFORMATION
8376
)

src/main/resources/META-INF/plugin.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@
6464

6565
<!-- Change notes for this version -->
6666
<change-notes><![CDATA[
67+
<h3>Version 0.4.2 - Multimodal Support (2026-01-20)</h3>
68+
<p><strong>Improvements:</strong></p>
69+
<ul>
70+
<li><strong>Multimodal Support</strong> - Added support for image, audio, and video content with AI models</li>
71+
<li><strong>Test Coverage</strong> - Added automated tests for multimodal capabilities</li>
72+
</ul>
73+
74+
<hr/>
75+
6776
<h3>🔧 Version 0.4.1 - SSE Streaming Fix & Logging Improvements (2025-12-23)</h3>
6877
<p><strong>🐛 Bug Fixes:</strong></p>
6978
<ul>

0 commit comments

Comments
 (0)