Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 87 additions & 6 deletions src/providers/copilot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct ApiChatRequest<'a> {
struct ApiMessage {
role: String,
#[serde(skip_serializing_if = "Option::is_none")]
content: Option<String>,
content: Option<ApiContent>,
#[serde(skip_serializing_if = "Option::is_none")]
tool_call_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
Expand Down Expand Up @@ -131,6 +131,28 @@ struct NativeFunctionCall {
arguments: String,
}

/// Multi-part content for vision messages (OpenAI format).
#[derive(Debug, Clone, Serialize)]
#[serde(untagged)]
enum ApiContent {
Text(String),
Parts(Vec<ContentPart>),
}

#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type")]
enum ContentPart {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "image_url")]
ImageUrl { image_url: ImageUrlDetail },
}

#[derive(Debug, Clone, Serialize)]
struct ImageUrlDetail {
url: String,
}

#[derive(Debug, Deserialize)]
struct ApiChatResponse {
choices: Vec<Choice>,
Expand Down Expand Up @@ -245,6 +267,34 @@ impl CopilotProvider {
})
}

/// Convert message content to API format, with multi-part support for
/// user messages containing `[IMAGE:...]` markers.
fn to_api_content(role: &str, content: &str) -> Option<ApiContent> {
if role != "user" {
return Some(ApiContent::Text(content.to_string()));
}

let (cleaned_text, image_refs) = crate::multimodal::parse_image_markers(content);
if image_refs.is_empty() {
return Some(ApiContent::Text(content.to_string()));
}

let mut parts = Vec::with_capacity(image_refs.len() + 1);
let trimmed = cleaned_text.trim();
if !trimmed.is_empty() {
parts.push(ContentPart::Text {
text: trimmed.to_string(),
});
}
for image_ref in image_refs {
parts.push(ContentPart::ImageUrl {
image_url: ImageUrlDetail { url: image_ref },
});
}

Some(ApiContent::Parts(parts))
Comment on lines +277 to +295
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Preserve text/image ordering when building multipart content.

parse_image_markers() gives you one cleaned text string plus the collected refs, so this always serializes as text?, image1, image2, ... regardless of where each marker originally appeared. That changes the prompt semantics for inputs like compare [IMAGE:a] and [IMAGE:b] or any interleaved text/image sequence. Please build ContentParts in source order here, or extend src/multimodal.rs to return ordered segments, and add a regression test for that case.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/providers/copilot.rs` around lines 277 - 295, The current code uses
parse_image_markers(content) which returns a cleaned text plus a list of image
refs and then always emits parts as "text? followed by all images", losing the
original interleaving; update the logic so ApiContent::Parts contains
ContentPart entries in the same source order as the original content. Either
modify parse_image_markers to return ordered segments (e.g., an enum
Segment::Text(String) | Segment::Image(String)) and iterate those to build
ContentPart::Text / ContentPart::ImageUrl (ImageUrlDetail) in order, or keep
parse_image_markers but also return the positions of markers and rebuild parts
by scanning the original content to interleave text and images; ensure you
create ContentPart::Text only for non-empty text segments and ImageUrlDetail {
url: ... } for image refs so ApiContent::Parts preserves original ordering (and
add a regression test for an interleaved case like "compare [IMAGE:a] and
[IMAGE:b]").

}

fn convert_messages(messages: &[ChatMessage]) -> Vec<ApiMessage> {
messages
.iter()
Expand All @@ -270,7 +320,7 @@ impl CopilotProvider {
let content = value
.get("content")
.and_then(serde_json::Value::as_str)
.map(ToString::to_string);
.map(|s| ApiContent::Text(s.to_string()));

return ApiMessage {
role: "assistant".to_string(),
Expand All @@ -292,7 +342,7 @@ impl CopilotProvider {
let content = value
.get("content")
.and_then(serde_json::Value::as_str)
.map(ToString::to_string);
.map(|s| ApiContent::Text(s.to_string()));

return ApiMessage {
role: "tool".to_string(),
Expand All @@ -305,7 +355,7 @@ impl CopilotProvider {

ApiMessage {
role: message.role.clone(),
content: Some(message.content.clone()),
content: Self::to_api_content(&message.role, &message.content),
tool_call_id: None,
tool_calls: None,
}
Expand Down Expand Up @@ -609,14 +659,14 @@ impl Provider for CopilotProvider {
if let Some(system) = system_prompt {
messages.push(ApiMessage {
role: "system".to_string(),
content: Some(system.to_string()),
content: Some(ApiContent::Text(system.to_string())),
tool_call_id: None,
tool_calls: None,
});
}
messages.push(ApiMessage {
role: "user".to_string(),
content: Some(message.to_string()),
content: Self::to_api_content("user", message),
tool_call_id: None,
tool_calls: None,
});
Expand Down Expand Up @@ -735,4 +785,35 @@ mod tests {
let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
assert!(resp.usage.is_none());
}

#[test]
fn to_api_content_user_with_image_returns_parts() {
let content = "describe this [IMAGE:data:image/png;base64,abc123]";
let result = CopilotProvider::to_api_content("user", content).unwrap();
match result {
ApiContent::Parts(parts) => {
assert_eq!(parts.len(), 2);
assert!(matches!(&parts[0], ContentPart::Text { text } if text == "describe this"));
assert!(
matches!(&parts[1], ContentPart::ImageUrl { image_url } if image_url.url == "data:image/png;base64,abc123")
);
}
_ => panic!("expected ApiContent::Parts for user message with image marker"),
}
}

#[test]
fn to_api_content_user_plain_returns_text() {
let result = CopilotProvider::to_api_content("user", "hello world").unwrap();
assert!(matches!(result, ApiContent::Text(ref s) if s == "hello world"));
}

#[test]
fn to_api_content_non_user_returns_text() {
let result = CopilotProvider::to_api_content("system", "you are helpful").unwrap();
assert!(matches!(result, ApiContent::Text(ref s) if s == "you are helpful"));

let result = CopilotProvider::to_api_content("assistant", "sure").unwrap();
assert!(matches!(result, ApiContent::Text(ref s) if s == "sure"));
}
}