diff --git a/src-tauri/src/provider.rs b/src-tauri/src/provider.rs index 648575de06..8b8bef669c 100644 --- a/src-tauri/src/provider.rs +++ b/src-tauri/src/provider.rs @@ -396,6 +396,9 @@ pub struct ProviderMeta { skip_serializing_if = "HashMap::is_empty" )] pub claude_desktop_model_routes: HashMap, + /// 图片处理模型:仅供本地代理识别图片内容,不写入 live 配置。 + #[serde(rename = "imageModel", skip_serializing_if = "Option::is_none")] + pub image_model: Option, /// 用量查询脚本配置 #[serde(skip_serializing_if = "Option::is_none")] pub usage_script: Option, @@ -955,6 +958,21 @@ mod tests { assert!(value.get("pricingModelSource").is_none()); } + #[test] + fn provider_meta_serializes_image_model() { + let meta: ProviderMeta = + serde_json::from_value(json!({ "imageModel": "glm-5.1" })).expect("deserialize meta"); + + assert_eq!(meta.image_model.as_deref(), Some("glm-5.1")); + + let value = serde_json::to_value(&meta).expect("serialize ProviderMeta"); + assert_eq!( + value.get("imageModel").and_then(|item| item.as_str()), + Some("glm-5.1") + ); + assert!(value.get("image_model").is_none()); + } + #[test] fn provider_with_id_populates_defaults() { let settings_config = json!({ diff --git a/src-tauri/src/proxy/forwarder.rs b/src-tauri/src/proxy/forwarder.rs index e83e2f68ed..28dbd034a0 100644 --- a/src-tauri/src/proxy/forwarder.rs +++ b/src-tauri/src/proxy/forwarder.rs @@ -53,6 +53,23 @@ pub struct ForwardError { pub provider: Option, } +#[derive(Debug, Clone, Copy)] +struct ForwardOptions { + apply_model_mapping: bool, + image_context: bool, + media_prevention: bool, +} + +impl Default for ForwardOptions { + fn default() -> Self { + Self { + apply_model_mapping: true, + image_context: true, + media_prevention: true, + } + } +} + /// 活跃连接 RAII guard /// /// 构造时把 `ProxyStatus.active_connections` +1;Drop 时在 tokio runtime 上调度 @@ -465,6 +482,7 @@ impl RequestForwarder { &headers, &extensions, adapter.as_ref(), + ForwardOptions::default(), ) .await { @@ -564,6 +582,7 @@ impl RequestForwarder { &headers, &extensions, adapter.as_ref(), + ForwardOptions::default(), ) .await { @@ -710,6 +729,7 @@ impl RequestForwarder { &headers, &extensions, adapter.as_ref(), + ForwardOptions::default(), ) .await { @@ -876,6 +896,7 @@ impl RequestForwarder { &headers, &extensions, adapter.as_ref(), + ForwardOptions::default(), ) .await { @@ -1100,6 +1121,7 @@ impl RequestForwarder { headers: &axum::http::HeaderMap, extensions: &Extensions, adapter: &dyn ProviderAdapter, + options: ForwardOptions, ) -> Result<(ProxyResponse, Option, Option), ProxyError> { // 使用适配器提取 base_url let mut base_url = adapter.extract_base_url(provider)?; @@ -1121,7 +1143,9 @@ impl RequestForwarder { // 应用模型映射(独立于格式转换) // Claude Desktop proxy 模式必须先把 Desktop 可见的 claude-* route // 映射成真实上游模型名,并且未知 route 要直接报错,不能使用默认模型兜底。 - let mapped_body = if matches!(app_type, AppType::ClaudeDesktop) { + let mapped_body = if !options.apply_model_mapping { + body.clone() + } else if matches!(app_type, AppType::ClaudeDesktop) { crate::claude_desktop_config::map_proxy_request_model(body.clone(), provider) .map_err(|e| ProxyError::InvalidRequest(e.to_string()))? } else { @@ -1289,7 +1313,22 @@ impl RequestForwarder { provider, api_format, ); - self.apply_media_prevention(&mut mapped_body, provider); + if options.image_context { + self.apply_image_context_if_configured( + app_type, + method, + provider, + endpoint, + &mut mapped_body, + headers, + extensions, + adapter, + ) + .await?; + } + if options.media_prevention { + self.apply_media_prevention(&mut mapped_body, provider); + } } } let needs_transform = match resolved_claude_api_format.as_deref() { @@ -2041,6 +2080,89 @@ impl RequestForwarder { "openai_chat".to_string() } + #[allow(clippy::too_many_arguments)] + async fn apply_image_context_if_configured( + &self, + app_type: &AppType, + method: &http::Method, + provider: &Provider, + endpoint: &str, + mapped_body: &mut Value, + headers: &axum::http::HeaderMap, + extensions: &Extensions, + adapter: &dyn ProviderAdapter, + ) -> Result<(), ProxyError> { + if !super::image_context::contains_image_blocks(mapped_body) { + return Ok(()); + } + + let image_count = super::image_context::count_image_blocks(mapped_body); + let Some(image_model) = super::image_context::image_model_from_provider(provider) else { + log::info!( + "[ImageContext] 检测到图片但未配置图片处理模型: provider={} images={image_count}", + provider.id + ); + return Ok(()); + }; + + let main_model = mapped_body + .get("model") + .and_then(Value::as_str) + .unwrap_or(""); + log::info!( + "[ImageContext] 准备图片识别: app={:?} provider={} main_model={} image_model={} images={image_count}", + app_type, + provider.id, + main_model, + image_model + ); + + let analysis_request = + super::image_context::create_image_analysis_request(mapped_body, &image_model); + let image_forward = self.forward( + app_type, + method, + provider, + endpoint, + &analysis_request, + headers, + extensions, + adapter, + ForwardOptions { + apply_model_mapping: false, + image_context: false, + media_prevention: false, + }, + ); + let (response, _, _) = Box::pin(image_forward) + .await + .map_err(|error| ProxyError::ForwardFailed(format!("图片识别失败: {error}")))?; + let body_bytes = response.bytes().await?; + let response_json: Value = serde_json::from_slice(&body_bytes).map_err(|err| { + ProxyError::ForwardFailed(format!("图片识别失败: 响应不是有效 JSON: {err}")) + })?; + let text = super::image_context::extract_text_from_response(&response_json); + let text = text.trim(); + if text.is_empty() { + return Err(ProxyError::ForwardFailed( + "图片识别失败: 图片处理模型未返回文本描述".to_string(), + )); + } + + let analysis = super::image_context::parse_image_analysis_response(text, image_count); + if let Some(messages) = mapped_body.get("messages").cloned() { + mapped_body["messages"] = + super::image_context::inject_image_context(&messages, &analysis); + } + log::info!( + "[ImageContext] 图片识别完成: provider={} image_model={} images={image_count}", + provider.id, + image_model + ); + + Ok(()) + } + /// 用 Copilot live `/models` 列表确认 model ID 真实可用,找不到时按 family 降级。 /// 命中缓存后是同步的;首次请求或 5 min 缓存过期后会触发一次 HTTP。 async fn apply_copilot_live_model_resolution( diff --git a/src-tauri/src/proxy/image_context.rs b/src-tauri/src/proxy/image_context.rs new file mode 100644 index 0000000000..ea39b24cea --- /dev/null +++ b/src-tauri/src/proxy/image_context.rs @@ -0,0 +1,767 @@ +use regex::Regex; +use serde_json::{json, Map, Value}; +use std::collections::BTreeMap; + +const IMAGE_ANALYSIS_PROMPT: &str = "You are an image content extractor. Do not answer the user's final question.\n\ +Use the original order of the user's text and images to extract key details from each image that are relevant to the user's request.\n\ +If there are multiple images, summarize their obvious relationships, differences, or comparable points.\n\ +Output exactly in this structure:\n\ +Image 1:\n\ +
\n\n\ +Image 2:\n\ +
\n\n\ +Cross-image relationship:\n\ +\n\n\ +Only output image details and cross-image relationship context. Do not answer the user's final question."; + +const IMAGE_MIME_TYPES: &[&str] = &[ + "image/png", + "image/jpeg", + "image/jpg", + "image/gif", + "image/webp", + "image/svg+xml", +]; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ImageAnalysis { + pub images: BTreeMap, + pub relation: Option, + pub raw_text: String, +} + +pub fn image_model_from_provider(provider: &crate::provider::Provider) -> Option { + provider + .meta + .as_ref() + .and_then(|meta| meta.image_model.as_deref()) + .map(str::trim) + .filter(|model| !model.is_empty()) + .map(ToString::to_string) +} + +pub fn contains_image_blocks(body: &Value) -> bool { + count_image_blocks(body) > 0 +} + +pub fn count_image_blocks(body: &Value) -> usize { + body.get("messages") + .and_then(Value::as_array) + .map(|messages| { + messages + .iter() + .filter_map(|message| message.get("content")) + .map(count_images_in_content) + .sum() + }) + .unwrap_or(0) +} + +pub fn create_image_analysis_request(original_body: &Value, image_model: &str) -> Value { + let mut request = copy_request_options(original_body); + request.insert("model".to_string(), Value::String(image_model.to_string())); + request.insert( + "max_tokens".to_string(), + json!(normalize_max_tokens(original_body.get("max_tokens"))), + ); + request.insert( + "messages".to_string(), + json!([{ + "role": "user", + "content": build_ordered_analysis_content(original_body.get("messages")), + }]), + ); + Value::Object(request) +} + +pub fn parse_image_analysis_response(text: &str, image_count: usize) -> ImageAnalysis { + let raw_text = text.trim().to_string(); + let mut images = BTreeMap::new(); + let mut relation = None; + if raw_text.is_empty() { + return ImageAnalysis { + images, + relation, + raw_text, + }; + } + + let header = Regex::new( + r"(?i)^\s*((?:image|图片)\s*(\d+)|cross-image relationship|multi-image relationship|image relationship|多图关系)\s*[::]\s*(.*)$", + ) + .expect("valid image section regex"); + let mut current: Option
= None; + + for line in raw_text.lines() { + if let Some(captures) = header.captures(line) { + store_section(current.take(), &mut images, &mut relation); + let label = captures + .get(1) + .map(|m| { + m.as_str() + .chars() + .filter(|c| !c.is_whitespace()) + .collect::() + }) + .unwrap_or_default(); + let first_line = captures + .get(3) + .map(|m| m.as_str().trim().to_string()) + .unwrap_or_default(); + let label_lower = label.to_ascii_lowercase(); + let kind = if label == "多图关系" || label_lower.contains("relationship") { + SectionKind::Relation + } else { + let index = captures + .get(2) + .and_then(|m| m.as_str().parse::().ok()) + .unwrap_or(0); + SectionKind::Image(index) + }; + current = Some(Section { + kind, + lines: if first_line.is_empty() { + Vec::new() + } else { + vec![first_line] + }, + }); + continue; + } + + if let Some(section) = current.as_mut() { + section.lines.push(line.to_string()); + } + } + + store_section(current.take(), &mut images, &mut relation); + + if images.is_empty() && image_count > 0 { + for index in 1..=image_count { + images.insert(index, raw_text.clone()); + } + } + + ImageAnalysis { + images, + relation, + raw_text, + } +} + +pub fn inject_image_context(messages: &Value, analysis: &ImageAnalysis) -> Value { + let Some(message_items) = messages.as_array() else { + return json!([]); + }; + + let mut output = Vec::with_capacity(message_items.len()); + let mut image_index = 0usize; + let mut last_user_with_images = None; + let mut last_user = None; + + for message in message_items { + let mut next_message = message.clone(); + if next_message.get("role").and_then(Value::as_str) == Some("user") { + last_user = Some(output.len()); + } + + let mut replaced = false; + if let Some(content) = next_message.get_mut("content") { + if content.is_array() { + *content = + inject_content_blocks(content, analysis, &mut image_index, &mut replaced); + } + } + + if replaced && next_message.get("role").and_then(Value::as_str) == Some("user") { + last_user_with_images = Some(output.len()); + } + output.push(next_message); + } + + if let Some(relation) = analysis + .relation + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + let target_index = last_user_with_images.or(last_user); + let relation_text = format_relation_context(relation); + if let Some(index) = target_index { + append_text_block(&mut output[index], relation_text); + } else { + output.push(json!({ + "role": "user", + "content": relation_text, + })); + } + } + + Value::Array(output) +} + +pub fn extract_text_from_response(data: &Value) -> String { + if let Some(content) = data.get("content").and_then(Value::as_array) { + let text = content + .iter() + .filter_map(|block| block.get("text").and_then(Value::as_str)) + .filter(|text| !text.is_empty()) + .collect::>() + .join("\n"); + if !text.is_empty() { + return text; + } + } + + if let Some(content) = data + .get("choices") + .and_then(Value::as_array) + .and_then(|choices| choices.first()) + .and_then(|first| first.get("message")) + .and_then(|message| message.get("content")) + .and_then(Value::as_str) + { + return content.to_string(); + } + + if let Some(output_text) = data.get("output_text").and_then(Value::as_str) { + return output_text.to_string(); + } + + if let Some(output) = data.get("output").and_then(Value::as_array) { + let text = output + .iter() + .filter_map(|item| item.get("content").and_then(Value::as_array)) + .flat_map(|content| content.iter()) + .filter_map(|block| { + block + .get("text") + .or_else(|| block.get("output_text")) + .and_then(Value::as_str) + }) + .filter(|text| !text.is_empty()) + .collect::>() + .join("\n"); + if !text.is_empty() { + return text; + } + } + + if let Some(candidates) = data.get("candidates").and_then(Value::as_array) { + let text = candidates + .iter() + .filter_map(|candidate| { + candidate + .get("content") + .and_then(|content| content.get("parts")) + .and_then(Value::as_array) + }) + .flat_map(|parts| parts.iter()) + .filter_map(|part| part.get("text").and_then(Value::as_str)) + .filter(|text| !text.is_empty()) + .collect::>() + .join("\n"); + if !text.is_empty() { + return text; + } + } + + data.get("text") + .and_then(Value::as_str) + .unwrap_or("") + .to_string() +} + +fn build_ordered_analysis_content(messages: Option<&Value>) -> Vec { + let Some(messages) = messages.and_then(Value::as_array) else { + return Vec::new(); + }; + let mut content = vec![json!({ "type": "text", "text": IMAGE_ANALYSIS_PROMPT })]; + let mut image_index = 0usize; + + for message in messages { + let role = message + .get("role") + .and_then(Value::as_str) + .unwrap_or("unknown"); + match message.get("content") { + Some(Value::String(text)) if !text.trim().is_empty() => { + content.push(json!({ + "type": "text", + "text": format!("[{role} text]\n{text}"), + })); + } + Some(Value::Array(blocks)) => { + build_ordered_blocks(blocks, role, &mut image_index, &mut content); + } + _ => {} + } + } + + content +} + +fn build_ordered_blocks( + blocks: &[Value], + role: &str, + image_index: &mut usize, + content: &mut Vec, +) { + for block in blocks { + if is_image_block(block) { + *image_index += 1; + content.push(json!({ "type": "text", "text": format!("Image {}:", *image_index) })); + content.push(normalize_image_block_for_analysis(block)); + continue; + } + + if block.get("type").and_then(Value::as_str) == Some("text") { + if let Some(text) = block.get("text").and_then(Value::as_str) { + if !text.trim().is_empty() { + content.push(json!({ + "type": "text", + "text": format!("[{role} text]\n{text}"), + })); + } + } + continue; + } + + if let Some(nested) = block.get("content").and_then(Value::as_array) { + build_ordered_blocks(nested, role, image_index, content); + } + } +} + +fn inject_content_blocks( + content: &Value, + analysis: &ImageAnalysis, + image_index: &mut usize, + replaced: &mut bool, +) -> Value { + let Some(blocks) = content.as_array() else { + return content.clone(); + }; + + let mut output = Vec::with_capacity(blocks.len()); + for block in blocks { + if is_image_block(block) { + *image_index += 1; + *replaced = true; + let image_text = analysis + .images + .get(image_index) + .map(String::as_str) + .unwrap_or(analysis.raw_text.as_str()) + .trim(); + if !image_text.is_empty() { + output.push(json!({ + "type": "text", + "text": format_image_context(*image_index, image_text), + })); + } + continue; + } + + let mut next = block.clone(); + if let Some(nested) = next.get_mut("content") { + if nested.is_array() { + *nested = inject_content_blocks(nested, analysis, image_index, replaced); + } + } + output.push(next); + } + + Value::Array(output) +} + +fn count_images_in_content(content: &Value) -> usize { + let Some(blocks) = content.as_array() else { + return 0; + }; + + blocks + .iter() + .map(|block| { + if is_image_block(block) { + 1 + } else { + block + .get("content") + .map(count_images_in_content) + .unwrap_or(0) + } + }) + .sum() +} + +fn is_image_block(block: &Value) -> bool { + if image_type_is_image_like(block.get("type").and_then(Value::as_str)) { + return true; + } + + if image_mime_from_value(block).is_some() || image_url_from_value(block).is_some() { + return true; + } + + ["source", "file", "image", "input_image"] + .into_iter() + .filter_map(|key| block.get(key)) + .any(|value| { + image_mime_from_value(value).is_some() || image_url_from_value(value).is_some() + }) +} + +fn normalize_image_block_for_analysis(block: &Value) -> Value { + if block.get("type").and_then(Value::as_str) == Some("image") && block.get("source").is_some() { + return block.clone(); + } + + if let Some(url) = image_url_from_value(block) { + if let Some((media_type, data)) = parse_data_image_url(url) { + return json!({ + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": data, + } + }); + } + } + + if let Some(source) = block.get("source") { + if let (Some(media_type), Some(data)) = ( + image_mime_from_value(source).or_else(|| image_mime_from_value(block)), + source.get("data").and_then(Value::as_str), + ) { + return json!({ + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": data, + } + }); + } + } + + block.clone() +} + +fn image_type_is_image_like(value: Option<&str>) -> bool { + value.is_some_and(|value| { + matches!( + value.trim().to_ascii_lowercase().as_str(), + "image" | "input_image" | "image_url" + ) + }) +} + +fn image_mime_from_value(value: &Value) -> Option<&str> { + [ + "media_type", + "mediaType", + "mime_type", + "mimeType", + "mime", + "type", + ] + .into_iter() + .find_map(|key| { + value + .get(key) + .and_then(Value::as_str) + .filter(|mime| is_image_mime(mime)) + }) +} + +fn is_image_mime(value: &str) -> bool { + let normalized = value.trim().to_ascii_lowercase(); + normalized.starts_with("image/") || IMAGE_MIME_TYPES.contains(&normalized.as_str()) +} + +fn image_url_from_value(value: &Value) -> Option<&str> { + let url = value + .get("image_url") + .and_then(|image_url| { + image_url + .as_str() + .or_else(|| image_url.get("url").and_then(Value::as_str)) + }) + .or_else(|| value.get("url").and_then(Value::as_str)) + .or_else(|| { + value + .get("source") + .and_then(|source| source.get("url")) + .and_then(Value::as_str) + })?; + + if url.trim_start().starts_with("data:image/") { + Some(url) + } else { + None + } +} + +fn parse_data_image_url(url: &str) -> Option<(String, String)> { + let rest = url.trim().strip_prefix("data:")?; + let (metadata, data) = rest.split_once(',')?; + let media_type = metadata.split(';').next().unwrap_or("").trim(); + if !metadata.to_ascii_lowercase().contains(";base64") || !is_image_mime(media_type) { + return None; + } + Some((media_type.to_string(), data.to_string())) +} + +fn copy_request_options(body: &Value) -> Map { + let mut copy = Map::new(); + for key in ["system", "temperature", "top_p", "top_k", "metadata"] { + if let Some(value) = body.get(key) { + copy.insert(key.to_string(), value.clone()); + } + } + copy +} + +fn normalize_max_tokens(value: Option<&Value>) -> u64 { + value + .and_then(Value::as_u64) + .map(|value| value.clamp(512, 4096)) + .unwrap_or(2048) +} + +fn append_text_block(message: &mut Value, text: String) { + match message.get_mut("content") { + Some(Value::String(existing)) => { + if existing.trim().is_empty() { + *existing = text; + } else { + existing.push_str("\n\n"); + existing.push_str(&text); + } + } + Some(Value::Array(blocks)) => { + blocks.push(json!({ "type": "text", "text": text })); + } + _ => { + if let Some(object) = message.as_object_mut() { + object.insert("content".to_string(), Value::String(text)); + } + } + } +} + +fn format_image_context(index: usize, description: &str) -> String { + format!("[Image {index} analysis]\n{}", description.trim()) +} + +fn format_relation_context(relation: &str) -> String { + format!("[Cross-image relationship]\n{}", relation.trim()) +} + +#[derive(Debug)] +struct Section { + kind: SectionKind, + lines: Vec, +} + +#[derive(Debug)] +enum SectionKind { + Image(usize), + Relation, +} + +fn store_section( + section: Option
, + images: &mut BTreeMap, + relation: &mut Option, +) { + let Some(section) = section else { + return; + }; + let content = section.lines.join("\n").trim().to_string(); + if content.is_empty() { + return; + } + match section.kind { + SectionKind::Image(index) if index > 0 => { + images.insert(index, content); + } + SectionKind::Relation => { + *relation = Some(content); + } + _ => {} + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn injects_multi_image_results_in_original_order() { + let messages = json!([ + { + "role": "user", + "content": [ + { "type": "text", "text": "先看这里" }, + { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": "a" } }, + { "type": "text", "text": "再比较这个" }, + { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": "b" } }, + { "type": "text", "text": "区别是什么?" } + ] + } + ]); + let analysis = parse_image_analysis_response( + "图片1:\n第一张是登录页\n\n图片2:\n第二张是错误页\n\n多图关系:\n第二张比第一张多了错误提示", + 2, + ); + + let injected = inject_image_context(&messages, &analysis); + let content = injected[0]["content"].as_array().unwrap(); + + assert_eq!(content[0]["text"], "先看这里"); + assert_eq!(content[1]["text"], "[Image 1 analysis]\n第一张是登录页"); + assert_eq!(content[2]["text"], "再比较这个"); + assert_eq!(content[3]["text"], "[Image 2 analysis]\n第二张是错误页"); + assert_eq!(content[4]["text"], "区别是什么?"); + assert_eq!( + content[5]["text"], + "[Cross-image relationship]\n第二张比第一张多了错误提示" + ); + assert!(!serde_json::to_string(&injected) + .unwrap() + .contains(r#""type":"image""#)); + } + + #[test] + fn unstructured_analysis_is_used_for_each_image_without_losing_text() { + let messages = json!([ + { + "role": "user", + "content": [ + { "type": "text", "text": "解释" }, + { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": "a" } } + ] + } + ]); + let analysis = parse_image_analysis_response("这张图是一个设置页面", 1); + let injected = inject_image_context(&messages, &analysis); + let content = injected[0]["content"].as_array().unwrap(); + + assert_eq!(content[0]["text"], "解释"); + assert_eq!( + content[1]["text"], + "[Image 1 analysis]\n这张图是一个设置页面" + ); + } + + #[test] + fn image_only_request_injects_result_without_empty_text_blocks() { + let messages = json!([ + { + "role": "user", + "content": [ + { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": "a" } } + ] + } + ]); + let analysis = parse_image_analysis_response("图片1:\n只有一张流程图", 1); + let injected = inject_image_context(&messages, &analysis); + let content = injected[0]["content"].as_array().unwrap(); + + assert_eq!(content.len(), 1); + assert_eq!(content[0]["text"], "[Image 1 analysis]\n只有一张流程图"); + } + + #[test] + fn creates_analysis_request_with_ordered_text_and_image_markers() { + let body = json!({ + "model": "main", + "max_tokens": 9999, + "temperature": 0.2, + "messages": [{ + "role": "user", + "content": [ + { "type": "text", "text": "看图" }, + { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": "a" } } + ] + }] + }); + + let request = create_image_analysis_request(&body, "vision-model"); + let content = request["messages"][0]["content"].as_array().unwrap(); + + assert_eq!(request["model"], "vision-model"); + assert_eq!(request["max_tokens"], 4096); + assert_eq!(request["temperature"], 0.2); + assert!(content[0]["text"] + .as_str() + .unwrap() + .contains("Do not answer the user's final question")); + assert_eq!(content[1]["text"], "[user text]\n看图"); + assert_eq!(content[2]["text"], "Image 1:"); + assert_eq!(content[3]["type"], "image"); + } + + #[test] + fn handles_openai_style_input_image_data_urls() { + let body = json!({ + "model": "main", + "messages": [{ + "role": "user", + "content": [ + { "type": "text", "text": "这是什么" }, + { "type": "input_image", "image_url": "data:image/png;base64,abc123" } + ] + }] + }); + + assert!(contains_image_blocks(&body)); + + let request = create_image_analysis_request(&body, "vision-model"); + let content = request["messages"][0]["content"].as_array().unwrap(); + assert_eq!(content[3]["type"], "image"); + assert_eq!(content[3]["source"]["media_type"], "image/png"); + assert_eq!(content[3]["source"]["data"], "abc123"); + + let analysis = parse_image_analysis_response("图片1:\n一个路由配置页面", 1); + let injected = inject_image_context(&body["messages"], &analysis); + let injected_content = injected[0]["content"].as_array().unwrap(); + assert_eq!(injected_content[0]["text"], "这是什么"); + assert_eq!( + injected_content[1]["text"], + "[Image 1 analysis]\n一个路由配置页面" + ); + assert!(!serde_json::to_string(&injected) + .unwrap() + .contains("input_image")); + } + + #[test] + fn detects_image_blocks_by_nested_mime_type() { + let body = json!({ + "messages": [{ + "role": "user", + "content": [ + { + "type": "file", + "source": { + "type": "base64", + "mimeType": "image/jpeg", + "data": "abc" + } + } + ] + }] + }); + + assert_eq!(count_image_blocks(&body), 1); + let request = create_image_analysis_request(&body, "vision-model"); + let content = request["messages"][0]["content"].as_array().unwrap(); + assert_eq!(content[1]["text"], "Image 1:"); + assert_eq!(content[2]["type"], "image"); + assert_eq!(content[2]["source"]["media_type"], "image/jpeg"); + } +} diff --git a/src-tauri/src/proxy/media_sanitizer.rs b/src-tauri/src/proxy/media_sanitizer.rs index ad30fd2fc7..c1dd7b3e6f 100644 --- a/src-tauri/src/proxy/media_sanitizer.rs +++ b/src-tauri/src/proxy/media_sanitizer.rs @@ -70,7 +70,12 @@ pub fn is_unsupported_image_error(error: &ProxyError) -> bool { || message.contains("modality") || message.contains("modalities") || message.contains("media") - || message.contains("attachment"); + || message.contains("attachment") + || message.contains("图片") + || message.contains("图像") + || message.contains("多模态") + || message.contains("视觉") + || message.contains("附件"); if !mentions_image { return false; @@ -96,6 +101,14 @@ pub fn is_unsupported_image_error(error: &ProxyError) -> bool { "can't process", "can't handle", "unable to process", + "不支持", + "无法识别", + "无法处理", + "不能处理", + "仅支持文本", + "只支持文本", + "未知的 content", + "无效的 content", ]; UNSUPPORTED_HINTS.iter().any(|hint| message.contains(hint)) @@ -107,8 +120,7 @@ fn content_has_image_blocks(content: &Value) -> bool { }; blocks.iter().any(|block| { - is_image_block_type(block.get("type").and_then(Value::as_str)) - || block.get("content").is_some_and(content_has_image_blocks) + is_image_like_block(block) || block.get("content").is_some_and(content_has_image_blocks) }) } @@ -143,7 +155,7 @@ fn replace_images_in_content_with_text_type(content: &mut Value, text_type: &str let mut replaced = 0usize; for block in blocks { - if is_image_block_type(block.get("type").and_then(Value::as_str)) { + if is_image_like_block(block) { replace_image_block_with_text_marker(block, text_type); replaced += 1; continue; @@ -210,10 +222,6 @@ fn replace_images_in_responses_input_item(item: &mut Value) -> usize { replaced } -fn is_image_block_type(block_type: Option<&str>) -> bool { - matches!(block_type, Some("image" | "image_url" | "input_image")) -} - fn replace_image_block_with_text_marker(block: &mut Value, text_type: &str) { let cache_control = block.get("cache_control").cloned(); *block = json!({ @@ -225,6 +233,73 @@ fn replace_image_block_with_text_marker(block: &mut Value, text_type: &str) { } } +fn is_image_like_block(block: &Value) -> bool { + if block + .get("type") + .and_then(Value::as_str) + .is_some_and(|block_type| { + matches!( + block_type.trim().to_ascii_lowercase().as_str(), + "image" | "input_image" | "image_url" + ) + }) + { + return true; + } + + if image_mime_from_value(block).is_some() || image_url_from_value(block).is_some() { + return true; + } + + ["source", "file", "image", "input_image"] + .into_iter() + .filter_map(|key| block.get(key)) + .any(|value| { + image_mime_from_value(value).is_some() || image_url_from_value(value).is_some() + }) +} + +fn image_mime_from_value(value: &Value) -> Option<&str> { + [ + "media_type", + "mediaType", + "mime_type", + "mimeType", + "mime", + "type", + ] + .into_iter() + .find_map(|key| { + value + .get(key) + .and_then(Value::as_str) + .filter(|mime| mime.trim().to_ascii_lowercase().starts_with("image/")) + }) +} + +fn image_url_from_value(value: &Value) -> Option<&str> { + let url = value + .get("image_url") + .and_then(|image_url| { + image_url + .as_str() + .or_else(|| image_url.get("url").and_then(Value::as_str)) + }) + .or_else(|| value.get("url").and_then(Value::as_str)) + .or_else(|| { + value + .get("source") + .and_then(|source| source.get("url")) + .and_then(Value::as_str) + })?; + + if url.trim_start().starts_with("data:image/") { + Some(url) + } else { + None + } +} + fn explicit_model_image_support(provider: &Provider, model: &str) -> Option { let settings = &provider.settings_config; [ @@ -717,6 +792,41 @@ mod tests { assert!(is_unsupported_image_error(&error)); } + #[test] + fn detects_chinese_unsupported_image_errors() { + let error = ProxyError::UpstreamError { + status: 400, + body: Some( + r#"{"error":{"message":"当前模型不支持图片等多模态内容,messages 中含上游无法识别的 content 类型"}}"# + .to_string(), + ), + }; + + assert!(is_unsupported_image_error(&error)); + } + + #[test] + fn replaces_openai_style_input_image_blocks() { + let mut body = json!({ + "model": "deepseek-v4-pro", + "messages": [{ + "role": "user", + "content": [ + { "type": "input_image", "image_url": "data:image/png;base64,abc" } + ] + }] + }); + + assert!(contains_image_blocks(&body)); + let count = replace_image_blocks_with_marker(&mut body); + + assert_eq!(count, 1); + assert_eq!( + body["messages"][0]["content"][0]["text"], + UNSUPPORTED_IMAGE_MARKER + ); + } + #[test] fn ignores_non_image_errors() { let error = ProxyError::UpstreamError { diff --git a/src-tauri/src/proxy/mod.rs b/src-tauri/src/proxy/mod.rs index 585703c63a..9e09c77ce2 100644 --- a/src-tauri/src/proxy/mod.rs +++ b/src-tauri/src/proxy/mod.rs @@ -17,6 +17,7 @@ mod handlers; mod health; pub mod http_client; pub mod hyper_client; +pub mod image_context; pub(crate) mod json_canonical; pub mod log_codes; pub mod media_sanitizer; diff --git a/src/App.tsx b/src/App.tsx index 966e48a6ff..14a607bf9a 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -266,6 +266,16 @@ function App() { }); const providers = useMemo(() => data?.providers ?? {}, [data]); const currentProviderId = data?.currentProviderId ?? ""; + const currentProvider = currentProviderId + ? providers[currentProviderId] + : undefined; + const shouldShowProxyToggle = + activeApp === "claude" + ? Boolean( + isCurrentAppTakeoverActive || + currentProvider?.meta?.imageModel?.trim(), + ) + : Boolean(settingsData?.enableLocalProxy || isCurrentAppTakeoverActive); const isOpenClawView = activeApp === "openclaw" && (currentView === "providers" || @@ -1222,10 +1232,10 @@ function App() { > {activeApp === "claude-desktop" ? ( + ) : shouldShowProxyToggle ? ( + ) : ( - settingsData?.enableLocalProxy && ( - - ) + <> )} {activeApp !== "claude-desktop" && settingsData?.enableFailoverToggle && ( diff --git a/src/components/providers/forms/ClaudeDesktopProviderForm.tsx b/src/components/providers/forms/ClaudeDesktopProviderForm.tsx index bd0fd3e440..b76b2ba958 100644 --- a/src/components/providers/forms/ClaudeDesktopProviderForm.tsx +++ b/src/components/providers/forms/ClaudeDesktopProviderForm.tsx @@ -260,6 +260,9 @@ export function ClaudeDesktopProviderForm({ const [apiFormat, setApiFormat] = useState( initialData?.meta?.apiFormat ?? "anthropic", ); + const [imageModel, setImageModel] = useState( + initialData?.meta?.imageModel ?? "", + ); const [baseUrl, setBaseUrl] = useState( envString(initialData?.settingsConfig, "ANTHROPIC_BASE_URL"), ); @@ -394,6 +397,7 @@ export function ClaudeDesktopProviderForm({ setApiKey(""); setApiKeyField(preset.apiKeyField ?? "ANTHROPIC_AUTH_TOKEN"); setApiFormat(preset.apiFormat ?? "anthropic"); + setImageModel(""); didSeedDefaultRoutes.current = true; setMode(preset.mode); @@ -425,6 +429,7 @@ export function ClaudeDesktopProviderForm({ setApiKey(""); setApiKeyField("ANTHROPIC_AUTH_TOKEN"); setApiFormat("anthropic"); + setImageModel(""); didSeedDefaultRoutes.current = false; setMode("direct"); setRoutes([]); @@ -539,6 +544,7 @@ export function ClaudeDesktopProviderForm({ delete meta.apiFormat; delete meta.endpointAutoSelect; delete meta.isFullUrl; + delete meta.imageModel; await onSubmit({ ...values, name: values.name.trim(), @@ -649,6 +655,8 @@ export function ClaudeDesktopProviderForm({ ...(initialData?.meta ?? {}), claudeDesktopMode: mode, apiFormat: mode === "proxy" ? apiFormat : "anthropic", + imageModel: + mode === "proxy" && imageModel.trim() ? imageModel.trim() : undefined, }; meta.claudeDesktopModelRoutes = routeMap; @@ -1005,6 +1013,36 @@ export function ClaudeDesktopProviderForm({ ); })} +
+ +
+ setImageModel(event.target.value)} + placeholder={t("providerForm.imageModelPlaceholder", { + defaultValue: "例如 qwen3.7-plus", + })} + className="flex-1" + /> + {fetchedModels.length > 0 && ( + + )} +
+

+ {t("providerForm.imageModelHint", { + defaultValue: + "仅在请求包含图片时先识别图片内容,再把结果注入上下文;最终回答仍由上面的主模型处理。留空时保持现有图片降级逻辑。", + })} +

+
)} diff --git a/src/components/providers/forms/ClaudeFormFields.tsx b/src/components/providers/forms/ClaudeFormFields.tsx index 2ce0f1d985..fde32f17f7 100644 --- a/src/components/providers/forms/ClaudeFormFields.tsx +++ b/src/components/providers/forms/ClaudeFormFields.tsx @@ -123,9 +123,11 @@ interface ClaudeFormFieldsProps { defaultSonnetModelName: string; defaultOpusModel: string; defaultOpusModelName: string; + imageModel?: string; defaultFableModel: string; defaultFableModelName: string; onModelChange: (field: ClaudeModelEnvField, value: string) => void; + onImageModelChange?: (value: string) => void; // Speed Test Endpoints speedTestEndpoints: EndpointCandidate[]; @@ -189,9 +191,11 @@ export function ClaudeFormFields({ defaultSonnetModelName, defaultOpusModel, defaultOpusModelName, + imageModel = "", defaultFableModel, defaultFableModelName, onModelChange, + onImageModelChange = () => {}, speedTestEndpoints, apiFormat, onApiFormatChange, @@ -208,6 +212,7 @@ export function ClaudeFormFields({ defaultHaikuModel || defaultSonnetModel || defaultOpusModel || + imageModel || defaultFableModel || apiFormat !== "anthropic" || apiKeyField !== "ANTHROPIC_AUTH_TOKEN" || @@ -383,12 +388,15 @@ export function ClaudeFormFields({ const renderModelInput = ( id: string, value: string, - field: ClaudeModelEnvField, + field: ClaudeModelEnvField | null, placeholder?: string, onValueChange?: (value: string) => void, ) => { const updateValue = - onValueChange ?? ((next: string) => onModelChange(field, next)); + onValueChange ?? + ((next: string) => { + if (field) onModelChange(field, next); + }); if (isCodexOauthPreset) { return ( @@ -938,6 +946,29 @@ export function ClaudeFormFields({ })} +
+ + {t("providerForm.imageModelLabel", { + defaultValue: "图片处理模型", + })} + + {renderModelInput( + "claudeImageModel", + imageModel, + null, + t("providerForm.imageModelPlaceholder", { + defaultValue: "例如 qwen3.7-plus", + }), + onImageModelChange, + )} +

+ {t("providerForm.imageModelHint", { + defaultValue: + "仅用于先识别图片内容;最终回答仍由上面的 Sonnet / Opus / Haiku 或兜底模型处理。留空时沿用现有不支持图片降级逻辑。", + })} +

+
+
{t("providerForm.fallbackModelLabel", { diff --git a/src/components/providers/forms/ProviderForm.tsx b/src/components/providers/forms/ProviderForm.tsx index 95240a4a21..9ad85daf70 100644 --- a/src/components/providers/forms/ProviderForm.tsx +++ b/src/components/providers/forms/ProviderForm.tsx @@ -305,6 +305,9 @@ function ProviderFormFull({ if (!supportsFullUrl) return false; return initialData?.meta?.isFullUrl ?? false; }); + const [localImageModel, setLocalImageModel] = useState( + () => initialData?.meta?.imageModel ?? "", + ); const [testConfig, setTestConfig] = useState( () => initialData?.meta?.testConfig ?? { enabled: false }, @@ -344,6 +347,7 @@ function ProviderFormFull({ setLocalIsFullUrl( supportsFullUrl ? (initialData?.meta?.isFullUrl ?? false) : false, ); + setLocalImageModel(initialData?.meta?.imageModel ?? ""); setTestConfig(initialData?.meta?.testConfig ?? { enabled: false }); setPricingConfig({ enabled: @@ -1420,6 +1424,10 @@ function ProviderFormFull({ supportsFullUrl && category !== "official" && localIsFullUrl ? true : undefined, + imageModel: + appId === "claude" && category !== "official" && localImageModel.trim() + ? localImageModel.trim() + : undefined, }; if (!isCodexOauthProvider && "codexFastMode" in nextMeta) { @@ -1542,6 +1550,9 @@ function ProviderFormFull({ if (appId === "gemini") { resetGeminiConfig({}, {}); } + if (appId === "claude") { + setLocalImageModel(""); + } if (appId === "opencode") { opencodeForm.resetOpencodeState(); omoDraft.resetOmoDraftState(); @@ -2011,9 +2022,11 @@ function ProviderFormFull({ defaultSonnetModelName={defaultSonnetModelName} defaultOpusModel={defaultOpusModel} defaultOpusModelName={defaultOpusModelName} + imageModel={localImageModel} defaultFableModel={defaultFableModel} defaultFableModelName={defaultFableModelName} onModelChange={handleModelChange} + onImageModelChange={setLocalImageModel} speedTestEndpoints={speedTestEndpoints} apiFormat={localApiFormat} onApiFormatChange={handleApiFormatChange} diff --git a/src/hooks/useProviderActions.ts b/src/hooks/useProviderActions.ts index 139154e66c..29d74a81c6 100644 --- a/src/hooks/useProviderActions.ts +++ b/src/hooks/useProviderActions.ts @@ -2,7 +2,13 @@ import { useCallback } from "react"; import { useQueryClient } from "@tanstack/react-query"; import { toast } from "sonner"; import { useTranslation } from "react-i18next"; -import { providersApi, settingsApi, openclawApi, type AppId } from "@/lib/api"; +import { + providersApi, + settingsApi, + openclawApi, + proxyApi, + type AppId, +} from "@/lib/api"; import type { Provider, UsageScript, @@ -41,6 +47,33 @@ export function useProviderActions( const deleteProviderMutation = useDeleteProviderMutation(activeApp); const switchProviderMutation = useSwitchProviderMutation(activeApp); + const claudeProviderNeedsProxy = useCallback( + (provider: Provider) => + activeApp === "claude" && + provider.category !== "official" && + Boolean(provider.meta?.imageModel?.trim()), + [activeApp], + ); + + const ensureClaudeProxyTakeover = useCallback( + async (provider: Provider) => { + if (!claudeProviderNeedsProxy(provider)) return; + + await proxyApi.setProxyTakeoverForApp("claude", true); + await queryClient.invalidateQueries({ queryKey: ["proxyStatus"] }); + await queryClient.invalidateQueries({ + queryKey: ["proxyTakeoverStatus"], + }); + toast.success( + t("notifications.claudeProxyTakeoverEnabled", { + defaultValue: "已接管 Claude Code 配置,请重启 Claude Code 后生效", + }), + { closeButton: true }, + ); + }, + [claudeProviderNeedsProxy, queryClient, t], + ); + // Claude 插件同步逻辑 const syncClaudePlugin = useCallback( async (provider: Provider) => { @@ -134,6 +167,27 @@ export function useProviderActions( const updateProvider = useCallback( async (provider: Provider, originalId?: string) => { await updateProviderMutation.mutateAsync({ provider, originalId }); + if (claudeProviderNeedsProxy(provider)) { + const currentProviderId = await providersApi.getCurrent(activeApp); + if ( + currentProviderId === provider.id || + (originalId && currentProviderId === originalId) + ) { + await ensureClaudeProxyTakeover(provider); + } + } else if (activeApp === "claude" && isProxyTakeover) { + const currentProviderId = await providersApi.getCurrent(activeApp); + if ( + currentProviderId === provider.id || + (originalId && currentProviderId === originalId) + ) { + await proxyApi.setProxyTakeoverForApp("claude", false); + await queryClient.invalidateQueries({ queryKey: ["proxyStatus"] }); + await queryClient.invalidateQueries({ + queryKey: ["proxyTakeoverStatus"], + }); + } + } // 更新托盘菜单(失败不影响主操作) try { @@ -145,7 +199,14 @@ export function useProviderActions( ); } }, - [updateProviderMutation], + [ + activeApp, + claudeProviderNeedsProxy, + ensureClaudeProxyTakeover, + isProxyTakeover, + queryClient, + updateProviderMutation, + ], ); // 切换供应商 @@ -231,6 +292,21 @@ export function useProviderActions( try { const result = await switchProviderMutation.mutateAsync(provider.id); + if (claudeProviderNeedsProxy(provider)) { + await ensureClaudeProxyTakeover(provider); + } else if (activeApp === "claude" && isProxyTakeover) { + await proxyApi.setProxyTakeoverForApp("claude", false); + await queryClient.invalidateQueries({ queryKey: ["proxyStatus"] }); + await queryClient.invalidateQueries({ + queryKey: ["proxyTakeoverStatus"], + }); + toast.success( + t("notifications.claudeProxyTakeoverDisabled", { + defaultValue: "已恢复 Claude Code 配置", + }), + { closeButton: true }, + ); + } await syncClaudePlugin(provider); // Show backfill warning if present @@ -275,9 +351,12 @@ export function useProviderActions( [ switchProviderMutation, syncClaudePlugin, + ensureClaudeProxyTakeover, activeApp, + claudeProviderNeedsProxy, isProxyRunning, isProxyTakeover, + queryClient, t, ], ); diff --git a/src/i18n/locales/en.json b/src/i18n/locales/en.json index aaa45b0c3d..f089cc69e2 100644 --- a/src/i18n/locales/en.json +++ b/src/i18n/locales/en.json @@ -230,6 +230,8 @@ "codexRestartRequired": "Switched successfully. Restart the client to apply changes.", "claudeDesktopRestartRequired": "Switched successfully. Restart Claude Desktop to apply changes.", "claudeDesktopProxyRestartRequired": "Switched successfully. Keep CC Switch running and restart Claude Desktop to apply changes.", + "claudeProxyTakeoverEnabled": "Claude Code configuration is now routed through CC Switch. Restart Claude Code to apply changes.", + "claudeProxyTakeoverDisabled": "Claude Code configuration restored", "addToConfigSuccess": "Added to config", "removeFromConfigSuccess": "Removed from config", "switchFailedTitle": "Switch failed", @@ -1078,6 +1080,9 @@ "codexApiFormatResponses": "OpenAI Responses API (Native)", "codexApiFormatOpenAIChat": "OpenAI Chat Completions (Requires routing)", "codexApiFormatHint": "Select the Codex API format actually supported by this provider; the config stays on Responses for newer Codex versions, while Chat Completions is converted through local routing.", + "imageModelLabel": "Image Processing Model", + "imageModelPlaceholder": "e.g. qwen3.7-plus", + "imageModelHint": "Only processes image content when the request contains images, then injects the result into context; the final response is still handled by the main model above. Leave empty to keep the current image fallback logic.", "authField": "Auth Field", "authFieldAuthToken": "ANTHROPIC_AUTH_TOKEN (Default)", "authFieldApiKey": "ANTHROPIC_API_KEY", diff --git a/src/i18n/locales/ja.json b/src/i18n/locales/ja.json index f94fe37013..d205e039d1 100644 --- a/src/i18n/locales/ja.json +++ b/src/i18n/locales/ja.json @@ -230,6 +230,8 @@ "codexRestartRequired": "切り替えました。反映するにはクライアントを再起動してください", "claudeDesktopRestartRequired": "切り替えました。反映するには Claude Desktop を再起動してください", "claudeDesktopProxyRestartRequired": "切り替えました。CC Switch を起動したまま Claude Desktop を再起動してください", + "claudeProxyTakeoverEnabled": "Claude Code 設定を CC Switch 経由にしました。反映するには Claude Code を再起動してください", + "claudeProxyTakeoverDisabled": "Claude Code 設定を復元しました", "addToConfigSuccess": "設定に追加しました", "removeFromConfigSuccess": "設定から削除しました", "switchFailedTitle": "切り替えに失敗しました", @@ -1078,6 +1080,9 @@ "codexApiFormatResponses": "OpenAI Responses API(ネイティブ)", "codexApiFormatOpenAIChat": "OpenAI Chat Completions(ルーティングが必要)", "codexApiFormatHint": "このプロバイダーが実際に対応している Codex API フォーマットを選択します。新しい Codex との互換性のため設定は Responses のままにし、Chat Completions はローカルルーティングで変換します。", + "imageModelLabel": "画像処理モデル", + "imageModelPlaceholder": "例: qwen3.7-plus", + "imageModelHint": "リクエストに画像が含まれる場合のみ画像内容を先に認識し、結果をコンテキストに注入します。最終的な回答は上記のメインモデルが処理します。空欄の場合は既存の画像フォールバックロジックが維持されます。", "authField": "認証フィールド", "authFieldAuthToken": "ANTHROPIC_AUTH_TOKEN(デフォルト)", "authFieldApiKey": "ANTHROPIC_API_KEY", diff --git a/src/i18n/locales/zh-TW.json b/src/i18n/locales/zh-TW.json index b8769d3137..0d28d1000e 100644 --- a/src/i18n/locales/zh-TW.json +++ b/src/i18n/locales/zh-TW.json @@ -230,6 +230,8 @@ "codexRestartRequired": "切換成功,請重新啟動客戶端以套用", "claudeDesktopRestartRequired": "切換成功,重新啟動 Claude Desktop 後生效", "claudeDesktopProxyRestartRequired": "切換成功,請保持 CC Switch 執行,並重新啟動 Claude Desktop 後生效", + "claudeProxyTakeoverEnabled": "已接管 Claude Code 設定,請重新啟動 Claude Code 後生效", + "claudeProxyTakeoverDisabled": "已還原 Claude Code 設定", "addToConfigSuccess": "已新增至設定", "removeFromConfigSuccess": "已從設定移除", "switchFailedTitle": "切換失敗", @@ -1050,6 +1052,9 @@ "codexApiFormatResponses": "OpenAI Responses API (原生)", "codexApiFormatOpenAIChat": "OpenAI Chat Completions (需開啟路由)", "codexApiFormatHint": "選擇供應商真實支援的 Codex API 格式;設定仍保持 Responses 以相容新版 Codex,Chat Completions 會透過本地路由自動轉換。", + "imageModelLabel": "圖片處理模型", + "imageModelPlaceholder": "例如 qwen3.7-plus", + "imageModelHint": "僅在請求包含圖片時先識別圖片內容,再把結果注入上下文;最終回答仍由上面的主模型處理。留空時保持現有圖片降級邏輯。", "authField": "驗證欄位", "authFieldAuthToken": "ANTHROPIC_AUTH_TOKEN(預設)", "authFieldApiKey": "ANTHROPIC_API_KEY", diff --git a/src/i18n/locales/zh.json b/src/i18n/locales/zh.json index ea5562ce6c..4a5fdc2967 100644 --- a/src/i18n/locales/zh.json +++ b/src/i18n/locales/zh.json @@ -230,6 +230,8 @@ "codexRestartRequired": "切换成功,请重启客户端以生效", "claudeDesktopRestartRequired": "切换成功,重启 Claude Desktop 后生效", "claudeDesktopProxyRestartRequired": "切换成功,请保持 CC Switch 运行,并重启 Claude Desktop 后生效", + "claudeProxyTakeoverEnabled": "已接管 Claude Code 配置,请重启 Claude Code 后生效", + "claudeProxyTakeoverDisabled": "已恢复 Claude Code 配置", "addToConfigSuccess": "已添加到配置", "removeFromConfigSuccess": "已从配置移除", "switchFailedTitle": "切换失败", @@ -1078,6 +1080,9 @@ "codexApiFormatResponses": "OpenAI Responses API (原生)", "codexApiFormatOpenAIChat": "OpenAI Chat Completions (需开启路由)", "codexApiFormatHint": "选择供应商真实支持的 Codex API 格式;配置仍保持 Responses 以兼容新版 Codex,Chat Completions 会通过本地路由自动转换。", + "imageModelLabel": "图片处理模型", + "imageModelPlaceholder": "例如 qwen3.7-plus", + "imageModelHint": "仅在请求包含图片时先识别图片内容,再把结果注入上下文;最终回答仍由上面的主模型处理。留空时保持现有图片降级逻辑。", "authField": "认证字段", "authFieldAuthToken": "ANTHROPIC_AUTH_TOKEN(默认)", "authFieldApiKey": "ANTHROPIC_API_KEY", diff --git a/src/types.ts b/src/types.ts index 0c3c33b0ed..c3a95a36e6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -179,6 +179,8 @@ export interface ProviderMeta { claudeDesktopMode?: "direct" | "proxy"; // Claude Desktop 本地路由模式:Claude-safe route -> upstream model claudeDesktopModelRoutes?: Record; + // 图片处理模型:仅供本地代理先识别图片,再把识别结果注入主模型上下文 + imageModel?: string; // 用量查询脚本配置 usage_script?: UsageScript; // 请求地址管理:测速后自动选择最佳端点 diff --git a/tests/hooks/useProviderActions.test.tsx b/tests/hooks/useProviderActions.test.tsx index fa1826e987..be09ff9477 100644 --- a/tests/hooks/useProviderActions.test.tsx +++ b/tests/hooks/useProviderActions.test.tsx @@ -55,6 +55,8 @@ vi.mock("@/lib/query", () => ({ const providersApiUpdateMock = vi.fn(); const providersApiUpdateTrayMenuMock = vi.fn(); +const providersApiGetCurrentMock = vi.fn(); +const proxyApiSetProxyTakeoverForAppMock = vi.fn(); const settingsApiGetMock = vi.fn(); const settingsApiApplyMock = vi.fn(); const openclawApiGetModelCatalogMock = vi.fn(); @@ -64,9 +66,14 @@ const openclawApiSetDefaultModelMock = vi.fn(); vi.mock("@/lib/api", () => ({ providersApi: { update: (...args: unknown[]) => providersApiUpdateMock(...args), + getCurrent: (...args: unknown[]) => providersApiGetCurrentMock(...args), updateTrayMenu: (...args: unknown[]) => providersApiUpdateTrayMenuMock(...args), }, + proxyApi: { + setProxyTakeoverForApp: (...args: unknown[]) => + proxyApiSetProxyTakeoverForAppMock(...args), + }, settingsApi: { get: (...args: unknown[]) => settingsApiGetMock(...args), applyClaudePluginConfig: (...args: unknown[]) => @@ -113,6 +120,8 @@ beforeEach(() => { switchProviderMutateAsync.mockReset(); providersApiUpdateMock.mockReset(); providersApiUpdateTrayMenuMock.mockReset(); + providersApiGetCurrentMock.mockReset(); + proxyApiSetProxyTakeoverForAppMock.mockReset(); settingsApiGetMock.mockReset(); settingsApiApplyMock.mockReset(); openclawApiGetModelCatalogMock.mockReset(); @@ -176,6 +185,32 @@ describe("useProviderActions", () => { expect(providersApiUpdateTrayMenuMock).toHaveBeenCalledTimes(1); }); + it("does not disable Claude proxy takeover when updating an inactive text-only provider", async () => { + updateProviderMutateAsync.mockResolvedValueOnce(undefined); + providersApiGetCurrentMock.mockResolvedValueOnce("active-provider"); + providersApiUpdateTrayMenuMock.mockResolvedValueOnce(true); + const { wrapper } = createWrapper(); + const provider = createProvider({ + id: "inactive-provider", + category: "custom", + meta: {}, + }); + + const { result } = renderHook( + () => useProviderActions("claude", true, true), + { + wrapper, + }, + ); + + await act(async () => { + await result.current.updateProvider(provider); + }); + + expect(providersApiGetCurrentMock).toHaveBeenCalledWith("claude"); + expect(proxyApiSetProxyTakeoverForAppMock).not.toHaveBeenCalled(); + }); + it("should not request plugin sync when switching non-Claude provider", async () => { switchProviderMutateAsync.mockResolvedValueOnce(undefined); const { wrapper } = createWrapper();