💄 style: add SenseNova-V6 series & SenseChat-Vision support (#7439)

hezhijie0327 · web-flow · commit 9c8597ffc0d3 · 2025-04-18T09:57:36.000+08:00
diff --git a/src/config/aiModels/sensenova.ts b/src/config/aiModels/sensenova.ts
@@ -4,6 +4,62 @@ import { AIChatModelCard } from '@/types/aiModel';
 // https://www.sensecore.cn/help/docs/model-as-a-service/nova/release
 
 const sensenovaChatModels: AIChatModelCard[] = [
+  {
+    abilities: {
+      reasoning: true,
+      vision: true,
+    },
+    contextWindowTokens: 131_072,
+    description:
+      '兼顾视觉、语言深度推理，实现慢思考和深度推理，呈现完整的思维链过程。',
+    displayName: 'SenseNova V6 Reasoner',
+    enabled: true,
+    id: 'SenseNova-V6-Reasoner',
+    pricing: {
+      currency: 'CNY',
+      input: 4,
+      output: 16,
+    },
+    releasedAt: '2025-04-14',
+    type: 'chat',
+  },
+  {
+    abilities: {
+      reasoning: true,
+      vision: true,
+    },
+    contextWindowTokens: 131_072,
+    description:
+      '实现图片、文本、视频能力的原生统一，突破传统多模态分立局限，在多模基础能力、语言基础能力等核心维度全面领先，文理兼修，在多项测评中多次位列国内外第一梯队水平。',
+    displayName: 'SenseNova V6 Turbo',
+    enabled: true,
+    id: 'SenseNova-V6-Turbo',
+    pricing: {
+      currency: 'CNY',
+      input: 1.5,
+      output: 4.5,
+    },
+    releasedAt: '2025-04-14',
+    type: 'chat',
+  },
+  {
+    abilities: {
+      vision: true,
+    },
+    contextWindowTokens: 131_072,
+    description:
+      '实现图片、文本、视频能力的原生统一，突破传统多模态分立局限，在OpenCompass和SuperCLUE评测中斩获双冠军。',
+    displayName: 'SenseNova V6 Pro',
+    enabled: true,
+    id: 'SenseNova-V6-Pro',
+    pricing: {
+      currency: 'CNY',
+      input: 9,
+      output: 3,
+    },
+    releasedAt: '2025-04-14',
+    type: 'chat',
+  },
   {
     abilities: {
       functionCall: true,
@@ -12,7 +68,6 @@ const sensenovaChatModels: AIChatModelCard[] = [
     description:
       '是基于V5.5的最新版本，较上版本在中英文基础能力，聊天，理科知识， 文科知识，写作，数理逻辑，字数控制 等几个维度的表现有显著提升。',
     displayName: 'SenseChat 5.5 1202',
-    enabled: true,
     id: 'SenseChat-5-1202',
     pricing: {
       currency: 'CNY',
@@ -30,7 +85,6 @@ const sensenovaChatModels: AIChatModelCard[] = [
     description:
       '是最新的轻量版本模型，达到全量模型90%以上能力，显著降低推理成本。',
     displayName: 'SenseChat Turbo 1202',
-    enabled: true,
     id: 'SenseChat-Turbo-1202',
     pricing: {
       currency: 'CNY',
@@ -48,7 +102,6 @@ const sensenovaChatModels: AIChatModelCard[] = [
     description:
       '最新版本模型 (V5.5)，128K上下文长度，在数学推理、英文对话、指令跟随以及长文本理解等领域能力显著提升，比肩GPT-4o。',
     displayName: 'SenseChat 5.5',
-    enabled: true,
     id: 'SenseChat-5',
     pricing: {
       currency: 'CNY',
@@ -58,10 +111,12 @@ const sensenovaChatModels: AIChatModelCard[] = [
     type: 'chat',
   },
   {
+    abilities: {
+      vision: true,
+    },
     contextWindowTokens: 32_768,
     description: '最新版本模型 (V5.5)，支持多图的输入，全面实现模型基础能力优化，在对象属性识别、空间关系、动作事件识别、场景理解、情感识别、逻辑常识推理和文本理解生成上都实现了较大提升。',
     displayName: 'SenseChat 5.5 Vision',
-    enabled: true,
     id: 'SenseChat-Vision',
     pricing: {
       currency: 'CNY',
@@ -78,7 +133,6 @@ const sensenovaChatModels: AIChatModelCard[] = [
     contextWindowTokens: 32_768,
     description: '适用于快速问答、模型微调场景',
     displayName: 'SenseChat 5.0 Turbo',
-    enabled: true,
     id: 'SenseChat-Turbo',
     pricing: {
       currency: 'CNY',
@@ -160,6 +214,67 @@ const sensenovaChatModels: AIChatModelCard[] = [
     },
     type: 'chat',
   },
+  {
+    contextWindowTokens: 32_768,
+    description:
+      'DeepSeek-V3 是一款由深度求索公司自研的MoE模型。DeepSeek-V3 多项评测成绩超越了 Qwen2.5-72B 和 Llama-3.1-405B 等其他开源模型，并在性能上和世界顶尖的闭源模型 GPT-4o 以及 Claude-3.5-Sonnet 不分伯仲。',
+    displayName: 'DeepSeek V3',
+    id: 'DeepSeek-V3',
+    pricing: {
+      currency: 'CNY',
+      input: 2,
+      output: 8,
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      reasoning: true,
+    },
+    contextWindowTokens: 32_768,
+    description:
+      'DeepSeek-R1 在后训练阶段大规模使用了强化学习技术，在仅有极少标注数据的情况下，极大提升了模型推理能力。在数学、代码、自然语言推理等任务上，性能比肩 OpenAI o1 正式版。',
+    displayName: 'DeepSeek R1',
+    id: 'DeepSeek-R1',
+    pricing: {
+      currency: 'CNY',
+      input: 4,
+      output: 16,
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      reasoning: true,
+    },
+    contextWindowTokens: 32_768,
+    description:
+      'DeepSeek-R1-Distill 模型是在开源模型的基础上通过微调训练得到的，训练过程中使用了由 DeepSeek-R1 生成的样本数据。',
+    displayName: 'DeepSeek R1 Distill Qwen 14B',
+    id: 'DeepSeek-R1-Distill-Qwen-14B',
+    pricing: {
+      currency: 'CNY',
+      input: 0,
+      output: 0,
+    },
+    type: 'chat',
+  },
+  {
+    abilities: {
+      reasoning: true,
+    },
+    contextWindowTokens: 8192,
+    description:
+      'DeepSeek-R1-Distill 模型是在开源模型的基础上通过微调训练得到的，训练过程中使用了由 DeepSeek-R1 生成的样本数据。',
+    displayName: 'DeepSeek R1 Distill Qwen 32B',
+    id: 'DeepSeek-R1-Distill-Qwen-32B',
+    pricing: {
+      currency: 'CNY',
+      input: 0,
+      output: 0,
+    },
+    type: 'chat',
+  },
 ];
 
 export const allModels = [...sensenovaChatModels];
diff --git a/src/libs/agent-runtime/sensenova/index.ts b/src/libs/agent-runtime/sensenova/index.ts
@@ -1,6 +1,8 @@
 import { ModelProvider } from '../types';
 import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
 
+import { convertSenseNovaMessage } from '../utils/sensenovaHelpers';
+
 import type { ChatModelCard } from '@/types/llm';
 
 export interface SenseNovaModelCard {
@@ -11,14 +13,20 @@ export const LobeSenseNovaAI = LobeOpenAICompatibleFactory({
   baseURL: 'https://api.sensenova.cn/compatible-mode/v1',
   chatCompletion: {
     handlePayload: (payload) => {
-      const { frequency_penalty, temperature, top_p, ...rest } = payload;
+      const { frequency_penalty, messages, model, temperature, top_p, ...rest } = payload;
 
       return {
         ...rest,
         frequency_penalty:
           frequency_penalty !== undefined && frequency_penalty > 0 && frequency_penalty <= 2
             ? frequency_penalty
             : undefined,
+        messages: messages.map((message) => 
+          message.role !== 'user' || !/^Sense(Nova-V6|Chat-Vision)/.test(model)
+            ? message
+            : { ...message, content: convertSenseNovaMessage(message.content) }
+        ) as any[],
+        model,
         stream: true,
         temperature:
           temperature !== undefined && temperature > 0 && temperature <= 2
@@ -35,12 +43,17 @@ export const LobeSenseNovaAI = LobeOpenAICompatibleFactory({
     const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');
 
     const functionCallKeywords = [
-      'deepseek-v3',
       'sensechat-5',
     ];
 
+    const visionKeywords = [
+      'vision',
+      'sensenova-v6',
+    ];
+
     const reasoningKeywords = [
-      'deepseek-r1'
+      'deepseek-r1',
+      'sensenova-v6',
     ];
 
     client.baseURL = 'https://api.sensenova.cn/v1/llm';
@@ -66,7 +79,7 @@ export const LobeSenseNovaAI = LobeOpenAICompatibleFactory({
             || knownModel?.abilities?.reasoning
             || false,
           vision:
-            model.id.toLowerCase().includes('vision')
+            visionKeywords.some(keyword => model.id.toLowerCase().includes(keyword))
             || knownModel?.abilities?.vision
             || false,
         };
diff --git a/src/libs/agent-runtime/utils/sensenovaHelpers.test.ts b/src/libs/agent-runtime/utils/sensenovaHelpers.test.ts
@@ -0,0 +1,92 @@
+import { describe, expect, it } from 'vitest';
+import { convertSenseNovaMessage } from './sensenovaHelpers';
+
+// Unit tests for convertSenseNovaMessage: string wrapping, text pass-through,
+// image_url flattening (base64 data URL vs. remote URL) and dropping of unsupported parts.
+describe('convertSenseNovaMessage', () => {
+  it('should convert string content to text type array', () => {
+    const content = 'Hello world';
+    const result = convertSenseNovaMessage(content);
+
+    expect(result).toEqual([{ type: 'text', text: 'Hello world' }]);
+  });
+
+  it('should handle array content with text type', () => {
+    const content = [
+      { type: 'text', text: 'Hello world' }
+    ];
+    const result = convertSenseNovaMessage(content);
+
+    expect(result).toEqual([{ type: 'text', text: 'Hello world' }]);
+  });
+
+  it('should convert image_url with base64 format to image_base64', () => {
+    const content = [
+      { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,ABCDEF123456' } }
+    ];
+    const result = convertSenseNovaMessage(content);
+
+    expect(result).toEqual([
+      { type: 'image_base64', image_base64: 'ABCDEF123456' }
+    ]);
+  });
+
+  it('should keep image_url format for non-base64 urls', () => {
+    const content = [
+      { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }
+    ];
+    const result = convertSenseNovaMessage(content);
+
+    expect(result).toEqual([
+      { type: 'image_url', image_url: 'https://example.com/image.jpg' }
+    ]);
+  });
+
+  it('should handle mixed content types', () => {
+    const content = [
+      { type: 'text', text: 'Hello world' },
+      { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,ABCDEF123456' } },
+      { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }
+    ];
+    const result = convertSenseNovaMessage(content);
+
+    expect(result).toEqual([
+      { type: 'text', text: 'Hello world' },
+      { type: 'image_base64', image_base64: 'ABCDEF123456' },
+      { type: 'image_url', image_url: 'https://example.com/image.jpg' }
+    ]);
+  });
+
+  it('should filter out invalid items', () => {
+    const content = [
+      { type: 'text', text: 'Hello world' },
+      { type: 'unknown', value: 'should be filtered' },
+      { type: 'image_url', image_url: { notUrl: 'missing url field' } }
+    ];
+    const result = convertSenseNovaMessage(content);
+
+    expect(result).toEqual([
+      { type: 'text', text: 'Hello world' }
+    ]);
+  });
+
+  it('should handle the example input format correctly', () => {
+    // Mirrors the example image part, whose `image_url` carries a `detail` field
+    // next to `url`; only the base64 payload is expected in the converted part.
+    const result = convertSenseNovaMessage([
+      { type: 'text', text: 'Hi' },
+      {
+        image_url: {
+          detail: 'auto',
+          url: 'data:image/jpeg;base64,ABCDEF123456',
+        },
+        type: 'image_url',
+      },
+    ]);
+
+    expect(result).toEqual([
+      { type: 'text', text: 'Hi' },
+      { type: 'image_base64', image_base64: 'ABCDEF123456' },
+    ]);
+  });
+});
diff --git a/src/libs/agent-runtime/utils/sensenovaHelpers.ts b/src/libs/agent-runtime/utils/sensenovaHelpers.ts
@@ -0,0 +1,46 @@
+/**
+ * Matches the prefix of a base64-encoded image data URL of any image subtype,
+ * e.g. `data:image/png;base64,` or `data:image/jpeg;base64,`.
+ */
+const BASE64_IMAGE_DATA_URL = /^data:image\/[^,]*;base64,/i;
+
+/**
+ * Converts OpenAI-style message content into SenseNova content parts.
+ *
+ * - A plain string becomes a single `text` part.
+ * - `text` parts are passed through unchanged.
+ * - `image_url` parts are flattened: a base64 image data URL becomes
+ *   `{ type: 'image_base64', image_base64 }` holding only the payload after the
+ *   comma; any other URL becomes `{ type: 'image_url', image_url }` with the URL
+ *   as a plain string.
+ * - Every other entry (unknown type, missing or non-string URL, `null`) is dropped.
+ *
+ * @param content - A string, an array of content parts, or `null`/`undefined`.
+ * @returns The converted parts, or `undefined` when `content` is `null`/`undefined`.
+ */
+export const convertSenseNovaMessage = (content: any) => {
+  // A bare string is wrapped into a single text part.
+  if (typeof content === 'string') {
+    return [{ text: content, type: 'text' }];
+  }
+
+  // Array content may mix text and images: convert each part, drop unsupported ones.
+  return content
+    ?.map((item: any) => {
+      // Optional access keeps null/primitive entries from throwing; they map to null.
+      if (item?.type === 'text') return item;
+
+      const url = item?.type === 'image_url' ? item.image_url?.url : undefined;
+      if (typeof url !== 'string' || url === '') return null;
+
+      // Any base64 image data URL (PNG, WebP, ... not only JPEG) is sent as
+      // image_base64 rather than being forwarded as an image_url.
+      return BASE64_IMAGE_DATA_URL.test(url)
+        ? {
+            image_base64: url.slice(url.indexOf(',') + 1),
+            type: 'image_base64',
+          }
+        : { image_url: url, type: 'image_url' };
+    })
+    .filter(Boolean);
+};