Skip to content

Commit 760a5f1

Browse files
committed
feat(llm): enhance AI SDK integration with improved media type handling and provider-specific configurations
- Add mediaType support to image content conversion for better AI SDK compliance
- Provide default media type (image/png) for base64 images without explicit type
- Return structured image data with mediaType from convertImageSource method
- Remove custom baseUrl OpenAI Compatible mode logic from modelFactory
- Rename 'gemini' provider case to 'google' for consistency with AI SDK
- Restructure streamText parameters with organized sections for core and advanced options
- Add provider-specific thinking mode configurations for OpenAI, Anthropic, and Google
- Implement OpenAI-specific parameters (logitBias, parallelToolCalls, reasoningEffort)
- Add timeout and maxRetries configuration support to streaming service
- Ensure mimeType defaults to 'image/png' when media_type is not provided in Agent
- Improve code organization and maintainability of LLM service layer
1 parent 67a416c commit 760a5f1

File tree

12 files changed

+363
-61
lines changed

12 files changed

+363
-61
lines changed

src/main/services/llm/core/MessageConverter.ts

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,22 +75,26 @@ export class MessageConverter {
7575

7676
/**
7777
* 转换 user 消息的多模态内容
78+
* 改进:添加 mediaType 支持,更符合 AI SDK 规范
7879
*/
7980
private convertUserContentParts(
8081
content: MessageContentPart[]
81-
): Array<{ type: 'text'; text: string } | { type: 'image'; image: string | URL }> {
82-
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string | URL }> =
83-
[]
82+
): Array<{ type: 'text'; text: string } | { type: 'image'; image: string | URL; mediaType?: string }> {
83+
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string | URL; mediaType?: string }> = []
8484

8585
for (const item of content) {
8686
if (item.type === 'text' && 'text' in item) {
8787
parts.push({ type: 'text', text: item.text })
8888
} else if (item.type === 'image' && 'source' in item) {
89-
const imageUrl = this.convertImageSource(
89+
const result = this.convertImageSource(
9090
item.source as { type: string; url?: string; data?: string; media_type?: string }
9191
)
92-
if (imageUrl) {
93-
parts.push({ type: 'image', image: imageUrl })
92+
if (result) {
93+
parts.push({
94+
type: 'image',
95+
image: result.image,
96+
...(result.mediaType && { mediaType: result.mediaType }),
97+
})
9498
}
9599
}
96100
}
@@ -100,18 +104,26 @@ export class MessageConverter {
100104

101105
/**
102106
* 转换图片源
107+
* 改进:返回 mediaType 以便 AI SDK 更好地处理
103108
*/
104109
private convertImageSource(source: {
105110
type: string
106111
url?: string
107112
data?: string
108113
media_type?: string
109-
}): string | URL | null {
114+
}): { image: string | URL; mediaType?: string } | null {
110115
if (source.type === 'url' && source.url) {
111-
return source.url
116+
return {
117+
image: source.url,
118+
mediaType: source.media_type,
119+
}
112120
}
113-
if (source.type === 'base64' && source.data && source.media_type) {
114-
return `data:${source.media_type};base64,${source.data}`
121+
if (source.type === 'base64' && source.data) {
122+
const mediaType = source.media_type || 'image/png' // 默认 PNG
123+
return {
124+
image: `data:${mediaType};base64,${source.data}`,
125+
mediaType,
126+
}
115127
}
116128
return null
117129
}

src/main/services/llm/modelFactory.ts

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,6 @@ function createBuiltinModel(
4747
throw new Error(`Unknown builtin provider: ${providerId}`)
4848
}
4949

50-
// 如果用户自定义了 baseUrl 且不是默认值,使用 OpenAI Compatible 模式
51-
// 这样可以兼容各种 OpenAI-compatible 的 API(如 NVIDIA、智谱等)
52-
const isCustomBaseUrl = baseUrl && baseUrl !== providerDef.baseUrl
53-
54-
if (isCustomBaseUrl) {
55-
console.log(`[ModelFactory] Using OpenAI Compatible mode for custom baseUrl: ${baseUrl}`)
56-
const provider = createOpenAICompatible({
57-
name: `${providerId}-custom`,
58-
apiKey,
59-
baseURL: baseUrl,
60-
})
61-
return provider(model)
62-
}
63-
6450
switch (providerId) {
6551
case 'openai': {
6652
const openai = createOpenAI({
@@ -127,7 +113,7 @@ function createCustomModel(
127113
return anthropic(model)
128114
}
129115

130-
case 'gemini': {
116+
case 'google': {
131117
const google = createGoogleGenerativeAI({
132118
apiKey,
133119
baseURL: baseUrl,

src/main/services/llm/services/StreamingService.ts

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,14 @@ export class StreamingService {
7575
// 转换工具
7676
const coreTools = tools ? this.toolConverter.convert(tools) : undefined
7777

78-
// 流式生成 - AI SDK 6.0 自动处理所有 reasoning
79-
const result = streamText({
78+
// 构建 streamText 参数
79+
const streamParams: Parameters<typeof streamText>[0] = {
8080
model,
8181
messages: coreMessages,
8282
tools: coreTools,
8383
activeTools, // 动态限制可用工具
84+
85+
// 核心参数
8486
maxOutputTokens: config.maxTokens,
8587
temperature: config.temperature,
8688
topP: config.topP,
@@ -89,7 +91,66 @@ export class StreamingService {
8991
presencePenalty: config.presencePenalty,
9092
stopSequences: config.stopSequences,
9193
seed: config.seed,
94+
95+
// AI SDK 高级参数
96+
maxRetries: config.maxRetries,
97+
toolChoice: config.toolChoice,
98+
headers: config.headers,
99+
timeout: config.timeout, // 超时配置
92100
abortSignal,
101+
}
102+
103+
// OpenAI 特定参数
104+
if (config.provider === 'openai') {
105+
if (config.logitBias) {
106+
// @ts-expect-error - OpenAI specific parameter
107+
streamParams.logitBias = config.logitBias
108+
}
109+
if (config.parallelToolCalls !== undefined) {
110+
streamParams.providerOptions = {
111+
...streamParams.providerOptions,
112+
openai: {
113+
...streamParams.providerOptions?.openai,
114+
parallelToolCalls: config.parallelToolCalls,
115+
},
116+
}
117+
}
118+
}
119+
120+
// 启用 thinking 模式(各厂商配置不同)
121+
if (config.enableThinking) {
122+
if (config.provider === 'gemini') {
123+
// Google Gemini: thinkingConfig
124+
streamParams.providerOptions = {
125+
google: {
126+
thinkingConfig: {
127+
includeThoughts: true,
128+
},
129+
},
130+
}
131+
} else if (config.provider === 'anthropic') {
132+
// Anthropic Claude: thinking.type = "enabled"
133+
streamParams.providerOptions = {
134+
anthropic: {
135+
thinking: {
136+
type: 'enabled',
137+
},
138+
},
139+
}
140+
} else if (config.provider === 'openai') {
141+
// OpenAI: reasoningEffort + forceReasoning
142+
streamParams.providerOptions = {
143+
openai: {
144+
reasoningEffort: 'medium',
145+
forceReasoning: true,
146+
},
147+
}
148+
}
149+
}
150+
151+
// 流式生成 - AI SDK 6.0 自动处理所有 reasoning
152+
const result = streamText({
153+
...streamParams,
93154
// 自动修复工具调用 JSON 格式错误
94155
experimental_repairToolCall: async ({ toolCall, error }) => {
95156
logger.llm.warn('[StreamingService] Tool call parse error, attempting repair:', {

src/renderer/agent/core/Agent.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ class AgentClass {
290290
.filter((p): p is ImageContent => p.type === 'image')
291291
.map(p => ({
292292
id: crypto.randomUUID(),
293-
mimeType: p.source.media_type,
293+
mimeType: (p.source.media_type || 'image/png') as string,
294294
base64: p.source.data,
295295
}))
296296
}

0 commit comments

Comments
 (0)