Skip to content

Commit 65a0c35

Browse files
authored
Add Qwen 3 Coder models to Amazon Bedrock models (cline#7022)
* Add Qwen 3 Coder models to Amazon Bedrock models * Update comments to reference qwen * Update cost.ts to round to avoid flakey tests * remove math.round
1 parent 0c8e02c commit 65a0c35

File tree

5 files changed

+268
-2
lines changed

5 files changed

+268
-2
lines changed

.changeset/shaggy-zebras-bake.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"claude-dev": patch
3+
---
4+
5+
Add Qwen3 models to Amazon Bedrock provider

src/core/api/providers/bedrock.ts

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
} from "@aws-sdk/client-bedrock-runtime"
1111
import { fromNodeProviderChain } from "@aws-sdk/credential-providers"
1212
import { BedrockModelId, bedrockDefaultModelId, bedrockModels, CLAUDE_SONNET_1M_SUFFIX, ModelInfo } from "@shared/api"
13-
import { calculateApiCostOpenAI } from "@utils/cost"
13+
import { calculateApiCostOpenAI, calculateApiCostQwen } from "@utils/cost"
1414
import { ExtensionRegistryInfo } from "@/registry"
1515
import { ApiHandler, CommonApiHandlerOptions } from "../"
1616
import { withRetry } from "../retry"
@@ -150,6 +150,12 @@ export class AwsBedrockHandler implements ApiHandler {
150150
return
151151
}
152152

153+
// Check if this is a Qwen model
154+
if (baseModelId.includes("qwen")) {
155+
yield* this.createQwenMessage(systemPrompt, messages, modelId, model)
156+
return
157+
}
158+
153159
// Check if this is a Deepseek model
154160
if (baseModelId.includes("deepseek")) {
155161
yield* this.createDeepseekMessage(systemPrompt, messages, modelId, model)
@@ -1126,4 +1132,139 @@ export class AwsBedrockHandler implements ApiHandler {
11261132
}
11271133
}
11281134
}
1135+
1136+
/**
 * Streams a completion from a Qwen model hosted on AWS Bedrock.
 *
 * Qwen models are invoked through the non-streaming Converse API, so the
 * full response is fetched up front and then re-emitted in fixed-size
 * slices to simulate streaming for the caller.
 */
private async *createQwenMessage(
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
	modelId: string,
	model: { id: string; info: ModelInfo },
): ApiStream {
	const client = await this.getBedrockClient()

	// Build the Converse request: converted conversation, optional system
	// prompt, and a deterministic (temperature 0) inference config.
	const command = new ConverseCommand({
		modelId: modelId,
		messages: this.formatMessagesForConverseAPI(messages),
		system: systemPrompt ? [{ text: systemPrompt }] : undefined,
		inferenceConfig: {
			maxTokens: model.info.maxTokens || 8192,
			temperature: 0,
		},
	})

	try {
		const inputTokenEstimate = this.estimateInputTokens(systemPrompt, messages)
		let outputTokens = 0

		// Single non-streaming round trip.
		const response = await client.send(command)

		// Separate the response into regular text and (optional) reasoning.
		let fullText = ""
		let reasoningText = ""
		for (const contentBlock of response.output?.message?.content ?? []) {
			if ("reasoningContent" in contentBlock && contentBlock.reasoningContent) {
				// Reasoning arrives nested: reasoningContent.reasoningText.text
				const reasoning = contentBlock.reasoningContent
				if ("reasoningText" in reasoning && reasoning.reasoningText && "text" in reasoning.reasoningText) {
					reasoningText += reasoning.reasoningText.text
				}
			} else if ("text" in contentBlock && contentBlock.text) {
				fullText += contentBlock.text
			}
		}

		// Prefer provider-reported usage; fall back to local estimates.
		// When the API reports usage, it is emitted before the content.
		if (response.usage) {
			const actualInputTokens = response.usage.inputTokens || inputTokenEstimate
			const actualOutputTokens = response.usage.outputTokens || this.estimateTokenCount(fullText + reasoningText)
			outputTokens = actualOutputTokens
			yield {
				type: "usage",
				inputTokens: actualInputTokens,
				outputTokens: actualOutputTokens,
				totalCost: calculateApiCostQwen(model.info, actualInputTokens, actualOutputTokens, 0, 0),
			}
		} else {
			// Estimate covers both regular text and reasoning.
			outputTokens = this.estimateTokenCount(fullText + reasoningText)
		}

		// Re-emit the buffered response in 1000-character slices so the
		// caller sees a stream: reasoning first, then the answer text.
		// (String.prototype.slice clamps the end index, so no Math.min is
		// needed for the final partial chunk.)
		const sliceSize = 1000
		for (let start = 0; start < reasoningText.length; start += sliceSize) {
			yield {
				type: "reasoning",
				reasoning: reasoningText.slice(start, start + sliceSize),
			}
		}
		for (let start = 0; start < fullText.length; start += sliceSize) {
			yield {
				type: "text",
				text: fullText.slice(start, start + sliceSize),
			}
		}

		// Usage was not reported by the API: emit the estimates last.
		if (!response.usage) {
			yield {
				type: "usage",
				inputTokens: inputTokenEstimate,
				outputTokens: outputTokens,
				totalCost: calculateApiCostQwen(model.info, inputTokenEstimate, outputTokens, 0, 0),
			}
		}
	} catch (error) {
		console.error("Error with Qwen model via Converse API:", error)

		// Surface as much detail as possible; AWS SDK errors carry a name.
		let errorMessage = "Failed to process Qwen model request"
		if (error instanceof Error) {
			errorMessage = "name" in error ? `${error.name}: ${error.message}` : error.message
		}

		// NOTE(review): failures are reported to the caller as a text chunk
		// instead of being thrown — presumably to match the other Bedrock
		// model paths; confirm this is the intended contract.
		yield {
			type: "text",
			text: `[ERROR] ${errorMessage}`,
		}
	}
}
11291270
}

src/shared/api.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,26 @@ export const bedrockModels = {
659659
description:
660660
"A compact 20B open-weight Mixture-of-Experts language model designed for strong reasoning and tool use, ideal for edge devices and local inference.",
661661
},
662+
"qwen.qwen3-coder-30b-a3b-v1:0": {
663+
maxTokens: 8192,
664+
contextWindow: 262_144,
665+
supportsImages: false,
666+
supportsPromptCache: false,
667+
inputPrice: 0.15,
668+
outputPrice: 0.6,
669+
description:
670+
"Qwen3 Coder 30B MoE model with 3.3B activated parameters, optimized for code generation and analysis with 256K context window.",
671+
},
672+
"qwen.qwen3-coder-480b-a35b-v1:0": {
673+
maxTokens: 8192,
674+
contextWindow: 262_144,
675+
supportsImages: false,
676+
supportsPromptCache: false,
677+
inputPrice: 0.22,
678+
outputPrice: 1.8,
679+
description:
680+
"Qwen3 Coder 480B flagship MoE model with 35B activated parameters, designed for complex coding tasks with advanced reasoning capabilities and 256K context window.",
681+
},
662682
} as const satisfies Record<string, ModelInfo>
663683

664684
// OpenRouter

src/utils/cost.test.ts

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { describe, it } from "mocha"
22
import "should"
33
import { ModelInfo } from "@shared/api"
4-
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "@utils/cost"
4+
import { calculateApiCostAnthropic, calculateApiCostOpenAI, calculateApiCostQwen } from "@utils/cost"
55

66
describe("Cost Utilities", () => {
77
describe("calculateApiCostAnthropic", () => {
@@ -123,4 +123,79 @@ describe("Cost Utilities", () => {
123123
cost.should.equal(0)
124124
})
125125
})
126+
127+
describe("calculateApiCostQwen", () => {
128+
it("should calculate basic input/output costs", () => {
129+
const modelInfo: ModelInfo = {
130+
supportsPromptCache: false,
131+
inputPrice: 0.15, // Qwen 30B pricing
132+
outputPrice: 0.6,
133+
}
134+
135+
const cost = calculateApiCostQwen(modelInfo, 1000, 500)
136+
// Input: (0.15 / 1_000_000) * 1000 = 0.00015
137+
// Output: (0.6 / 1_000_000) * 500 = 0.0003
138+
// Total: 0.00015 + 0.0003 = 0.00045
139+
cost.should.equal(0.00045)
140+
})
141+
142+
it("should handle missing prices", () => {
143+
const modelInfo: ModelInfo = {
144+
supportsPromptCache: true,
145+
// No prices specified
146+
}
147+
148+
const cost = calculateApiCostQwen(modelInfo, 1000, 500)
149+
cost.should.equal(0)
150+
})
151+
152+
it("should use real Qwen model configuration (30B)", () => {
153+
const modelInfo: ModelInfo = {
154+
maxTokens: 8192,
155+
contextWindow: 262_144,
156+
supportsImages: false,
157+
supportsPromptCache: false,
158+
inputPrice: 0.15,
159+
outputPrice: 0.6,
160+
}
161+
162+
const cost = calculateApiCostQwen(modelInfo, 1000, 500, 0, 0)
163+
// Input: (0.15 / 1_000_000) * 1000 = 0.00015
164+
// Output: (0.6 / 1_000_000) * 500 = 0.0003
165+
// Total: 0.00015 + 0.0003 = 0.00045
166+
cost.should.equal(0.00045)
167+
})
168+
169+
it("should handle cache tokens correctly (Qwen-style)", () => {
170+
const modelInfo: ModelInfo = {
171+
supportsPromptCache: true,
172+
inputPrice: 0.15,
173+
outputPrice: 0.6,
174+
cacheWritesPrice: 0.2,
175+
cacheReadsPrice: 0.05,
176+
}
177+
178+
// Qwen-style: inputTokens includes cached tokens
179+
const cost = calculateApiCostQwen(modelInfo, 2100, 1000, 1500, 500)
180+
// Cache writes: (0.2 / 1_000_000) * 1500 = 0.0003
181+
// Cache reads: (0.05 / 1_000_000) * 500 = 0.000025
182+
// Input: (0.15 / 1_000_000) * (2100 - 1500 - 500) = 0.000015
183+
// Output: (0.6 / 1_000_000) * 1000 = 0.0006
184+
// Total: 0.0003 + 0.000025 + 0.000015 + 0.0006 = 0.00094
185+
cost.should.equal(0.00094)
186+
})
187+
188+
it("should handle zero token counts", () => {
189+
const modelInfo: ModelInfo = {
190+
supportsPromptCache: true,
191+
inputPrice: 0.15,
192+
outputPrice: 0.6,
193+
cacheWritesPrice: 0.2,
194+
cacheReadsPrice: 0.05,
195+
}
196+
197+
const cost = calculateApiCostQwen(modelInfo, 0, 0, 0, 0)
198+
cost.should.equal(0)
199+
})
200+
})
126201
})

src/utils/cost.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,28 @@ export function calculateApiCostOpenAI(
110110
thinkingBudgetTokens,
111111
)
112112
}
113+
114+
// For Qwen compliant usage, follows OpenAI-style token counting where input tokens include cached tokens
115+
export function calculateApiCostQwen(
116+
modelInfo: ModelInfo,
117+
inputTokens: number, // For Qwen-style, this includes cached tokens
118+
outputTokens: number,
119+
cacheCreationInputTokens?: number,
120+
cacheReadInputTokens?: number,
121+
thinkingBudgetTokens?: number,
122+
): number {
123+
const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
124+
const cacheReadInputTokensNum = cacheReadInputTokens || 0
125+
// Calculate non-cached tokens for the internal function's 'inputTokens' parameter
126+
const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
127+
// Pass the original 'inputTokens' as 'totalInputTokensForPricing' for tier lookup
128+
return calculateApiCostInternal(
129+
modelInfo,
130+
nonCachedInputTokens,
131+
outputTokens,
132+
cacheCreationInputTokensNum,
133+
cacheReadInputTokensNum,
134+
inputTokens,
135+
thinkingBudgetTokens,
136+
)
137+
}

0 commit comments

Comments
 (0)