Skip to content

Commit 65a0c35

Browse files
authored
Add Qwen 3 Coder models to Amazon Bedrock models (cline#7022)
* Add Qwen 3 Coder models to Amazon Bedrock models * Update comments to reference qwen * Update cost.ts to round to avoid flakey tests * remove math.round
1 parent 0c8e02c commit 65a0c35

File tree

5 files changed

+268
-2
lines changed

5 files changed

+268
-2
lines changed

.changeset/shaggy-zebras-bake.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"claude-dev": patch
3+
---
4+
5+
Add Qwen3 models to Amazon Bedrock provider

src/core/api/providers/bedrock.ts

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
} from "@aws-sdk/client-bedrock-runtime"
1111
import { fromNodeProviderChain } from "@aws-sdk/credential-providers"
1212
import { BedrockModelId, bedrockDefaultModelId, bedrockModels, CLAUDE_SONNET_1M_SUFFIX, ModelInfo } from "@shared/api"
13-
import { calculateApiCostOpenAI } from "@utils/cost"
13+
import { calculateApiCostOpenAI, calculateApiCostQwen } from "@utils/cost"
1414
import { ExtensionRegistryInfo } from "@/registry"
1515
import { ApiHandler, CommonApiHandlerOptions } from "../"
1616
import { withRetry } from "../retry"
@@ -150,6 +150,12 @@ export class AwsBedrockHandler implements ApiHandler {
150150
return
151151
}
152152

153+
// Check if this is a Qwen model
154+
if (baseModelId.includes("qwen")) {
155+
yield* this.createQwenMessage(systemPrompt, messages, modelId, model)
156+
return
157+
}
158+
153159
// Check if this is a Deepseek model
154160
if (baseModelId.includes("deepseek")) {
155161
yield* this.createDeepseekMessage(systemPrompt, messages, modelId, model)
@@ -1126,4 +1132,139 @@ export class AwsBedrockHandler implements ApiHandler {
11261132
}
11271133
}
11281134
}
1135+
1136+
/**
 * Streams a completion from a Qwen model hosted on AWS Bedrock.
 *
 * Qwen models are invoked through the non-streaming Converse API, so the
 * full response is fetched up front and then re-emitted in fixed-size
 * slices to simulate streaming for the caller.
 */
private async *createQwenMessage(
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
	modelId: string,
	model: { id: string; info: ModelInfo },
): ApiStream {
	const client = await this.getBedrockClient()

	// Build the Converse request: converted conversation, optional system
	// prompt, and a deterministic (temperature 0) inference config.
	const command = new ConverseCommand({
		modelId: modelId,
		messages: this.formatMessagesForConverseAPI(messages),
		system: systemPrompt ? [{ text: systemPrompt }] : undefined,
		inferenceConfig: {
			maxTokens: model.info.maxTokens || 8192,
			temperature: 0,
		},
	})

	try {
		const inputTokenEstimate = this.estimateInputTokens(systemPrompt, messages)
		let outputTokens = 0

		// Single non-streaming round trip.
		const response = await client.send(command)

		// Separate the response into regular text and (optional) reasoning.
		let fullText = ""
		let reasoningText = ""
		for (const contentBlock of response.output?.message?.content ?? []) {
			if ("reasoningContent" in contentBlock && contentBlock.reasoningContent) {
				// Reasoning arrives nested: reasoningContent.reasoningText.text
				const reasoning = contentBlock.reasoningContent
				if ("reasoningText" in reasoning && reasoning.reasoningText && "text" in reasoning.reasoningText) {
					reasoningText += reasoning.reasoningText.text
				}
			} else if ("text" in contentBlock && contentBlock.text) {
				fullText += contentBlock.text
			}
		}

		// Prefer provider-reported usage; fall back to local estimates.
		// When the API reports usage, it is emitted before the content.
		if (response.usage) {
			const actualInputTokens = response.usage.inputTokens || inputTokenEstimate
			const actualOutputTokens = response.usage.outputTokens || this.estimateTokenCount(fullText + reasoningText)
			outputTokens = actualOutputTokens
			yield {
				type: "usage",
				inputTokens: actualInputTokens,
				outputTokens: actualOutputTokens,
				totalCost: calculateApiCostQwen(model.info, actualInputTokens, actualOutputTokens, 0, 0),
			}
		} else {
			// Estimate covers both regular text and reasoning.
			outputTokens = this.estimateTokenCount(fullText + reasoningText)
		}

		// Re-emit the buffered response in 1000-character slices so the
		// caller sees a stream: reasoning first, then the answer text.
		// (String.prototype.slice clamps the end index, so no Math.min is
		// needed for the final partial chunk.)
		const sliceSize = 1000
		for (let start = 0; start < reasoningText.length; start += sliceSize) {
			yield {
				type: "reasoning",
				reasoning: reasoningText.slice(start, start + sliceSize),
			}
		}
		for (let start = 0; start < fullText.length; start += sliceSize) {
			yield {
				type: "text",
				text: fullText.slice(start, start + sliceSize),
			}
		}

		// Usage was not reported by the API: emit the estimates last.
		if (!response.usage) {
			yield {
				type: "usage",
				inputTokens: inputTokenEstimate,
				outputTokens: outputTokens,
				totalCost: calculateApiCostQwen(model.info, inputTokenEstimate, outputTokens, 0, 0),
			}
		}
	} catch (error) {
		console.error("Error with Qwen model via Converse API:", error)

		// Surface as much detail as possible; AWS SDK errors carry a name.
		let errorMessage = "Failed to process Qwen model request"
		if (error instanceof Error) {
			errorMessage = "name" in error ? `${error.name}: ${error.message}` : error.message
		}

		// NOTE(review): failures are reported to the caller as a text chunk
		// instead of being thrown — presumably to match the other Bedrock
		// model paths; confirm this is the intended contract.
		yield {
			type: "text",
			text: `[ERROR] ${errorMessage}`,
		}
	}
}
11291270
}

src/shared/api.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,26 @@ export const bedrockModels = {
659659
description:
660660
"A compact 20B open-weight Mixture-of-Experts language model designed for strong reasoning and tool use, ideal for edge devices and local inference.",
661661
},
662+
"qwen.qwen3-coder-30b-a3b-v1:0": {
663+
maxTokens: 8192,
664+
contextWindow: 262_144,
665+
supportsImages: false,
666+
supportsPromptCache: false,
667+
inputPrice: 0.15,
668+
outputPrice: 0.6,
669+
description:
670+
"Qwen3 Coder 30B MoE model with 3.3B activated parameters, optimized for code generation and analysis with 256K context window.",
671+
},
672+
"qwen.qwen3-coder-480b-a35b-v1:0": {
673+
maxTokens: 8192,
674+
contextWindow: 262_144,
675+
supportsImages: false,
676+
supportsPromptCache: false,
677+
inputPrice: 0.22,
678+
outputPrice: 1.8,
679+
description:
680+
"Qwen3 Coder 480B flagship MoE model with 35B activated parameters, designed for complex coding tasks with advanced reasoning capabilities and 256K context window.",
681+
},
662682
} as const satisfies Record<string, ModelInfo>
663683

664684
// OpenRouter

src/utils/cost.test.ts

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { describe, it } from "mocha"
22
import "should"
33
import { ModelInfo } from "@shared/api"
4-
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "@utils/cost"
4+
import { calculateApiCostAnthropic, calculateApiCostOpenAI, calculateApiCostQwen } from "@utils/cost"
55

66
describe("Cost Utilities", () => {
77
describe("calculateApiCostAnthropic", () => {
@@ -123,4 +123,79 @@ describe("Cost Utilities", () => {
123123
cost.should.equal(0)
124124
})
125125
})
126+
127+
describe("calculateApiCostQwen", () => {
128+
it("should calculate basic input/output costs", () => {
129+
const modelInfo: ModelInfo = {
130+
supportsPromptCache: false,
131+
inputPrice: 0.15, // Qwen 30B pricing
132+
outputPrice: 0.6,
133+
}
134+
135+
const cost = calculateApiCostQwen(modelInfo, 1000, 500)
136+
// Input: (0.15 / 1_000_000) * 1000 = 0.00015
137+
// Output: (0.6 / 1_000_000) * 500 = 0.0003
138+
// Total: 0.00015 + 0.0003 = 0.00045
139+
cost.should.equal(0.00045)
140+
})
141+
142+
it("should handle missing prices", () => {
143+
const modelInfo: ModelInfo = {
144+
supportsPromptCache: true,
145+
// No prices specified
146+
}
147+
148+
const cost = calculateApiCostQwen(modelInfo, 1000, 500)
149+
cost.should.equal(0)
150+
})
151+
152+
it("should use real Qwen model configuration (30B)", () => {
153+
const modelInfo: ModelInfo = {
154+
maxTokens: 8192,
155+
contextWindow: 262_144,
156+
supportsImages: false,
157+
supportsPromptCache: false,
158+
inputPrice: 0.15,
159+
outputPrice: 0.6,
160+
}
161+
162+
const cost = calculateApiCostQwen(modelInfo, 1000, 500, 0, 0)
163+
// Input: (0.15 / 1_000_000) * 1000 = 0.00015
164+
// Output: (0.6 / 1_000_000) * 500 = 0.0003
165+
// Total: 0.00015 + 0.0003 = 0.00045
166+
cost.should.equal(0.00045)
167+
})
168+
169+
it("should handle cache tokens correctly (Qwen-style)", () => {
170+
const modelInfo: ModelInfo = {
171+
supportsPromptCache: true,
172+
inputPrice: 0.15,
173+
outputPrice: 0.6,
174+
cacheWritesPrice: 0.2,
175+
cacheReadsPrice: 0.05,
176+
}
177+
178+
// Qwen-style: inputTokens includes cached tokens
179+
const cost = calculateApiCostQwen(modelInfo, 2100, 1000, 1500, 500)
180+
// Cache writes: (0.2 / 1_000_000) * 1500 = 0.0003
181+
// Cache reads: (0.05 / 1_000_000) * 500 = 0.000025
182+
// Input: (0.15 / 1_000_000) * (2100 - 1500 - 500) = 0.000015
183+
// Output: (0.6 / 1_000_000) * 1000 = 0.0006
184+
// Total: 0.0003 + 0.000025 + 0.000015 + 0.0006 = 0.00094
185+
cost.should.equal(0.00094)
186+
})
187+
188+
it("should handle zero token counts", () => {
189+
const modelInfo: ModelInfo = {
190+
supportsPromptCache: true,
191+
inputPrice: 0.15,
192+
outputPrice: 0.6,
193+
cacheWritesPrice: 0.2,
194+
cacheReadsPrice: 0.05,
195+
}
196+
197+
const cost = calculateApiCostQwen(modelInfo, 0, 0, 0, 0)
198+
cost.should.equal(0)
199+
})
200+
})
126201
})

src/utils/cost.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,28 @@ export function calculateApiCostOpenAI(
110110
thinkingBudgetTokens,
111111
)
112112
}
113+
114+
// For Qwen compliant usage, follows OpenAI-style token counting where input tokens include cached tokens
115+
export function calculateApiCostQwen(
116+
modelInfo: ModelInfo,
117+
inputTokens: number, // For Qwen-style, this includes cached tokens
118+
outputTokens: number,
119+
cacheCreationInputTokens?: number,
120+
cacheReadInputTokens?: number,
121+
thinkingBudgetTokens?: number,
122+
): number {
123+
const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
124+
const cacheReadInputTokensNum = cacheReadInputTokens || 0
125+
// Calculate non-cached tokens for the internal function's 'inputTokens' parameter
126+
const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
127+
// Pass the original 'inputTokens' as 'totalInputTokensForPricing' for tier lookup
128+
return calculateApiCostInternal(
129+
modelInfo,
130+
nonCachedInputTokens,
131+
outputTokens,
132+
cacheCreationInputTokensNum,
133+
cacheReadInputTokensNum,
134+
inputTokens,
135+
thinkingBudgetTokens,
136+
)
137+
}

0 commit comments

Comments
 (0)