@@ -10,7 +10,7 @@ import {
1010} from "@aws-sdk/client-bedrock-runtime"
1111import { fromNodeProviderChain } from "@aws-sdk/credential-providers"
1212import { BedrockModelId , bedrockDefaultModelId , bedrockModels , CLAUDE_SONNET_1M_SUFFIX , ModelInfo } from "@shared/api"
13- import { calculateApiCostOpenAI } from "@utils/cost"
13+ import { calculateApiCostOpenAI , calculateApiCostQwen } from "@utils/cost"
1414import { ExtensionRegistryInfo } from "@/registry"
1515import { ApiHandler , CommonApiHandlerOptions } from "../"
1616import { withRetry } from "../retry"
@@ -150,6 +150,12 @@ export class AwsBedrockHandler implements ApiHandler {
150150 return
151151 }
152152
153+ // Check if this is a Qwen model
154+ if ( baseModelId . includes ( "qwen" ) ) {
155+ yield * this . createQwenMessage ( systemPrompt , messages , modelId , model )
156+ return
157+ }
158+
153159 // Check if this is a Deepseek model
154160 if ( baseModelId . includes ( "deepseek" ) ) {
155161 yield * this . createDeepseekMessage ( systemPrompt , messages , modelId , model )
@@ -1126,4 +1132,139 @@ export class AwsBedrockHandler implements ApiHandler {
11261132 }
11271133 }
11281134 }
1135+
/**
 * Sends a request to a Qwen model through AWS Bedrock and replays the reply as an ApiStream.
 *
 * The request uses the non-streaming Converse API (`ConverseCommand`). Once the complete
 * response has arrived, its reasoning and text content are re-emitted in chunks of roughly
 * 1000 UTF-16 code units to simulate streaming.
 *
 * Emission order:
 *  1. one "usage" chunk, when the response carries usage data;
 *  2. "reasoning" chunks, when the response contains reasoning text;
 *  3. "text" chunks, when the response contains regular text;
 *  4. one "usage" chunk built from estimates, when the response carried no usage data.
 *
 * Errors raised while sending the request or processing the response are logged and
 * surfaced as a single "[ERROR] ..." text chunk instead of being rethrown. Errors raised
 * while obtaining the client or building the command happen before the `try` and propagate.
 *
 * @param systemPrompt System prompt; left out of the request when empty.
 * @param messages Conversation history in Anthropic message format.
 * @param modelId Model identifier sent to Bedrock.
 * @param model Resolved model; `info` supplies the max-token limit and is passed to the cost calculation.
 * @returns An async stream of "usage", "reasoning" and "text" chunks.
 */
private async *createQwenMessage(
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
	modelId: string,
	model: { id: string; info: ModelInfo },
): ApiStream {
	// Target number of UTF-16 code units per simulated stream chunk.
	const chunkSize = 1000

	// Splits content into chunks of about `chunkSize` code units. A chunk is extended by one
	// unit when it would otherwise end between the two halves of a surrogate pair, so no chunk
	// ever carries half of an astral character (e.g. an emoji).
	const splitIntoChunks = (content: string): string[] => {
		const chunks: string[] = []
		let start = 0
		while (start < content.length) {
			let end = Math.min(start + chunkSize, content.length)
			if (end < content.length) {
				const lastUnit = content.charCodeAt(end - 1)
				const nextUnit = content.charCodeAt(end)
				const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff
				const nextIsLowSurrogate = nextUnit >= 0xdc00 && nextUnit <= 0xdfff
				if (endsOnHighSurrogate && nextIsLowSurrogate) {
					end += 1
				}
			}
			chunks.push(content.slice(start, end))
			start = end
		}
		return chunks
	}

	// Get Bedrock client with proper credentials
	const client = await this.getBedrockClient()

	// Prepare the non-streaming Converse command
	const command = new ConverseCommand({
		modelId: modelId,
		messages: this.formatMessagesForConverseAPI(messages),
		// An empty system prompt is omitted instead of being sent as an empty block
		system: systemPrompt ? [{ text: systemPrompt }] : undefined,
		inferenceConfig: {
			maxTokens: model.info.maxTokens || 8192,
			temperature: 0,
		},
	})

	try {
		// Estimated up front; only used when the response does not report input tokens
		const inputTokenEstimate = this.estimateInputTokens(systemPrompt, messages)

		// Execute the non-streaming request
		const response = await client.send(command)

		// Collect the complete response text and reasoning content
		let fullText = ""
		let reasoningText = ""

		for (const contentBlock of response.output?.message?.content ?? []) {
			if ("reasoningContent" in contentBlock && contentBlock.reasoningContent) {
				// Reasoning text is nested: reasoningContent.reasoningText.text
				const reasoning = contentBlock.reasoningContent
				if ("reasoningText" in reasoning && reasoning.reasoningText && "text" in reasoning.reasoningText) {
					// Guard against an unset text so "undefined" is never appended
					reasoningText += reasoning.reasoningText.text ?? ""
				}
			} else if ("text" in contentBlock && contentBlock.text) {
				fullText += contentBlock.text
			}
		}

		const usage = response.usage

		// When the response carries usage data, report it before replaying the content
		if (usage) {
			const actualInputTokens = usage.inputTokens || inputTokenEstimate
			const actualOutputTokens = usage.outputTokens || this.estimateTokenCount(fullText + reasoningText)
			yield {
				type: "usage",
				inputTokens: actualInputTokens,
				outputTokens: actualOutputTokens,
				totalCost: calculateApiCostQwen(model.info, actualInputTokens, actualOutputTokens, 0, 0),
			}
		}

		// Reasoning content is replayed first, then the regular text
		for (const chunk of splitIntoChunks(reasoningText)) {
			yield {
				type: "reasoning",
				reasoning: chunk,
			}
		}

		for (const chunk of splitIntoChunks(fullText)) {
			yield {
				type: "text",
				text: chunk,
			}
		}

		// Without usage data, finish with an estimate covering both regular text and reasoning
		if (!usage) {
			const estimatedOutputTokens = this.estimateTokenCount(fullText + reasoningText)
			yield {
				type: "usage",
				inputTokens: inputTokenEstimate,
				outputTokens: estimatedOutputTokens,
				totalCost: calculateApiCostQwen(model.info, inputTokenEstimate, estimatedOutputTokens, 0, 0),
			}
		}
	} catch (error) {
		console.error("Error with Qwen model via Converse API:", error)

		// Every Error instance has a name, so AWS SDK errors are reported as "<name>: <message>"
		const errorMessage =
			error instanceof Error ? `${error.name}: ${error.message}` : "Failed to process Qwen model request"

		yield {
			type: "text",
			text: `[ERROR] ${errorMessage}`,
		}
	}
}
11291270}
0 commit comments