@@ -187,6 +187,13 @@ class StreamHandler {
     this.response.response.headers = {
       'x-amzn-requestid': this.passThroughParams.response.response.headers['x-amzn-requestid']
     }
+
+    if (parsedEvent['amazon-bedrock-invocationMetrics']) {
+      const invocationMetrics = parsedEvent['amazon-bedrock-invocationMetrics']
+      this.response.response.headers['x-amzn-bedrock-input-token-count'] = invocationMetrics?.inputTokenCount
+      this.response.response.headers['x-amzn-bedrock-output-token-count'] = invocationMetrics?.outputTokenCount
+    }
+
     delete parsedEvent['amazon-bedrock-invocationMetrics']
   }
 }
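For orientation, a minimal sketch (not part of the patch) of the final streamed chunk that this header logic assumes; only the `amazon-bedrock-invocationMetrics` key and its `inputTokenCount`/`outputTokenCount` fields come from the hunk above, everything else is illustrative:

const parsedEvent = {
  // model-specific completion fields omitted
  'amazon-bedrock-invocationMetrics': {
    inputTokenCount: 25,   // copied into the x-amzn-bedrock-input-token-count header
    outputTokenCount: 128  // copied into the x-amzn-bedrock-output-token-count header
  }
}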
@@ -213,8 +220,7 @@ async function * handleClaude() {
 async function * handleClaude3() {
   let currentBody = {}
   let stopReason
-  const completions = []
-  const usage = {}
+  let response = ''

   try {
     for await (const event of this.stream) {
@@ -223,25 +229,14 @@ async function * handleClaude3() {
       this.updateHeaders(parsed)
       currentBody = parsed
       if (parsed.type === 'content_block_delta') {
-        completions.push(parsed.delta.text)
+        response += parsed.delta.text
       } else if (parsed.type === 'message_delta') {
         stopReason = parsed.delta.stop_reason
       }
-
-      if (parsed?.message?.usage?.input_tokens) {
-        usage.input_tokens = parsed.message.usage.input_tokens
-      }
-
-      if (parsed?.message?.usage?.output_tokens) {
-        usage.output_tokens = parsed.message.usage.output_tokens
-      } else if (parsed?.usage?.output_tokens) {
-        usage.output_tokens = parsed.usage.output_tokens
-      }
     }
   } finally {
-    currentBody.completions = completions
+    currentBody.completions = response
     currentBody.stop_reason = stopReason
-    currentBody.usage = usage
     this.response.output.body = currentBody
     this.finish()
   }
@@ -286,7 +281,6 @@ async function * handleCohereEmbed() {
 async function * handleLlama() {
   let currentBody = {}
   let generation = ''
-  const usage = {}

   try {
     for await (const event of this.stream) {
@@ -295,35 +289,34 @@ async function * handleLlama() {
       this.updateHeaders(parsed)
       currentBody = parsed
       generation += parsed.generation
-
-      if (parsed?.generation_token_count) {
-        usage.output_tokens = parsed.generation_token_count
-      }
-
-      if (parsed?.prompt_token_count) {
-        usage.input_tokens = parsed.prompt_token_count
-      }
     }
   } finally {
     currentBody.generation = generation
-    currentBody.usage = usage
     this.response.output.body = currentBody
     this.finish()
   }
 }

+/**
+ * Yields every chunk and builds up the response in a string.
+ * Re-assigns the response to `outputText` and makes body.results an array
+ * to match the structure of the non-streamed Titan response.
+ */
 async function * handleTitan() {
-  const body = this.response.output.body
-  body.results = []
+  let currentBody = {}
+  let response = ''

   try {
     for await (const event of this.stream) {
       yield event // Pass it up to the real consumer of the stream.
       const parsed = this.parseEvent(event)
       this.updateHeaders(parsed)
-      body.results.push(parsed)
+      response += parsed.outputText
+      currentBody = parsed
     }
   } finally {
+    currentBody.outputText = response
+    this.response.output.body.results = [currentBody]
     this.finish()
   }
 }
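As a rough illustration of the Titan reshaping (assumed shape; nothing here beyond outputText and results is taken from the patch): once the stream ends, the handler leaves a body whose results array mirrors the non-streamed layout.

// Sketch only: the last parsed chunk, with its partial outputText replaced by
// the full concatenated text; any other fields on that chunk are carried through.
this.response.output.body.results = [
  {
    outputText: 'full concatenated completion text'
  }
]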