
Commit b1687f6

fix: Updated AWS Bedrock instrumentation to properly assign token counts (newrelic#3491)

1 parent 51e6d7a · commit b1687f6

38 files changed: +551 -587 lines changed

lib/llm-events/aws-bedrock/bedrock-response.js

Lines changed: 25 additions & 1 deletion

@@ -73,6 +73,30 @@ class BedrockResponse {
     return this.#parsedBody
   }
 
+  get inputTokenCount() {
+    if (this.#isConverse) {
+      return parseInt(this.#originalResponse?.output?.usage?.inputTokens || 0, 10)
+    }
+
+    return parseInt(this?.headers?.['x-amzn-bedrock-input-token-count'] || 0, 10)
+  }
+
+  get outputTokenCount() {
+    if (this.#isConverse) {
+      return parseInt(this.#originalResponse?.output?.usage?.outputTokens || 0, 10)
+    }
+
+    return parseInt(this?.headers?.['x-amzn-bedrock-output-token-count'] || 0, 10)
+  }
+
+  get totalTokenCount() {
+    if (this.#isConverse) {
+      return parseInt(this.#originalResponse?.output?.usage?.totalTokens || 0, 10)
+    }
+
+    return this.inputTokenCount + this.outputTokenCount
+  }
+
   /**
    * The prompt responses returned by the model.
    *
@@ -164,7 +188,7 @@ class BedrockResponse {
     } else if (cmd.isClaude3() === true) {
       if (body?.type === 'message_stop') {
         // Streamed response
-        this.#completions = body.completions
+        this.#completions.push(body.completions)
       } else {
         this.#completions = [stringifyClaudeChunkedMessage(body?.content)]
       }
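
Note: these getters give BedrockResponse a single place to resolve usage for both Bedrock API styles. Converse responses report usage on the body (`output.usage`), while InvokeModel responses report it in `x-amzn-bedrock-*-token-count` headers. A minimal sketch of that lookup order (the response and header shapes below are illustrative, not taken from the repo):

```js
// Illustrative only: how the new getters resolve counts for each API style.
// Converse API: usage arrives on the response body.
const converseResponse = { output: { usage: { inputTokens: 25, outputTokens: 50, totalTokens: 75 } } }

// InvokeModel API: usage arrives on the response headers.
const invokeHeaders = {
  'x-amzn-bedrock-input-token-count': '25',
  'x-amzn-bedrock-output-token-count': '50'
}

// Mirrors BedrockResponse#inputTokenCount: prefer the Converse usage block,
// otherwise fall back to the InvokeModel headers, defaulting to 0.
function inputTokenCount(isConverse, response, headers) {
  if (isConverse) {
    return parseInt(response?.output?.usage?.inputTokens || 0, 10)
  }
  return parseInt(headers?.['x-amzn-bedrock-input-token-count'] || 0, 10)
}

console.log(inputTokenCount(true, converseResponse, {})) // 25
console.log(inputTokenCount(false, {}, invokeHeaders)) // 25
```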

lib/llm-events/aws-bedrock/chat-completion-message.js

Lines changed: 2 additions & 10 deletions

@@ -6,9 +6,6 @@
 'use strict'
 
 const LlmEvent = require('./event')
-const { setTokenFromCallback } = require('../utils')
-const { tokenUsageAttributesExist, tokenUsageHeadersExist } = require('./utils')
-
 /**
  * @typedef {object} LlmChatCompletionParams
  * @augments LlmEventParams
@@ -71,9 +68,8 @@ class LlmChatCompletionMessage extends LlmEvent {
     const completionContent = this.bedrockResponse?.completions?.join(' ')
 
     if (promptContent && completionContent) {
-      setTokenFromCallback(
+      this.setTokenFromCallback(
         {
-          context: this,
           tokenCB,
           reqModel: this.bedrockCommand.modelId,
           resModel: this.bedrockCommand.modelId,
@@ -85,11 +81,7 @@ class LlmChatCompletionMessage extends LlmEvent {
       return
     }
 
-    // If no token count callback is available, we need to check the response object
-    // or response headers for usage information and set token_count to 0 if all usage attributes are present.
-    if (tokenUsageAttributesExist(this.bedrockResponse) || tokenUsageHeadersExist(this.bedrockResponse)) {
-      this.token_count = 0
-    }
+    this.setTokenInCompletionMessage({ promptTokens: this.bedrockResponse.inputTokenCount, completionTokens: this.bedrockResponse.outputTokenCount })
   }
 }
 
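
Note: the free helpers from `../utils` become methods inherited from the shared base event (see the `event.js` change below), and the fallback no longer zeroes `token_count` when usage data is present; it forwards the real counts from `BedrockResponse`. `setTokenInCompletionMessage` itself is defined outside this diff; a hypothetical sketch of its contract, assuming it records the count matching the message's side of the exchange:

```js
// Hypothetical sketch only; the real helper lives on the shared base event
// class, which this commit does not show.
setTokenInCompletionMessage({ promptTokens, completionTokens }) {
  // A completion message is either the prompt or the model's reply, so
  // record whichever count matches this message, when one was reported.
  // (`is_response` is assumed here for illustration.)
  const count = this.is_response ? completionTokens : promptTokens
  if (count) {
    this.token_count = count
  }
}
```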

lib/llm-events/aws-bedrock/chat-completion-summary.js

Lines changed: 11 additions & 15 deletions

@@ -7,8 +7,6 @@
 
 const LlmEvent = require('./event')
 
-const { setUsageTokens } = require('./utils')
-const { setTokenUsageFromCallback } = require('../utils')
 /**
  * @typedef {object} LlmChatCompletionSummaryParams
  * @augments LlmEventParams
@@ -50,21 +48,19 @@ class LlmChatCompletionSummary extends LlmEvent {
       const promptContent = this.bedrockCommand?.prompt?.map((msg) => msg.content).join(' ')
       const completionContent = this.bedrockResponse?.completions?.join(' ')
 
-      if (promptContent && completionContent) {
-        setTokenUsageFromCallback(
-          {
-            context: this,
-            tokenCB,
-            reqModel: this.bedrockCommand.modelId,
-            resModel: this.bedrockCommand.modelId,
-            promptContent,
-            completionContent
-          }
-        )
-      }
+      this.setTokenUsageFromCallback(
+        {
+          tokenCB,
+          reqModel: this.bedrockCommand.modelId,
+          resModel: this.bedrockCommand.modelId,
+          promptContent,
+          completionContent
+        }
+      )
       return
     }
-    setUsageTokens(this.bedrockResponse, this)
+
+    this.setTokensInResponse({ promptTokens: this.bedrockResponse.inputTokenCount, completionTokens: this.bedrockResponse.outputTokenCount, totalTokens: this.bedrockResponse.totalTokenCount })
   }
 }
 
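
Note: as with the message event, the summary now takes its fallback numbers from the `BedrockResponse` getters rather than a utils helper. `setTokensInResponse` is defined on the shared base event outside this diff; a plausible sketch, assuming it maps the three counts onto the standard usage attributes (the `response.usage.*` naming matches the removed code in `embedding.js` below):

```js
// Plausible sketch of the inherited helper; not shown in this commit.
setTokensInResponse({ promptTokens, completionTokens, totalTokens }) {
  // Attach usage attributes only when the response reported real counts.
  if (promptTokens || completionTokens || totalTokens) {
    this['response.usage.prompt_tokens'] = promptTokens
    this['response.usage.completion_tokens'] = completionTokens
    this['response.usage.total_tokens'] = totalTokens
  }
}
```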

lib/llm-events/aws-bedrock/converse-stream-handler.js

Lines changed: 11 additions & 4 deletions

@@ -65,6 +65,11 @@ class ConverseStreamHandler {
         message: {
 
         }
+      },
+      usage: {
+        inputTokens: null,
+        outputTokens: null,
+        totalTokens: null
       }
     }
   }
@@ -119,9 +124,7 @@ async function * handleConverse() {
     for await (const event of this.stream) {
       yield event
      this.updateHeaders(event)
-      if (event.messageStart?.role) {
-        this.role = 'assistant'
-      } else if (event.contentBlockStart?.start) {
+      if (event.contentBlockStart?.start) {
        // Handles a Content block start event. Tool use only.
        const blockStartData = event.contentBlockStart.start
        if (blockStartData.toolUse) {
@@ -133,6 +136,7 @@
          activeChunk = null
        }
      } else if (event.contentBlockDelta?.delta) {
+        // There are also deltas for tool use (stringified inputs) but we don't currently record them so we just ignore for now
        if (event.contentBlockDelta.delta.text) {
          // It seems like the first streamed chunk does not always start with a contentBlockStart message
          // If the stream starts with a delta, assume the current chunk is text
@@ -141,9 +145,12 @@
          }
          activeChunk.text += event.contentBlockDelta.delta.text
        }
-        // There are also deltas for tool use (stringified inputs) but we don't currently record them so we just ignore for now
+      // used in `handleResponse` to determine why the model stopped
      } else if (event.messageStop) {
        this.stopReason = event.messageStop?.stopReason
+      // sends token usage info at the end of the stream
+      } else if (event.metadata?.usage) {
+        this.response.output.usage = event.metadata.usage
      }
    }
  } finally {
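
Note: a Converse stream reports usage exactly once, in a trailing `metadata` event, which is why the constructor now seeds `usage` with nulls and the loop simply overwrites it when that event arrives. A compact illustration with a representative event sequence (event shapes follow the ConverseStream events handled above; the values are invented):

```js
// Representative ConverseStream event sequence; only the final metadata
// event carries usage, and the handler copies it over verbatim.
const events = [
  { messageStart: { role: 'assistant' } },
  { contentBlockDelta: { delta: { text: 'Hello' } } },
  { messageStop: { stopReason: 'end_turn' } },
  { metadata: { usage: { inputTokens: 12, outputTokens: 4, totalTokens: 16 } } }
]

const response = { output: { usage: { inputTokens: null, outputTokens: null, totalTokens: null } } }
for (const event of events) {
  if (event.metadata?.usage) {
    response.output.usage = event.metadata.usage // same assignment as handleConverse
  }
}
console.log(response.output.usage) // { inputTokens: 12, outputTokens: 4, totalTokens: 16 }
```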

lib/llm-events/aws-bedrock/embedding.js

Lines changed: 3 additions & 20 deletions

@@ -6,7 +6,6 @@
 'use strict'
 
 const LlmEvent = require('./event')
-const { validCallbackTokenCount, calculateCallbackTokens } = require('../utils')
 
 /**
  * @typedef {object} LlmEmbeddingParams
@@ -44,28 +43,12 @@ class LlmEmbedding extends LlmEvent {
         return
       }
 
-      const totalTokenCount = calculateCallbackTokens(tokenCB, this['request.model'], content)
-      const hasValidCallbackCounts = validCallbackTokenCount(totalTokenCount)
-
-      if (hasValidCallbackCounts) {
-        this['response.usage.total_tokens'] = Number(totalTokenCount)
-      }
+      const totalTokens = this.calculateCallbackTokens(tokenCB, this['request.model'], content)
+      this.setTokensOnEmbeddingMessage(totalTokens)
       return
     }
 
-    const totalTokens = this.getTotalTokens()
-    if (totalTokens) {
-      this['response.usage.total_tokens'] = Number(totalTokens)
-    }
-  }
-
-  getTotalTokens() {
-    // We record the input token count as total tokens
-    const totalToken =
-      this.bedrockResponse?.usage?.input_tokens || this.bedrockResponse?.usage?.inputTokens ||
-      this.bedrockResponse?.headers['x-amzn-bedrock-input-token-count']
-
-    return totalToken
+    this.setTokensOnEmbeddingMessage(this.bedrockResponse.inputTokenCount)
   }
 }
 
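
Note: embedding calls have no completion side, so only the input count exists and it is recorded as the event's total. The inherited `setTokensOnEmbeddingMessage` is not shown in this commit; judging from the inline logic it replaces, a hypothetical sketch of its contract:

```js
// Hypothetical sketch mirroring the removed inline logic; the real helper
// lives on the shared base event class, outside this diff.
setTokensOnEmbeddingMessage(totalTokens) {
  // Record a total only when a real count was reported or computed.
  if (totalTokens) {
    this['response.usage.total_tokens'] = Number(totalTokens)
  }
}
```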

lib/llm-events/aws-bedrock/event.js

Lines changed: 3 additions & 20 deletions

@@ -6,7 +6,7 @@
 'use strict'
 
 const { randomUUID } = require('crypto')
-const { DESTINATIONS } = require('../../config/attribute-filter')
+const BaseEvent = require('../event')
 
 /**
  * @typedef {object} LlmEventParams
@@ -29,7 +29,7 @@ const defaultParams = {
 /**
  * Baseline object representing a LLM event.
  */
-class LlmEvent {
+class LlmEvent extends BaseEvent {
   /**
    * All parameters that were passed in to the constructor after they have
    * been merged with the constructor's defaults.
@@ -43,6 +43,7 @@
   * @param {LlmEventParams} params Construction parameters.
   */
  constructor(params = defaultParams) {
+    super()
    params = Object.assign({}, defaultParams, params)
    this.constructionParams = params
 
@@ -64,24 +65,6 @@
    this['request.max_tokens'] = null
  }
 
-  /**
-   * Pull user set `llm.*` attributes from the current transaction and
-   * add them to the event.
-   *
-   * @param {object} agent The New Relic agent that provides access to the
-   * transaction.
-   */
-  set metadata(agent) { // eslint-disable-line accessor-pairs
-    const tx = agent.tracer.getTransaction()
-    const attrs = tx?.trace?.custom.get(DESTINATIONS.TRANS_SCOPE) || {}
-    for (const [k, v] of Object.entries(attrs)) {
-      if (k.startsWith('llm.') === false) {
-        continue
-      }
-      this[k] = v
-    }
-  }
-
  /**
   * Removes the complex objects from the event
   * This will be called right before the event is enqueued to the custom event aggregator
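
Note: this is a pull-up refactor. `LlmEvent` now extends a shared `BaseEvent` (resolved from `lib/llm-events/event.js`), which is presumably where the removed `metadata` setter and the `setToken*`/`calculateCallbackTokens` helpers used throughout this commit now live. A minimal sketch of the resulting shape; the base-class members are assumed for illustration and are not shown in this diff:

```js
// Sketch of the inheritance shape after this commit; BaseEvent's members
// are assumptions, not code from the repo.
class BaseEvent {
  // e.g. set metadata(agent) { ... }          // moved out of LlmEvent
  // e.g. setTokenFromCallback(params) { ... } // shared token helpers
}

class LlmEvent extends BaseEvent {
  constructor(params = {}) {
    super() // must run before `this` is touched in a derived constructor
    this.constructionParams = params
  }
}
```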

lib/llm-events/aws-bedrock/stream-handler.js

Lines changed: 21 additions & 28 deletions

@@ -187,6 +187,13 @@ class StreamHandler {
       this.response.response.headers = {
         'x-amzn-requestid': this.passThroughParams.response.response.headers['x-amzn-requestid']
       }
+
+      if (parsedEvent['amazon-bedrock-invocationMetrics']) {
+        const invocationMetrics = parsedEvent['amazon-bedrock-invocationMetrics']
+        this.response.response.headers['x-amzn-bedrock-input-token-count'] = invocationMetrics?.inputTokenCount
+        this.response.response.headers['x-amzn-bedrock-output-token-count'] = invocationMetrics?.outputTokenCount
+      }
+
       delete parsedEvent['amazon-bedrock-invocationMetrics']
     }
   }
@@ -213,8 +220,7 @@ async function * handleClaude() {
 async function * handleClaude3() {
   let currentBody = {}
   let stopReason
-  const completions = []
-  const usage = {}
+  let response = ''
 
   try {
     for await (const event of this.stream) {
@@ -223,25 +229,14 @@
       this.updateHeaders(parsed)
       currentBody = parsed
       if (parsed.type === 'content_block_delta') {
-        completions.push(parsed.delta.text)
+        response += parsed.delta.text
       } else if (parsed.type === 'message_delta') {
         stopReason = parsed.delta.stop_reason
       }
-
-      if (parsed?.message?.usage?.input_tokens) {
-        usage.input_tokens = parsed.message.usage.input_tokens
-      }
-
-      if (parsed?.message?.usage?.output_tokens) {
-        usage.output_tokens = parsed.message.usage.output_tokens
-      } else if (parsed?.usage?.output_tokens) {
-        usage.output_tokens = parsed.usage.output_tokens
-      }
     }
   } finally {
-    currentBody.completions = completions
+    currentBody.completions = response
     currentBody.stop_reason = stopReason
-    currentBody.usage = usage
     this.response.output.body = currentBody
     this.finish()
   }
@@ -286,7 +281,6 @@ async function * handleCohereEmbed() {
 async function * handleLlama() {
   let currentBody = {}
   let generation = ''
-  const usage = {}
 
   try {
     for await (const event of this.stream) {
@@ -295,35 +289,34 @@
       this.updateHeaders(parsed)
       currentBody = parsed
       generation += parsed.generation
-
-      if (parsed?.generation_token_count) {
-        usage.output_tokens = parsed.generation_token_count
-      }
-
-      if (parsed?.prompt_token_count) {
-        usage.input_tokens = parsed.prompt_token_count
-      }
     }
   } finally {
     currentBody.generation = generation
-    currentBody.usage = usage
     this.response.output.body = currentBody
     this.finish()
   }
 }
 
+/**
+ * Yields every chunk and builds up the response in a string
+ * Re-assigns the response to `outputText` and makes body.results an array
+ * to match the structure of non-streamed Titan response.
+ */
 async function * handleTitan() {
-  const body = this.response.output.body
-  body.results = []
+  let currentBody = {}
+  let response = ''
 
   try {
     for await (const event of this.stream) {
       yield event // Pass it up to the real consumer of the stream.
       const parsed = this.parseEvent(event)
       this.updateHeaders(parsed)
-      body.results.push(parsed)
+      response += parsed.outputText
+      currentBody = parsed
     }
   } finally {
+    currentBody.outputText = response
+    this.response.output.body.results = [currentBody]
    this.finish()
  }
 }
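
Note: for InvokeModel streams, Bedrock appends an `amazon-bedrock-invocationMetrics` object to the final chunk instead of sending usage headers, so `updateHeaders` now translates those metrics into the same header names the non-streamed path uses; `BedrockResponse`'s new getters then work identically for streamed and non-streamed calls. A compact illustration (the chunk payload is representative, not taken from the repo):

```js
// Representative final chunk of an InvokeModel stream; Bedrock attaches
// invocation metrics to the last event rather than to response headers.
const parsedEvent = {
  outputText: ' world',
  'amazon-bedrock-invocationMetrics': { inputTokenCount: 12, outputTokenCount: 40 }
}

const headers = { 'x-amzn-requestid': 'abc-123' }

// Same translation the handler performs: synthesize the header names the
// non-streamed InvokeModel path exposes, then drop the metrics from the body.
const metrics = parsedEvent['amazon-bedrock-invocationMetrics']
if (metrics) {
  headers['x-amzn-bedrock-input-token-count'] = metrics.inputTokenCount
  headers['x-amzn-bedrock-output-token-count'] = metrics.outputTokenCount
}
delete parsedEvent['amazon-bedrock-invocationMetrics']

console.log(headers['x-amzn-bedrock-input-token-count']) // 12
console.log(headers['x-amzn-bedrock-output-token-count']) // 40
```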
