Skip to content

Commit 40f2eb5

Browse files
feat: add GenAI semconv attributes for streaming, prompt-cache tokens, and tool spans
Add more attributes from the OpenTelemetry GenAI semantic conventions (https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/). `Chat#complete`: - `gen_ai.request.stream` (set only when the request is streaming) - `gen_ai.usage.cache_read.input_tokens` - `gen_ai.usage.cache_creation.input_tokens` `Chat#execute_tool`: - `error.type`, plus `record_exception` and an ERROR span status when the tool raises (the exception is re-raised) - `gen_ai.tool.description`
1 parent 9a97ca4 commit 40f2eb5

2 files changed

Lines changed: 124 additions & 5 deletions

File tree

lib/opentelemetry/instrumentation/ruby_llm/patches/chat.rb

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def complete(&)
1919
"gen_ai.provider.name" => provider,
2020
"gen_ai.request.model" => model_id,
2121
}
22+
# Per GenAI semconv: set `gen_ai.request.stream` if and only if
23+
# the request is streaming. Absence means non-streaming.
24+
attributes["gen_ai.request.stream"] = true if block_given?
2225

2326
tracer.in_span("chat #{model_id}", attributes: attributes, kind: OpenTelemetry::Trace::SpanKind::CLIENT) do |span|
2427
begin
@@ -37,6 +40,17 @@ def complete(&)
3740
span.set_attribute("gen_ai.usage.output_tokens", response.output_tokens) if response.output_tokens
3841
span.set_attribute("gen_ai.request.temperature", @temperature) if @temperature
3942

43+
# Prompt-cache token accessors were added in ruby_llm 1.9.0
44+
# (commit 869a755f, Anthropic Prompt Caching). Graceful skip
45+
# on older versions instead of NoMethodError.
46+
if response.respond_to?(:cached_tokens) && response.cached_tokens
47+
span.set_attribute("gen_ai.usage.cache_read.input_tokens", response.cached_tokens)
48+
end
49+
50+
if response.respond_to?(:cache_creation_tokens) && response.cache_creation_tokens
51+
span.set_attribute("gen_ai.usage.cache_creation.input_tokens", response.cache_creation_tokens)
52+
end
53+
4054
if capture_content?
4155
system_messages = @messages.select { |m| m.role == :system }
4256
input_messages = @messages[0..-2].reject { |m| m.role == :system }
@@ -62,13 +76,24 @@ def execute_tool(tool_call)
6276
"gen_ai.tool.name" => tool_call.name,
6377
"gen_ai.tool.call.id" => tool_call.id,
6478
"gen_ai.tool.call.arguments" => tool_call.arguments.to_json,
65-
"gen_ai.tool.type" => "function"
66-
}
79+
"gen_ai.tool.type" => "function",
80+
"gen_ai.tool.description" => tools[tool_call.name.to_sym]&.description
81+
}.compact
6782

6883
tracer.in_span("execute_tool #{tool_call.name}", attributes: attributes, kind: OpenTelemetry::Trace::SpanKind::INTERNAL) do |span|
69-
result = super
70-
result_str = result.is_a?(::RubyLLM::Tool::Halt) ? result.content.to_s : result.to_s
71-
span.set_attribute("gen_ai.tool.call.result", result_str[0..500])
84+
begin
85+
result = super
86+
rescue => e
87+
span.record_exception(e)
88+
span.status = OpenTelemetry::Trace::Status.error(e.message)
89+
span.set_attribute("error.type", e.class.name)
90+
raise
91+
end
92+
93+
# `RubyLLM::Tool::Halt#to_s` returns `@content.to_s`, so a single
94+
# `to_s` covers both the Halt and plain-result cases.
95+
span.set_attribute("gen_ai.tool.call.result", result.to_s[0..500])
96+
7297
result
7398
end
7499
end

test/instrumentation_test.rb

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,61 @@ def test_creates_span_with_attributes
4545
assert_equal "openai", span.attributes["gen_ai.provider.name"]
4646
assert_equal "gpt-4o-mini", span.attributes["gen_ai.request.model"]
4747
assert_equal "chat", span.attributes["gen_ai.operation.name"]
48+
# Per GenAI semconv, `gen_ai.request.stream` is set only when streaming.
49+
assert_nil span.attributes["gen_ai.request.stream"]
4850
assert_equal 10, span.attributes["gen_ai.usage.input_tokens"]
4951
assert_equal 5, span.attributes["gen_ai.usage.output_tokens"]
5052
end
5153

54+
def test_marks_streaming_chat_requests
55+
stub_request(:post, "https://api.openai.com/v1/chat/completions")
56+
.to_return(
57+
status: 200,
58+
headers: { "Content-Type" => "application/json" },
59+
body: {
60+
id: "chatcmpl-123",
61+
model: "gpt-4o-mini",
62+
choices: [{ index: 0, message: { role: "assistant", content: "Hi" }, finish_reason: "stop" }],
63+
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }
64+
}.to_json
65+
)
66+
67+
chat = RubyLLM.chat(model: "gpt-4o-mini")
68+
chat.ask("Hi") { |_chunk| }
69+
70+
span = EXPORTER.finished_spans.first
71+
assert_equal true, span.attributes["gen_ai.request.stream"]
72+
end
73+
74+
def test_records_prompt_cache_tokens
75+
# RubyLLM's OpenAI provider maps `cached_tokens` ← `cache_read_tokens(usage)`
76+
# and `cache_creation_tokens` ← `cache_write_tokens(usage)`, both surfaced
77+
# on `Message#cached_tokens` / `Message#cache_creation_tokens`.
78+
stub_request(:post, "https://api.openai.com/v1/chat/completions")
79+
.to_return(
80+
status: 200,
81+
headers: { "Content-Type" => "application/json" },
82+
body: {
83+
id: "chatcmpl-cache",
84+
model: "gpt-4o-mini",
85+
choices: [{ index: 0, message: { role: "assistant", content: "Hello!" }, finish_reason: "stop" }],
86+
usage: {
87+
prompt_tokens: 100,
88+
completion_tokens: 5,
89+
total_tokens: 105,
90+
prompt_tokens_details: { cached_tokens: 75, cache_write_tokens: 20 }
91+
}
92+
}.to_json
93+
)
94+
95+
chat = RubyLLM.chat(model: "gpt-4o-mini")
96+
chat.ask("Hi")
97+
98+
span = EXPORTER.finished_spans.first
99+
assert_equal 75, span.attributes["gen_ai.usage.cache_read.input_tokens"]
100+
assert_equal 20, span.attributes["gen_ai.usage.cache_creation.input_tokens"]
101+
end
102+
52103
def test_records_error_on_api_failure
53104
stub_request(:post, "https://api.openai.com/v1/chat/completions")
54105
.to_return(status: 500, body: "Internal Server Error")
@@ -170,12 +221,55 @@ def execute(expression:)
170221
assert_equal "execute_tool calculator", tool_span.name
171222
assert_equal "execute_tool", tool_span.attributes["gen_ai.operation.name"]
172223
assert_equal "calculator", tool_span.attributes["gen_ai.tool.name"]
224+
assert_equal "Performs math", tool_span.attributes["gen_ai.tool.description"]
173225
assert_equal '{"expression":"2+2"}', tool_span.attributes["gen_ai.tool.call.arguments"]
174226
assert_equal "4", tool_span.attributes["gen_ai.tool.call.result"]
175227
assert_equal "call_abc123", tool_span.attributes["gen_ai.tool.call.id"]
176228
assert_equal "function", tool_span.attributes["gen_ai.tool.type"]
177229
end
178230

231+
def test_records_error_when_tool_raises
232+
boom = Class.new(RubyLLM::Tool) do
233+
def self.name = "boom"
234+
description "Always raises"
235+
236+
def execute
237+
raise ArgumentError, "tool failure"
238+
end
239+
end
240+
241+
stub_request(:post, "https://api.openai.com/v1/chat/completions")
242+
.to_return(
243+
status: 200,
244+
headers: { "Content-Type" => "application/json" },
245+
body: {
246+
id: "chatcmpl-boom",
247+
model: "gpt-4o-mini",
248+
choices: [{
249+
index: 0,
250+
message: {
251+
role: "assistant",
252+
content: nil,
253+
tool_calls: [{
254+
id: "call_x",
255+
type: "function",
256+
function: { name: "boom", arguments: "{}" }
257+
}]
258+
},
259+
finish_reason: "tool_calls"
260+
}],
261+
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }
262+
}.to_json
263+
)
264+
265+
chat = RubyLLM.chat(model: "gpt-4o-mini").with_tool(boom)
266+
assert_raises(ArgumentError) { chat.ask("trigger") }
267+
268+
tool_span = EXPORTER.finished_spans.find { |s| s.name.start_with?("execute_tool ") }
269+
assert_equal "ArgumentError", tool_span.attributes["error.type"]
270+
assert_equal OpenTelemetry::Trace::Status::ERROR, tool_span.status.code
271+
end
272+
179273
def test_does_not_capture_content_by_default
180274
stub_request(:post, "https://api.openai.com/v1/chat/completions")
181275
.to_return(

0 commit comments

Comments
 (0)