feat: add GenAI semconv attributes for streaming, prompt-cache tokens, and tool spans

oleksii-leonov · oleksii-leonov · commit 40f2eb56aafc · 2026-05-22T11:45:50.000Z
Add more attributes from the OpenTelemetry GenAI semantic conventions (https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/). `Chat#complete`: - `gen_ai.request.stream` (set only when the request is streaming) - `gen_ai.usage.cache_read.input_tokens` - `gen_ai.usage.cache_creation.input_tokens` `Chat#execute_tool`: - `error.type`, plus `record_exception` and an ERROR span status when the tool call raises - `gen_ai.tool.description`
diff --git a/lib/opentelemetry/instrumentation/ruby_llm/patches/chat.rb b/lib/opentelemetry/instrumentation/ruby_llm/patches/chat.rb
@@ -19,6 +19,9 @@ def complete(&)
               "gen_ai.provider.name" => provider,
               "gen_ai.request.model" => model_id,
             }
+            # Per GenAI semconv: set `gen_ai.request.stream` if and only if
+            # the request is streaming. Absence means non-streaming.
+            attributes["gen_ai.request.stream"] = true if block_given?
 
             tracer.in_span("chat #{model_id}", attributes: attributes, kind: OpenTelemetry::Trace::SpanKind::CLIENT) do |span|
               begin
@@ -37,6 +40,17 @@ def complete(&)
                 span.set_attribute("gen_ai.usage.output_tokens", response.output_tokens) if response.output_tokens
                 span.set_attribute("gen_ai.request.temperature", @temperature) if @temperature
 
+                # Prompt-cache token accessors were added in ruby_llm 1.9.0
+                # (commit 869a755f, Anthropic Prompt Caching). Graceful skip
+                # on older versions instead of NoMethodError.
+                if response.respond_to?(:cached_tokens) && response.cached_tokens
+                  span.set_attribute("gen_ai.usage.cache_read.input_tokens", response.cached_tokens)
+                end
+
+                if response.respond_to?(:cache_creation_tokens) && response.cache_creation_tokens
+                  span.set_attribute("gen_ai.usage.cache_creation.input_tokens", response.cache_creation_tokens)
+                end
+
                 if capture_content?
                   system_messages = @messages.select { |m| m.role == :system }
                   input_messages = @messages[0..-2].reject { |m| m.role == :system }
@@ -62,13 +76,24 @@ def execute_tool(tool_call)
               "gen_ai.tool.name" => tool_call.name,
               "gen_ai.tool.call.id" => tool_call.id,
               "gen_ai.tool.call.arguments" => tool_call.arguments.to_json,
-              "gen_ai.tool.type" => "function"
-            }
+              "gen_ai.tool.type" => "function",
+              "gen_ai.tool.description" => tools[tool_call.name.to_sym]&.description
+            }.compact
 
             tracer.in_span("execute_tool #{tool_call.name}", attributes: attributes, kind: OpenTelemetry::Trace::SpanKind::INTERNAL) do |span|
-              result = super
-              result_str = result.is_a?(::RubyLLM::Tool::Halt) ? result.content.to_s : result.to_s
-              span.set_attribute("gen_ai.tool.call.result", result_str[0..500])
+              begin
+                result = super
+              rescue => e
+                span.record_exception(e)
+                span.status = OpenTelemetry::Trace::Status.error(e.message)
+                span.set_attribute("error.type", e.class.name)
+                raise
+              end
+
+              # `RubyLLM::Tool::Halt#to_s` returns `@content.to_s`, so a single
+              # `to_s` covers both the Halt and plain-result cases.
+              span.set_attribute("gen_ai.tool.call.result", result.to_s[0..500])
+
               result
             end
           end
diff --git a/test/instrumentation_test.rb b/test/instrumentation_test.rb
@@ -45,10 +45,61 @@ def test_creates_span_with_attributes
     assert_equal "openai", span.attributes["gen_ai.provider.name"]
     assert_equal "gpt-4o-mini", span.attributes["gen_ai.request.model"]
     assert_equal "chat", span.attributes["gen_ai.operation.name"]
+    # Per GenAI semconv, `gen_ai.request.stream` is set only when streaming.
+    assert_nil span.attributes["gen_ai.request.stream"]
     assert_equal 10, span.attributes["gen_ai.usage.input_tokens"]
     assert_equal 5, span.attributes["gen_ai.usage.output_tokens"]
   end
 
+  def test_marks_streaming_chat_requests
+    # Canned completion payload; only the span's streaming flag is asserted.
+    completion = {
+      id: "chatcmpl-123",
+      model: "gpt-4o-mini",
+      choices: [{ index: 0, message: { role: "assistant", content: "Hi" }, finish_reason: "stop" }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }
+    }
+
+    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      headers: { "Content-Type" => "application/json" },
+      body: completion.to_json
+    )
+
+    # A block is supplied so the call is treated as a streaming request.
+    RubyLLM.chat(model: "gpt-4o-mini").ask("Hi") { |_chunk| }
+
+    chat_span = EXPORTER.finished_spans.first
+    assert_equal true, chat_span.attributes["gen_ai.request.stream"]
+  end
+
+  def test_records_prompt_cache_tokens
+    # RubyLLM's OpenAI provider maps `cached_tokens` ← `cache_read_tokens(usage)`
+    # and `cache_creation_tokens` ← `cache_write_tokens(usage)`; both end up on
+    # `Message#cached_tokens` / `Message#cache_creation_tokens`.
+    usage = {
+      prompt_tokens: 100,
+      completion_tokens: 5,
+      total_tokens: 105,
+      prompt_tokens_details: { cached_tokens: 75, cache_write_tokens: 20 }
+    }
+    completion = {
+      id: "chatcmpl-cache",
+      model: "gpt-4o-mini",
+      choices: [{ index: 0, message: { role: "assistant", content: "Hello!" }, finish_reason: "stop" }],
+      usage: usage
+    }
+
+    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      headers: { "Content-Type" => "application/json" },
+      body: completion.to_json
+    )
+
+    RubyLLM.chat(model: "gpt-4o-mini").ask("Hi")
+
+    # The cache counters from the stubbed usage block must appear on the span.
+    chat_span = EXPORTER.finished_spans.first
+    assert_equal 75, chat_span.attributes["gen_ai.usage.cache_read.input_tokens"]
+    assert_equal 20, chat_span.attributes["gen_ai.usage.cache_creation.input_tokens"]
+  end
+
   def test_records_error_on_api_failure
     stub_request(:post, "https://api.openai.com/v1/chat/completions")
       .to_return(status: 500, body: "Internal Server Error")
@@ -170,12 +221,55 @@ def execute(expression:)
     assert_equal "execute_tool calculator", tool_span.name
     assert_equal "execute_tool", tool_span.attributes["gen_ai.operation.name"]
     assert_equal "calculator", tool_span.attributes["gen_ai.tool.name"]
+    assert_equal "Performs math", tool_span.attributes["gen_ai.tool.description"]
     assert_equal '{"expression":"2+2"}', tool_span.attributes["gen_ai.tool.call.arguments"]
     assert_equal "4", tool_span.attributes["gen_ai.tool.call.result"]
     assert_equal "call_abc123", tool_span.attributes["gen_ai.tool.call.id"]
     assert_equal "function", tool_span.attributes["gen_ai.tool.type"]
   end
 
+  def test_records_error_when_tool_raises
+    # Verifies that a raising tool is reflected on its `execute_tool` span:
+    # `error.type` holds the exception class name and the status code is ERROR.
+    boom = Class.new(RubyLLM::Tool) do
+      def self.name = "boom"
+      description "Always raises"
+
+      def execute
+        raise ArgumentError, "tool failure"
+      end
+    end
+
+    # The stubbed completion asks the client to invoke the `boom` tool.
+    stub_request(:post, "https://api.openai.com/v1/chat/completions")
+      .to_return(
+        status: 200,
+        headers: { "Content-Type" => "application/json" },
+        body: {
+          id: "chatcmpl-boom",
+          model: "gpt-4o-mini",
+          choices: [{
+            index: 0,
+            message: {
+              role: "assistant",
+              content: nil,
+              tool_calls: [{
+                id: "call_x",
+                type: "function",
+                function: { name: "boom", arguments: "{}" }
+              }]
+            },
+            finish_reason: "tool_calls"
+          }],
+          usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }
+        }.to_json
+      )
+
+    chat = RubyLLM.chat(model: "gpt-4o-mini").with_tool(boom)
+    # The instrumentation re-raises after annotating the span, so the error
+    # must still reach the caller.
+    assert_raises(ArgumentError) { chat.ask("trigger") }
+
+    tool_span = EXPORTER.finished_spans.find { |s| s.name.start_with?("execute_tool ") }
+    # Fail with a readable message rather than NoMethodError on nil when no
+    # tool span was exported.
+    refute_nil tool_span, "expected an execute_tool span to be exported"
+    assert_equal "ArgumentError", tool_span.attributes["error.type"]
+    assert_equal OpenTelemetry::Trace::Status::ERROR, tool_span.status.code
+  end
+
   def test_does_not_capture_content_by_default
     stub_request(:post, "https://api.openai.com/v1/chat/completions")
       .to_return(