Add opt-in content capture for messages and system instructions

clarissalimab · clarissalimab · commit b75c17ab49b3 · 2026-02-07T17:17:00.000-03:00
Support capturing gen_ai.system_instructions, gen_ai.input.messages and
gen_ai.output.messages via capture_content config option or
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT env var. Content is
not captured by default.
diff --git a/README.md b/README.md
@@ -30,15 +30,44 @@ OpenTelemetry::SDK.configure do |c|
 end
 ```
 
+## Configuration
+
+### Content capture
+
+By default, message content is **not captured**. To enable it:
+
+```ruby
+OpenTelemetry::SDK.configure do |c|
+  c.use 'OpenTelemetry::Instrumentation::RubyLLM', capture_content: true
+end
+```
+
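+Or set the environment variable. When set, it overrides the `capture_content` option, and only the value `true` (case-insensitive) enables capture: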
+
+```bash
+export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
+```
+
+When enabled, the following attributes are added to chat spans:
+
+| Attribute | Description |
+|-----------|-------------|
+| `gen_ai.system_instructions` | System instructions provided via `with_instructions` |
+| `gen_ai.input.messages` | Input messages sent to the model |
+| `gen_ai.output.messages` | Final output messages from the model |
+
+> [!WARNING]
+> Captured content may include sensitive or personally identifiable information (PII). Use with caution in production environments.
+
 ## What's traced?
 
 | Feature | Status |
 |---------|--------|
 | Chat completions | Supported |
 | Tool calls | Supported |
 | Error handling | Supported |
+| Opt-in input/output content capture | Supported |
 | Conversation tracking (`gen_ai.conversation.id`) | Planned |
-| Opt-in input/output content capture | Planned |
-| System instructions capture | Planned |
+| System instructions capture | Supported |
 | Custom attributes on traces and spans | Planned |
 | Embeddings | Planned |
diff --git a/example/trace_demonstration_with_langfuse.rb b/example/trace_demonstration_with_langfuse.rb
@@ -26,7 +26,7 @@
       )
     )
   )
-  c.use "OpenTelemetry::Instrumentation::RubyLLM"
+  c.use "OpenTelemetry::Instrumentation::RubyLLM", capture_content: true
 end
 
 RubyLLM.configure do |c|
@@ -35,6 +35,7 @@
 end
 
 chat = RubyLLM.chat
+chat.with_instructions("You are a helpful assistant that provides concise answers.")
 response = chat.ask("What is the meaning of life?")
 puts "\nResponse: #{response.content}"
 
diff --git a/example/trace_demonstration_with_langfuse_and_tools.rb b/example/trace_demonstration_with_langfuse_and_tools.rb
@@ -26,7 +26,7 @@
       )
     )
   )
-  c.use "OpenTelemetry::Instrumentation::RubyLLM"
+  c.use "OpenTelemetry::Instrumentation::RubyLLM", capture_content: true
 end
 
 RubyLLM.configure do |c|
@@ -44,9 +44,12 @@ def execute(expression:)
 end
 
 chat = RubyLLM.chat
+chat.with_instructions("You are a helpful assistant that provides concise answers.")
 chat.with_tool(Calculator)
 response = chat.ask("Use the calculator tool to compute 123 * 456")
 puts "\nResponse: #{response.content}"
+response = chat.ask("Use the tool again to compute 789 + 1011")
+puts "\nResponse: #{response.content}"
 
 # This line is only necessary in short-lived scripts. In a long-running application, spans will be flushed automatically.
 OpenTelemetry.tracer_provider.force_flush
diff --git a/lib/opentelemetry/instrumentation/ruby_llm/instrumentation.rb b/lib/opentelemetry/instrumentation/ruby_llm/instrumentation.rb
@@ -7,6 +7,8 @@ class Instrumentation < OpenTelemetry::Instrumentation::Base
         instrumentation_name "OpenTelemetry::Instrumentation::RubyLLM"
         instrumentation_version VERSION
 
+        option :capture_content, default: false, validate: :boolean
+
         present do
           defined?(::RubyLLM)
         end
diff --git a/lib/opentelemetry/instrumentation/ruby_llm/patches/chat.rb b/lib/opentelemetry/instrumentation/ruby_llm/patches/chat.rb
@@ -31,6 +31,18 @@ def ask(message, &block)
                 span.set_attribute("gen_ai.usage.input_tokens", response.input_tokens) if response.input_tokens
                 span.set_attribute("gen_ai.usage.output_tokens", response.output_tokens) if response.output_tokens
                 span.set_attribute("gen_ai.request.temperature", @temperature) if @temperature
+
+                if capture_content?
+                  system_messages = @messages.select { |m| m.role == :system }
+                  input_messages = @messages[0..-2].reject { |m| m.role == :system }
+
+                  unless system_messages.empty?
+                    span.set_attribute("gen_ai.system_instructions", format_system_instructions(system_messages))
+                  end
+
+                  span.set_attribute("gen_ai.input.messages", format_messages(input_messages))
+                  span.set_attribute("gen_ai.output.messages", format_messages([response]))
+                end
               end
 
               result
@@ -61,6 +73,39 @@ def execute_tool(tool_call)
 
           private
 
+          def capture_content?
+            env_value = ENV["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"]
+            return env_value.to_s.strip.casecmp("true").zero? unless env_value.nil?
+
+            RubyLLM::Instrumentation.instance.config[:capture_content]
+          end
+
+          def format_messages(messages)
+            messages.map { |m| format_message(m) }.to_json
+          end
+
+          def format_message(message)
+            msg = { role: message.role.to_s, parts: [] }
+
+            if message.content
+              msg[:parts] << { type: "text", content: message.content.to_s }
+            end
+
+            if message.tool_calls&.any?
+              message.tool_calls.each_value do |tc|
+                msg[:parts] << { type: "tool_call", id: tc.id, name: tc.name, arguments: tc.arguments }
+              end
+            end
+
+            msg[:tool_call_id] = message.tool_call_id if message.tool_call_id
+
+            msg
+          end
+
+          def format_system_instructions(system_messages)
+            system_messages.map { |m| { type: "text", content: m.content.to_s } }.to_json
+          end
+
           def tracer
             RubyLLM::Instrumentation.instance.tracer
           end
diff --git a/test/instrumentation_test.rb b/test/instrumentation_test.rb
@@ -228,4 +228,108 @@ def execute(expression:)
     response = chat.ask("What is 2+2?")
     assert_equal "The answer is 4", response.content
   end
+
+  def test_does_not_capture_content_by_default
+    stub_request(:post, "https://api.openai.com/v1/chat/completions")
+      .to_return(
+        status: 200,
+        headers: { "Content-Type" => "application/json" },
+        body: {
+          id: "chatcmpl-123",
+          object: "chat.completion",
+          model: "gpt-4o-mini",
+          choices: [{
+            index: 0,
+            message: { role: "assistant", content: "Hello, world!" },
+            finish_reason: "stop"
+          }],
+          usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }
+        }.to_json
+      )
+
+    chat = RubyLLM.chat(model: "gpt-4o-mini")
+    chat.with_instructions("You are helpful")
+    chat.ask("Hi")
+
+    span = EXPORTER.finished_spans.first
+    assert_nil span.attributes["gen_ai.system_instructions"]
+    assert_nil span.attributes["gen_ai.input.messages"]
+    assert_nil span.attributes["gen_ai.output.messages"]
+  end
+
+  def test_captures_content_when_enabled
+    OpenTelemetry::Instrumentation::RubyLLM::Instrumentation.instance.config[:capture_content] = true
+
+    stub_request(:post, "https://api.openai.com/v1/chat/completions")
+      .to_return(
+        status: 200,
+        headers: { "Content-Type" => "application/json" },
+        body: {
+          id: "chatcmpl-123",
+          object: "chat.completion",
+          model: "gpt-4o-mini",
+          choices: [{
+            index: 0,
+            message: { role: "assistant", content: "Hello, world!" },
+            finish_reason: "stop"
+          }],
+          usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }
+        }.to_json
+      )
+
+    chat = RubyLLM.chat(model: "gpt-4o-mini")
+    chat.with_instructions("You are helpful")
+    chat.ask("Hi")
+
+    span = EXPORTER.finished_spans.first
+
+    system_instructions = JSON.parse(span.attributes["gen_ai.system_instructions"])
+    assert_equal [{ "type" => "text", "content" => "You are helpful" }], system_instructions
+
+    input_messages = JSON.parse(span.attributes["gen_ai.input.messages"])
+    assert_equal 1, input_messages.length
+    assert_equal "user", input_messages[0]["role"]
+    assert_equal [{ "type" => "text", "content" => "Hi" }], input_messages[0]["parts"]
+
+    output_messages = JSON.parse(span.attributes["gen_ai.output.messages"])
+    assert_equal 1, output_messages.length
+    assert_equal "assistant", output_messages[0]["role"]
+    assert_equal [{ "type" => "text", "content" => "Hello, world!" }], output_messages[0]["parts"]
+  ensure
+    OpenTelemetry::Instrumentation::RubyLLM::Instrumentation.instance.config[:capture_content] = false
+  end
+
+  def test_captures_content_when_enabled_via_env_var
+    ENV["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"] = "true"
+
+    stub_request(:post, "https://api.openai.com/v1/chat/completions")
+      .to_return(
+        status: 200,
+        headers: { "Content-Type" => "application/json" },
+        body: {
+          id: "chatcmpl-123",
+          object: "chat.completion",
+          model: "gpt-4o-mini",
+          choices: [{
+            index: 0,
+            message: { role: "assistant", content: "Hello, world!" },
+            finish_reason: "stop"
+          }],
+          usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }
+        }.to_json
+      )
+
+    chat = RubyLLM.chat(model: "gpt-4o-mini")
+    chat.ask("Hi")
+
+    span = EXPORTER.finished_spans.first
+
+    input_messages = JSON.parse(span.attributes["gen_ai.input.messages"])
+    assert_equal "user", input_messages[0]["role"]
+
+    output_messages = JSON.parse(span.attributes["gen_ai.output.messages"])
+    assert_equal "assistant", output_messages[0]["role"]
+  ensure
+    ENV.delete("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT")
+  end
 end

Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`)`
`27`	`27`	`)`
`28`	`28`	`)`
`29`		`- c.use "OpenTelemetry::Instrumentation::RubyLLM"`
	`29`	`+ c.use "OpenTelemetry::Instrumentation::RubyLLM", capture_content: true`
`30`	`30`	`end`
`31`	`31`
`32`	`32`	`RubyLLM.configure do \|c\|`
`@@ -35,6 +35,7 @@`
`35`	`35`	`end`
`36`	`36`
`37`	`37`	`chat = RubyLLM.chat`
	`38`	`+chat.with_instructions("You are a helpful assistant that provides concise answers.")`
`38`	`39`	`response = chat.ask("What is the meaning of life?")`
`39`	`40`	`puts "\nResponse: #{response.content}"`
`40`	`41`