Skip to content

Commit b75c17a

Browse files
committed
Add opt-in content capture for messages and system instructions
Support capturing gen_ai.system_instructions, gen_ai.input.messages and gen_ai.output.messages via capture_content config option or OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT env var. Content is not captured by default.
1 parent c7b4f09 commit b75c17a

6 files changed

Lines changed: 187 additions & 3 deletions

File tree

README.md

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,44 @@ OpenTelemetry::SDK.configure do |c|
3030
end
3131
```
3232

33+
## Configuration
34+
35+
### Content capture
36+
37+
By default, message content is **not captured**. To enable it:
38+
39+
```ruby
40+
OpenTelemetry::SDK.configure do |c|
41+
c.use 'OpenTelemetry::Instrumentation::RubyLLM', capture_content: true
42+
end
43+
```
44+
45+
Or set the environment variable, which takes precedence over the `capture_content` option whenever it is set:
46+
47+
```bash
48+
export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
49+
```
50+
51+
When enabled, the following attributes are added to chat spans:
52+
53+
| Attribute | Description |
54+
|-----------|-------------|
55+
| `gen_ai.system_instructions` | System instructions provided via `with_instructions` |
56+
| `gen_ai.input.messages` | Input messages sent to the model |
57+
| `gen_ai.output.messages` | Final output messages from the model |
58+
59+
> [!WARNING]
60+
> Captured content may include sensitive or personally identifiable information (PII). Use with caution in production environments.
61+
3362
## What's traced?
3463

3564
| Feature | Status |
3665
|---------|--------|
3766
| Chat completions | Supported |
3867
| Tool calls | Supported |
3968
| Error handling | Supported |
69+
| Opt-in input/output content capture | Supported |
4070
| Conversation tracking (`gen_ai.conversation.id`) | Planned |
41-
| Opt-in input/output content capture | Planned |
4271
| System instructions capture | Supported |
4372
| Custom attributes on traces and spans | Planned |
4473
| Embeddings | Planned |

example/trace_demonstration_with_langfuse.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
)
2727
)
2828
)
29-
c.use "OpenTelemetry::Instrumentation::RubyLLM"
29+
c.use "OpenTelemetry::Instrumentation::RubyLLM", capture_content: true
3030
end
3131

3232
RubyLLM.configure do |c|
@@ -35,6 +35,7 @@
3535
end
3636

3737
chat = RubyLLM.chat
38+
chat.with_instructions("You are a helpful assistant that provides concise answers.")
3839
response = chat.ask("What is the meaning of life?")
3940
puts "\nResponse: #{response.content}"
4041

example/trace_demonstration_with_langfuse_and_tools.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
)
2727
)
2828
)
29-
c.use "OpenTelemetry::Instrumentation::RubyLLM"
29+
c.use "OpenTelemetry::Instrumentation::RubyLLM", capture_content: true
3030
end
3131

3232
RubyLLM.configure do |c|
@@ -44,9 +44,12 @@ def execute(expression:)
4444
end
4545

4646
chat = RubyLLM.chat
47+
chat.with_instructions("You are a helpful assistant that provides concise answers.")
4748
chat.with_tool(Calculator)
4849
response = chat.ask("Use the calculator tool to compute 123 * 456")
4950
puts "\nResponse: #{response.content}"
51+
response = chat.ask("Use the tool again to compute 789 + 1011")
52+
puts "\nResponse: #{response.content}"
5053

5154
# This line is only necessary in short-lived scripts. In a long-running application, spans will be flushed automatically.
5255
OpenTelemetry.tracer_provider.force_flush

lib/opentelemetry/instrumentation/ruby_llm/instrumentation.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ class Instrumentation < OpenTelemetry::Instrumentation::Base
77
instrumentation_name "OpenTelemetry::Instrumentation::RubyLLM"
88
instrumentation_version VERSION
99

10+
option :capture_content, default: false, validate: :boolean
11+
1012
present do
1113
defined?(::RubyLLM)
1214
end

lib/opentelemetry/instrumentation/ruby_llm/patches/chat.rb

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@ def ask(message, &block)
3131
span.set_attribute("gen_ai.usage.input_tokens", response.input_tokens) if response.input_tokens
3232
span.set_attribute("gen_ai.usage.output_tokens", response.output_tokens) if response.output_tokens
3333
span.set_attribute("gen_ai.request.temperature", @temperature) if @temperature
34+
35+
if capture_content?
36+
system_messages = @messages.select { |m| m.role == :system }
37+
input_messages = @messages[0..-2].reject { |m| m.role == :system }
38+
39+
unless system_messages.empty?
40+
span.set_attribute("gen_ai.system_instructions", format_system_instructions(system_messages))
41+
end
42+
43+
span.set_attribute("gen_ai.input.messages", format_messages(input_messages))
44+
span.set_attribute("gen_ai.output.messages", format_messages([response]))
45+
end
3446
end
3547

3648
result
@@ -61,6 +73,39 @@ def execute_tool(tool_call)
6173

6274
private
6375

76+
# Whether message content (system instructions, input and output messages)
# should be recorded on chat spans.
#
# The OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT environment variable
# wins when it carries a value: only a case-insensitive "true" (surrounding
# whitespace ignored) enables capture. An unset, empty or whitespace-only
# variable is treated as "not set", and the instrumentation's
# :capture_content config option decides instead.
#
# @return [Boolean, nil] the env-derived boolean, or the configured
#   :capture_content value when the env var is unset/blank
def capture_content?
  env_value = ENV["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"].to_s.strip
  # An empty value must behave like an unset variable, so it must not
  # override an explicit `capture_content: true` in the config.
  return env_value.casecmp?("true") unless env_value.empty?

  RubyLLM::Instrumentation.instance.config[:capture_content]
end
82+
83+
# Serializes a list of messages to a JSON string, formatting each entry
# with #format_message first.
#
# @param messages [Array] message objects accepted by #format_message
# @return [String] JSON array of formatted messages
def format_messages(messages)
  formatted = messages.map { |message| format_message(message) }
  formatted.to_json
end
86+
87+
# Converts one message object into the role/parts hash that is serialized
# into the gen_ai.input.messages / gen_ai.output.messages attributes.
#
# Parts are emitted in this order:
# * the message content, as a "text" part -- or, for a tool-role message
#   that carries a tool_call_id, as a "tool_call_response" part so the
#   result is linked to the call that produced it (the part shape the
#   OpenTelemetry GenAI message schema defines for tool results);
# * one "tool_call" part per entry in message.tool_calls.
#
# @param message [#role, #content, #tool_calls, #tool_call_id]
# @return [Hash] { role: String, parts: Array<Hash> }, plus :tool_call_id
#   when the message has one
def format_message(message)
  role = message.role.to_s
  tool_call_id = message.tool_call_id
  parts = []

  if message.content
    content = message.content.to_s
    parts << if role == "tool" && tool_call_id
               { type: "tool_call_response", id: tool_call_id, response: content }
             else
               { type: "text", content: content }
             end
  end

  if message.tool_calls&.any?
    message.tool_calls.each_value do |tc|
      parts << { type: "tool_call", id: tc.id, name: tc.name, arguments: tc.arguments }
    end
  end

  msg = { role: role, parts: parts }
  # Kept at message level for backward compatibility with consumers that
  # already read it from there.
  msg[:tool_call_id] = tool_call_id if tool_call_id
  msg
end
104+
105+
# Serializes system messages as a JSON array of text parts, one part per
# message, using each message's content converted to a string.
#
# @param system_messages [Array<#content>]
# @return [String] JSON array of { type: "text", content: ... } objects
def format_system_instructions(system_messages)
  text_parts = system_messages.map do |instruction|
    { type: "text", content: instruction.content.to_s }
  end
  text_parts.to_json
end
108+
64109
# Returns the tracer exposed by the instrumentation singleton.
def tracer
  instrumentation = RubyLLM::Instrumentation.instance
  instrumentation.tracer
end

test/instrumentation_test.rb

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,4 +228,108 @@ def execute(expression:)
228228
response = chat.ask("What is 2+2?")
229229
assert_equal "The answer is 4", response.content
230230
end
231+
232+
# With the default configuration, none of the content attributes may appear
# on the chat span, even when system instructions were supplied.
def test_does_not_capture_content_by_default
  completion = {
    id: "chatcmpl-123",
    object: "chat.completion",
    model: "gpt-4o-mini",
    choices: [{
      index: 0,
      message: { role: "assistant", content: "Hello, world!" },
      finish_reason: "stop"
    }],
    usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }
  }
  stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
    status: 200,
    headers: { "Content-Type" => "application/json" },
    body: completion.to_json
  )

  conversation = RubyLLM.chat(model: "gpt-4o-mini")
  conversation.with_instructions("You are helpful")
  conversation.ask("Hi")

  attributes = EXPORTER.finished_spans.first.attributes
  %w[gen_ai.system_instructions gen_ai.input.messages gen_ai.output.messages].each do |name|
    assert_nil attributes[name]
  end
end
259+
260+
# Enabling :capture_content through the instrumentation config records the
# system instructions, the input messages and the output message on the span.
def test_captures_content_when_enabled
  OpenTelemetry::Instrumentation::RubyLLM::Instrumentation.instance.config[:capture_content] = true

  completion = {
    id: "chatcmpl-123",
    object: "chat.completion",
    model: "gpt-4o-mini",
    choices: [{
      index: 0,
      message: { role: "assistant", content: "Hello, world!" },
      finish_reason: "stop"
    }],
    usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }
  }
  stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
    status: 200,
    headers: { "Content-Type" => "application/json" },
    body: completion.to_json
  )

  conversation = RubyLLM.chat(model: "gpt-4o-mini")
  conversation.with_instructions("You are helpful")
  conversation.ask("Hi")

  attributes = EXPORTER.finished_spans.first.attributes

  instructions = JSON.parse(attributes["gen_ai.system_instructions"])
  assert_equal [{ "type" => "text", "content" => "You are helpful" }], instructions

  inputs = JSON.parse(attributes["gen_ai.input.messages"])
  assert_equal 1, inputs.length
  assert_equal "user", inputs.first["role"]
  assert_equal [{ "type" => "text", "content" => "Hi" }], inputs.first["parts"]

  outputs = JSON.parse(attributes["gen_ai.output.messages"])
  assert_equal 1, outputs.length
  assert_equal "assistant", outputs.first["role"]
  assert_equal [{ "type" => "text", "content" => "Hello, world!" }], outputs.first["parts"]
ensure
  # Switch capture back off so later tests see the default behaviour.
  OpenTelemetry::Instrumentation::RubyLLM::Instrumentation.instance.config[:capture_content] = false
end
301+
302+
# Setting OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true enables
# content capture without touching the instrumentation config.
def test_captures_content_when_enabled_via_env_var
  env_key = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"
  # Remember any ambient value so the test leaves the environment as it found it.
  previous_value = ENV[env_key]
  ENV[env_key] = "true"

  stub_request(:post, "https://api.openai.com/v1/chat/completions")
    .to_return(
      status: 200,
      headers: { "Content-Type" => "application/json" },
      body: {
        id: "chatcmpl-123",
        object: "chat.completion",
        model: "gpt-4o-mini",
        choices: [{
          index: 0,
          message: { role: "assistant", content: "Hello, world!" },
          finish_reason: "stop"
        }],
        usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }
      }.to_json
    )

  chat = RubyLLM.chat(model: "gpt-4o-mini")
  chat.ask("Hi")

  span = EXPORTER.finished_spans.first

  input_messages = JSON.parse(span.attributes["gen_ai.input.messages"])
  assert_equal "user", input_messages[0]["role"]

  output_messages = JSON.parse(span.attributes["gen_ai.output.messages"])
  assert_equal "assistant", output_messages[0]["role"]
ensure
  # Assigning nil removes the variable, so this restores both the
  # "previously unset" and the "previously set" cases.
  ENV[env_key] = previous_value
end
231335
end

0 commit comments

Comments
 (0)