From 2586ef0a6017ddde252f6713ad18a3c42e6f8cb8 Mon Sep 17 00:00:00 2001 From: Caio Ribeiro Date: Thu, 21 May 2026 00:03:42 +0000 Subject: [PATCH] feat: add context input telemetry event helper --- README.md | 34 ++++++++++++++++++++++++++ claude_telemetry/hooks.py | 50 +++++++++++++++++++++++++++++++++++++++ tests/test_hooks.py | 42 ++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+) diff --git a/README.md b/README.md index 73aaa48..bfa1769 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,40 @@ Every agent run creates a full trace showing exactly what happened: This gives you complete visibility into what your agent did, why it failed, and how much it cost. +### Optional context input events + +Tool and model spans show what the agent did after it started. Some headless +workflows also need to debug which repo instructions, skills, hooks, or memory +records entered the session in the first place. For that, `TelemetryHooks` exposes +an opt-in helper that records a privacy-preserving `context.input.loaded` event on +the session span. + +The event should carry paths/URIs, hashes, and categorical metadata only. Do not +attach raw prompt text, raw context bodies, tool arguments, secrets, memory +contents, or full transcripts unless your application has an explicit opt-in policy. + +```python +from claude_telemetry.hooks import TelemetryHooks + +hooks = TelemetryHooks() +# Start the session span using the normal hook flow first. +hooks.record_context_input_loaded( + kind="agent_instruction_file", + source_path="AGENTS.md", + source_bytes_hash="sha256:...", + delivered_hash="sha256:...", + loaded_by="claude_code", + activation="session_start", + scope="repo", + duplicate_suppression_policy="not_evaluated", + extra_attributes={"gen_ai.conversation.id": "session-123"}, +) +``` + +This is intentionally small: it lets wrappers or harnesses correlate context inputs +with the existing tool/model trace without making `claude_telemetry` inspect raw +project context. + ## Span Hierarchy ``` diff --git a/claude_telemetry/hooks.py b/claude_telemetry/hooks.py index 2d5446c..dd97688 100644 --- a/claude_telemetry/hooks.py +++ b/claude_telemetry/hooks.py @@ -504,6 +504,56 @@ async def on_user_prompt_submit( return {} + def record_context_input_loaded( + self, + *, + kind: str, + source_path: str | None = None, + source_uri: str | None = None, + source_bytes_hash: str | None = None, + delivered_hash: str | None = None, + loaded_by: str = "unknown", + activation: str = "unknown", + scope: str = "unknown", + duplicate_suppression_policy: str = "not_evaluated", + extra_attributes: dict[str, Any] | None = None, + ) -> None: + """Record a privacy-preserving context input event on the session span. + + This is an opt-in helper for harnesses that know when context was loaded + before or during an agent run. It records identities and categorical + metadata only; callers should pass hashes/paths/URIs, not raw prompt, + raw context text, tool arguments, or memory contents. + """ + if not self.session_span: + msg = "No active session span" + raise RuntimeError(msg) + + event_data: dict[str, Any] = { + "context.input.kind": kind, + "context.input.loaded_by": loaded_by, + "context.input.activation": activation, + "context.input.scope": scope, + "context.input.duplicate.suppression_policy": ( + duplicate_suppression_policy + ), + } + + optional_fields = { + "context.input.source.path": source_path, + "context.input.source.uri": source_uri, + "context.input.source.bytes_hash": source_bytes_hash, + "context.input.delivered.hash": delivered_hash, + } + for key, value in optional_fields.items(): + if value is not None: + event_data[key] = value + + if extra_attributes: + event_data.update(extra_attributes) + + self.session_span.add_event("context.input.loaded", event_data) + async def on_pre_tool_use( self, input_data: dict[str, Any], diff --git a/tests/test_hooks.py b/tests/test_hooks.py index 33642fe..b81803f 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -107,6 +107,48 @@ async def test_stores_message_history(self, hooks, mocker, mock_tracer): assert hooks.messages[0]["content"] == "Test prompt" +class TestContextInputLoaded: + """Tests for privacy-preserving context input events.""" + + def test_records_context_input_event(self, hooks, mocker): + """Context input events include hashes and metadata, not raw content.""" + hooks.session_span = mocker.MagicMock() + + hooks.record_context_input_loaded( + kind="agent_instruction_file", + source_path="AGENTS.md", + source_bytes_hash="sha256:source", + delivered_hash="sha256:delivered", + loaded_by="claude_code", + activation="session_start", + scope="repo", + duplicate_suppression_policy="suppress_equal_dedupe_key_within_scope", + extra_attributes={ + "gen_ai.conversation.id": "session-123", + "context.input.delivered.full_render.status": "available", + }, + ) + + hooks.session_span.add_event.assert_called_once() + event_name, event_data = hooks.session_span.add_event.call_args[0] + assert event_name == "context.input.loaded" + assert event_data["context.input.kind"] == "agent_instruction_file" + assert event_data["context.input.source.path"] == "AGENTS.md" + assert event_data["context.input.source.bytes_hash"] == "sha256:source" + assert event_data["context.input.delivered.hash"] == "sha256:delivered" + assert event_data["context.input.loaded_by"] == "claude_code" + assert event_data["context.input.activation"] == "session_start" + assert event_data["context.input.scope"] == "repo" + assert event_data["gen_ai.conversation.id"] == "session-123" + + def test_requires_active_session_span(self, hooks): + """Context input events need a session span boundary.""" + hooks.session_span = None + + with pytest.raises(RuntimeError, match="No active session span"): + hooks.record_context_input_loaded(kind="agent_instruction_file") + + class TestPreToolUse: """Tests for on_pre_tool_use hook."""