From 2586ef0a6017ddde252f6713ad18a3c42e6f8cb8 Mon Sep 17 00:00:00 2001
From: Caio Ribeiro <caio.ribeiro.clw@gmail.com>
Date: Thu, 21 May 2026 00:03:42 +0000
Subject: [PATCH] feat: add context input telemetry event helper

---
 README.md                 | 34 ++++++++++++++++++++++++++
 claude_telemetry/hooks.py | 50 +++++++++++++++++++++++++++++++++++++++
 tests/test_hooks.py       | 42 ++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)

diff --git a/README.md b/README.md
index 73aaa48..bfa1769 100644
--- a/README.md
+++ b/README.md
@@ -226,6 +226,40 @@ Every agent run creates a full trace showing exactly what happened:
 This gives you complete visibility into what your agent did, why it failed, and how much
 it cost.
 
+### Optional context input events
+
+Tool and model spans show what the agent did after it started. Some headless
+workflows also need to debug which repo instructions, skills, hooks, or memory
+records entered the session in the first place. For that, `TelemetryHooks` exposes
+an opt-in helper that records a privacy-preserving `context.input.loaded` event on
+the session span.
+
+The event should carry paths/URIs, hashes, and categorical metadata only. Do not
+attach raw prompt text, raw context bodies, tool arguments, secrets, memory
+contents, or full transcripts unless your application has an explicit opt-in policy.
+
+```python
+from claude_telemetry.hooks import TelemetryHooks
+
+hooks = TelemetryHooks()
+# Requires an active session span (normal hook flow); otherwise raises RuntimeError.
+hooks.record_context_input_loaded(
+    kind="agent_instruction_file",
+    source_path="AGENTS.md",
+    source_bytes_hash="sha256:...",
+    delivered_hash="sha256:...",
+    loaded_by="claude_code",
+    activation="session_start",
+    scope="repo",
+    duplicate_suppression_policy="not_evaluated",
+    extra_attributes={"gen_ai.conversation.id": "session-123"},
+)
+```
+
+This is intentionally small: it lets wrappers or harnesses correlate context inputs
+with the existing tool/model trace without making `claude_telemetry` inspect raw
+project context.
+
 ## Span Hierarchy
 
 ```
diff --git a/claude_telemetry/hooks.py b/claude_telemetry/hooks.py
index 2d5446c..dd97688 100644
--- a/claude_telemetry/hooks.py
+++ b/claude_telemetry/hooks.py
@@ -504,6 +504,56 @@ async def on_user_prompt_submit(
 
         return {}
 
+    def record_context_input_loaded(
+        self,
+        *,
+        kind: str,
+        source_path: str | None = None,
+        source_uri: str | None = None,
+        source_bytes_hash: str | None = None,
+        delivered_hash: str | None = None,
+        loaded_by: str = "unknown",
+        activation: str = "unknown",
+        scope: str = "unknown",
+        duplicate_suppression_policy: str = "not_evaluated",
+        extra_attributes: dict[str, Any] | None = None,
+    ) -> None:
+        """Add a ``context.input.loaded`` event to the active session span.
+
+        Opt-in helper for harnesses that know which context was loaded. Pass
+        hashes/paths/URIs and categorical metadata only, never raw prompt or
+        context text, tool arguments, or memory contents; values are recorded
+        as given. Raises ``RuntimeError`` when ``self.session_span`` is falsy.
+        """
+        if not self.session_span:
+            msg = "No active session span"
+            raise RuntimeError(msg)
+
+        event_data: dict[str, Any] = {
+            "context.input.kind": kind,
+            "context.input.loaded_by": loaded_by,
+            "context.input.activation": activation,
+            "context.input.scope": scope,
+            "context.input.duplicate.suppression_policy": (
+                duplicate_suppression_policy
+            ),
+        }
+
+        optional_fields = {  # emitted only when the caller supplied a value
+            "context.input.source.path": source_path,
+            "context.input.source.uri": source_uri,
+            "context.input.source.bytes_hash": source_bytes_hash,
+            "context.input.delivered.hash": delivered_hash,
+        }
+        for key, value in optional_fields.items():
+            if value is not None:
+                event_data[key] = value
+
+        if extra_attributes:
+            event_data.update(extra_attributes)  # merged last; wins on key clash
+
+        self.session_span.add_event("context.input.loaded", event_data)
+
     async def on_pre_tool_use(
         self,
         input_data: dict[str, Any],
diff --git a/tests/test_hooks.py b/tests/test_hooks.py
index 33642fe..b81803f 100644
--- a/tests/test_hooks.py
+++ b/tests/test_hooks.py
@@ -107,6 +107,48 @@ async def test_stores_message_history(self, hooks, mocker, mock_tracer):
         assert hooks.messages[0]["content"] == "Test prompt"
 
 
+class TestContextInputLoaded:
+    """Tests for the record_context_input_loaded session-span event helper."""
+
+    def test_records_context_input_event(self, hooks, mocker):
+        """Event carries core fields, path/hash identities, and extra attributes."""
+        hooks.session_span = mocker.MagicMock()  # stand-in for a live span
+
+        hooks.record_context_input_loaded(
+            kind="agent_instruction_file",
+            source_path="AGENTS.md",
+            source_bytes_hash="sha256:source",
+            delivered_hash="sha256:delivered",
+            loaded_by="claude_code",
+            activation="session_start",
+            scope="repo",
+            duplicate_suppression_policy="suppress_equal_dedupe_key_within_scope",
+            extra_attributes={
+                "gen_ai.conversation.id": "session-123",
+                "context.input.delivered.full_render.status": "available",
+            },
+        )
+
+        hooks.session_span.add_event.assert_called_once()
+        event_name, event_data = hooks.session_span.add_event.call_args[0]
+        assert event_name == "context.input.loaded"
+        assert event_data["context.input.kind"] == "agent_instruction_file"
+        assert event_data["context.input.source.path"] == "AGENTS.md"
+        assert event_data["context.input.source.bytes_hash"] == "sha256:source"
+        assert event_data["context.input.delivered.hash"] == "sha256:delivered"
+        assert event_data["context.input.loaded_by"] == "claude_code"
+        assert event_data["context.input.activation"] == "session_start"
+        assert event_data["context.input.scope"] == "repo"
+        assert event_data["gen_ai.conversation.id"] == "session-123"
+
+    def test_requires_active_session_span(self, hooks):
+        """Raises RuntimeError when no session span is active."""
+        hooks.session_span = None
+
+        with pytest.raises(RuntimeError, match="No active session span"):
+            hooks.record_context_input_loaded(kind="agent_instruction_file")
+
+
 class TestPreToolUse:
     """Tests for on_pre_tool_use hook."""