fix: support legacy gen_ai.prompt/completion attributes for Ollama traces

mesutoezdil · mesutoezdil · commit 369fe1d93ca5 · 2026-05-05T15:12:12.000+02:00
Instrumentors like opentelemetry-instrumentation-ollama emit flat indexed attributes instead of the current gen_ai.input.messages JSON arrays. Without a fallback, user text and agent responses are not extracted from Ollama traces. Closes #88
diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py
@@ -58,6 +58,43 @@
 
 FORMAT_DETECTION_SPAN_LIMIT = 10
 
+
+def _parse_legacy_indexed_attrs(attrs: dict[str, Any], prefix: str) -> list[dict]:
+    """Group flat ``{prefix}N.<field>`` attributes into a list of message dicts.
+
+    ``prefix`` is matched literally and is expected to include its trailing dot
+    (e.g. ``"gen_ai.prompt."``).  Only ``role``, ``content`` and ``tool_calls.M.<field>``
+    are copied; messages are ordered by ``N`` and each message's ``tool_calls`` list
+    by ``M``.  Keys whose ``N`` or ``M`` is not made of ASCII digits are ignored.
+    """
+    messages: dict[int, dict] = {}
+    tool_calls: dict[int, dict[int, dict]] = {}
+    for key, value in attrs.items():
+        if not key.startswith(prefix):
+            continue
+        index, _, field = key[len(prefix):].partition(".")
+        # isdigit() alone also accepts e.g. "²", which int() rejects with ValueError.
+        if not (index.isascii() and index.isdigit()):
+            continue
+        # Register the message even when no recognised field follows the index.
+        msg = messages.setdefault(int(index), {})
+        if field in ("role", "content"):
+            msg[field] = value
+        elif field.startswith("tool_calls."):
+            tc_index, dot, tc_field = field[len("tool_calls."):].partition(".")
+            if dot and tc_index.isascii() and tc_index.isdigit():
+                calls = tool_calls.setdefault(int(index), {})
+                calls.setdefault(int(tc_index), {})[tc_field] = value
+    result = []
+    for idx in sorted(messages):
+        msg = dict(messages[idx])
+        calls = tool_calls.get(idx)
+        if calls:
+            msg["tool_calls"] = [calls[i] for i in sorted(calls)]
+        result.append(msg)
+    return result
+
+
 # ---------------------------------------------------------------------------
 # Pure extraction functions (operate on flat attribute dicts)
 # ---------------------------------------------------------------------------
@@ -92,6 +129,12 @@ def extract_user_text_from_attrs(attrs: dict[str, Any]) -> str | None:
                     if text:
                         return text
 
+    for msg in reversed(_parse_legacy_indexed_attrs(attrs, "gen_ai.prompt.")):
+        if msg.get("role") in USER_ROLES:
+            text = extract_text_from_message(msg)
+            if text:
+                return text
+
     return None
 
 
@@ -118,6 +161,12 @@ def extract_agent_response_from_attrs(attrs: dict[str, Any]) -> str | None:
                     if text:
                         return text
 
+    for msg in reversed(_parse_legacy_indexed_attrs(attrs, "gen_ai.completion.")):
+        if msg.get("role") in ASSISTANT_ROLES:
+            text = extract_text_from_message(msg)
+            if text:
+                return text
+
     return None
 
 
diff --git a/tests/test_extraction.py b/tests/test_extraction.py
@@ -802,3 +802,53 @@ def test_absent_type_and_description(self):
         result = extract_tool_call_from_attrs(attrs)
         assert "type" not in result
         assert "description" not in result
+
+
+# ---------------------------------------------------------------------------
+# Legacy gen_ai.prompt.* / gen_ai.completion.* attributes (Ollama style)
+# ---------------------------------------------------------------------------
+
+
+class TestLegacyGenAIAttributes:
+    """Legacy flat gen_ai.prompt.N.* / gen_ai.completion.N.* attributes act as a fallback."""
+
+    def test_user_text_from_legacy_prompt(self):
+        expected = "Hi! Can you help me?"
+        span_attrs = {
+            "gen_ai.prompt.0.role": "user",
+            "gen_ai.prompt.0.content": expected,
+            "gen_ai.request.model": "llama3.2:3b",
+        }
+        assert extract_user_text_from_attrs(span_attrs) == expected
+
+    def test_user_text_prefers_last_user_in_legacy_prompt(self):
+        history = {
+            "gen_ai.prompt.0.role": "user",
+            "gen_ai.prompt.0.content": "First message",
+            "gen_ai.prompt.1.role": "assistant",
+            "gen_ai.prompt.1.content": "Response",
+            "gen_ai.prompt.2.role": "user",
+            "gen_ai.prompt.2.content": "Follow-up",
+        }
+        assert extract_user_text_from_attrs(history) == "Follow-up"
+
+    def test_agent_response_from_legacy_completion(self):
+        expected = "You rolled a 4 on a 6-sided die."
+        span_attrs = {
+            "gen_ai.completion.0.role": "assistant",
+            "gen_ai.completion.0.content": expected,
+            "gen_ai.request.model": "llama3.2:3b",
+        }
+        assert extract_agent_response_from_attrs(span_attrs) == expected
+
+    def test_legacy_prompt_ignored_when_standard_attr_present(self):
+        standard = json.dumps([{"role": "user", "content": "Standard wins"}])
+        legacy = {"gen_ai.prompt.0.role": "user", "gen_ai.prompt.0.content": "Legacy loses"}
+        span_attrs = {OTEL_GENAI_INPUT_MESSAGES: standard, **legacy}
+        assert extract_user_text_from_attrs(span_attrs) == "Standard wins"
+
+    def test_legacy_completion_ignored_when_standard_attr_present(self):
+        standard = json.dumps([{"role": "assistant", "content": "Standard wins"}])
+        legacy = {"gen_ai.completion.0.role": "assistant", "gen_ai.completion.0.content": "Legacy loses"}
+        span_attrs = {OTEL_GENAI_OUTPUT_MESSAGES: standard, **legacy}
+        assert extract_agent_response_from_attrs(span_attrs) == "Standard wins"