Skip to content

Commit 743389a

Browse files
Merge pull request #79 from agentevals-dev/chore/content-extraction-dedup
Consolidate text extraction into a single source of truth
2 parents a56f5fb + 1a15507 commit 743389a

4 files changed

Lines changed: 52 additions & 74 deletions

File tree

src/agentevals/converter.py

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,12 @@
1818
from google.adk.evaluation.eval_case import IntermediateData, Invocation
1919
from google.genai import types as genai_types
2020

21-
from .extraction import get_extractor, parse_json
21+
from .extraction import (
22+
extract_agent_response_from_attrs,
23+
extract_user_text_from_attrs,
24+
get_extractor,
25+
parse_json,
26+
)
2227
from .loader.base import Span, Trace
2328
from .trace_attrs import (
2429
ADK_INVOCATION_ID,
@@ -152,50 +157,34 @@ def _walk(span: Span, op_prefix: str, acc: list[Span]) -> None:
152157

153158

154159
def _extract_user_content(first_call_llm: Span) -> genai_types.Content:
155-
"""Extract user input from the first call_llm span's llm_request tag."""
160+
"""Extract user input from the first call_llm span's attributes via shared extractor."""
161+
text = extract_user_text_from_attrs(first_call_llm.tags)
162+
if text:
163+
return genai_types.Content(
164+
role="user",
165+
parts=[genai_types.Part(text=text)],
166+
)
156167
llm_request_raw = first_call_llm.get_tag(ADK_LLM_REQUEST, "{}")
157168
llm_request = parse_json(llm_request_raw)
158-
contents = llm_request.get("contents", [])
159-
160-
for content_dict in reversed(contents):
161-
if content_dict.get("role") != "user":
162-
continue
163-
parts = content_dict.get("parts", [])
164-
# Skip function_response parts — only want actual user text messages
165-
text_parts = [p for p in parts if "text" in p]
166-
if text_parts:
167-
return genai_types.Content(
168-
role="user",
169-
parts=[genai_types.Part(text=p["text"]) for p in text_parts],
170-
)
171-
172-
for content_dict in contents:
169+
for content_dict in llm_request.get("contents", []):
173170
if content_dict.get("role") == "user":
174171
return _content_from_dict(content_dict)
175-
176172
raise ValueError(f"call_llm span {first_call_llm.span_id}: no user content found in llm_request")
177173

178174

179175
def _extract_final_response(last_call_llm: Span) -> genai_types.Content:
180-
"""Extract final text response from the last call_llm span's llm_response tag."""
176+
"""Extract final text response from the last call_llm span's attributes via shared extractor."""
177+
text = extract_agent_response_from_attrs(last_call_llm.tags)
178+
if text:
179+
return genai_types.Content(
180+
role="model",
181+
parts=[genai_types.Part(text=text)],
182+
)
181183
llm_response_raw = last_call_llm.get_tag(ADK_LLM_RESPONSE, "{}")
182184
llm_response = parse_json(llm_response_raw)
183-
184185
content_dict = llm_response.get("content", {})
185186
if not content_dict:
186187
raise ValueError(f"call_llm span {last_call_llm.span_id}: no content in llm_response")
187-
188-
parts_dicts = content_dict.get("parts", [])
189-
# Final response should have text parts, not function_call parts
190-
text_parts = [p for p in parts_dicts if "text" in p]
191-
if text_parts:
192-
return genai_types.Content(
193-
role="model",
194-
parts=[genai_types.Part(text=p["text"]) for p in text_parts],
195-
)
196-
197-
# If the last call_llm only has function_call parts, that's unexpected
198-
# for a final response — the agent may have been cut short.
199188
logger.warning(
200189
"call_llm span %s: last llm_response has no text parts, may not be the actual final response",
201190
last_call_llm.span_id,

src/agentevals/extraction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def extract_agent_response_from_attrs(attrs: dict[str, Any]) -> str | None:
100100
if messages_raw:
101101
messages = parse_json_attr(messages_raw, "gen_ai.output.messages")
102102
if isinstance(messages, list):
103-
for msg in messages:
103+
for msg in reversed(messages):
104104
if isinstance(msg, dict) and msg.get("role") in ASSISTANT_ROLES:
105105
text = extract_text_from_message(msg)
106106
if text:

src/agentevals/genai_converter.py

Lines changed: 19 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,14 @@
1414
from google.genai import types as genai_types
1515

1616
from .converter import ConversionResult
17-
from .extraction import GenAIExtractor, is_invocation_span, is_llm_span, parse_tool_response_content
17+
from .extraction import (
18+
GenAIExtractor,
19+
extract_agent_response_from_attrs,
20+
extract_user_text_from_attrs,
21+
is_invocation_span,
22+
is_llm_span,
23+
parse_tool_response_content,
24+
)
1825
from .loader.base import Span, Trace
1926
from .trace_attrs import (
2027
OTEL_GENAI_INPUT_MESSAGES,
@@ -307,50 +314,21 @@ def _turn_to_invocation(turn: _ConversationTurn) -> Invocation:
307314

308315

309316
def _extract_user_text(llm_span: Span) -> str:
310-
messages_raw = llm_span.get_tag(OTEL_GENAI_INPUT_MESSAGES, "[]")
311-
messages = parse_json_attr(messages_raw, "gen_ai.input.messages")
312-
313-
if not isinstance(messages, list):
314-
messages = []
315-
316-
for msg in reversed(messages):
317-
if not isinstance(msg, dict):
318-
continue
319-
if msg.get("role") in USER_ROLES:
320-
text = extract_text_from_message(msg)
321-
if text:
322-
logger.debug(f"Found user message: {text[:100]}")
323-
return text
324-
325-
logger.warning(f"No user message found in {len(messages)} messages")
326-
raise ValueError(f"LLM span {llm_span.span_id}: no user message found in gen_ai.input.messages")
317+
text = extract_user_text_from_attrs(llm_span.tags)
318+
if text:
319+
return text
320+
raise ValueError(
321+
f"LLM span {llm_span.span_id}: no user message found (checked gen_ai.input.messages and ADK llm_request)"
322+
)
327323

328324

329325
def _extract_assistant_text(llm_span: Span) -> str:
330-
messages_raw = llm_span.get_tag(OTEL_GENAI_OUTPUT_MESSAGES, "[]")
331-
messages = parse_json_attr(messages_raw, "gen_ai.output.messages")
332-
333-
if not isinstance(messages, list):
334-
messages = []
335-
336-
logger.debug(f"Extracting final response from {len(messages)} output messages")
337-
for i, msg in enumerate(messages):
338-
if isinstance(msg, dict):
339-
logger.debug(
340-
f" Message {i}: role={msg.get('role')}, content_len={len(msg.get('content', ''))}, has_tool_calls={bool(msg.get('tool_calls'))}"
341-
)
342-
343-
for msg in reversed(messages):
344-
if not isinstance(msg, dict):
345-
continue
346-
if msg.get("role") in ASSISTANT_ROLES:
347-
text = extract_text_from_message(msg)
348-
if text:
349-
logger.debug(f"Found assistant message with text: {text[:100]}")
350-
return text
351-
326+
text = extract_agent_response_from_attrs(llm_span.tags)
327+
if text:
328+
return text
352329
logger.warning(
353-
f"LLM span {llm_span.span_id}: no assistant message with content in gen_ai.output.messages ({len(messages)} messages)"
330+
"LLM span %s: no assistant message with content in span attributes",
331+
llm_span.span_id,
354332
)
355333
return ""
356334

tests/test_extraction.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,17 @@ def test_adk_no_text_parts(self):
194194
attrs = {ADK_LLM_RESPONSE: json.dumps({"content": {"parts": [{"function_call": {"name": "tool"}}]}})}
195195
assert extract_agent_response_from_attrs(attrs) is None
196196

197+
def test_genai_prefers_last_assistant(self):
198+
attrs = {
199+
OTEL_GENAI_OUTPUT_MESSAGES: json.dumps(
200+
[
201+
{"role": "assistant", "content": "First response"},
202+
{"role": "assistant", "content": "Second response"},
203+
]
204+
)
205+
}
206+
assert extract_agent_response_from_attrs(attrs) == "Second response"
207+
197208

198209
# ---------------------------------------------------------------------------
199210
# extract_token_usage_from_attrs

0 commit comments

Comments
 (0)