From 948094b4357e410238be499921cded7088416349 Mon Sep 17 00:00:00 2001 From: erauner Date: Tue, 12 May 2026 22:49:19 -0500 Subject: [PATCH 1/4] Support ADK generate_content LLM spans --- src/agentevals/converter.py | 43 +++++++++--------- src/agentevals/extraction.py | 70 +++++++++++++++++++++-------- tests/test_converter.py | 86 ++++++++++++++++++++++++++++++++++++ tests/test_extraction.py | 34 ++++++++++++++ 4 files changed, 195 insertions(+), 38 deletions(-) diff --git a/src/agentevals/converter.py b/src/agentevals/converter.py index 4e4b3b8..c5255e1 100644 --- a/src/agentevals/converter.py +++ b/src/agentevals/converter.py @@ -23,6 +23,7 @@ extract_tool_call_from_span, extract_tool_result_from_span, extract_user_text_from_attrs, + find_adk_llm_spans_in, get_extractor, has_adk_descendant, is_adk_scope, @@ -127,15 +128,17 @@ def _find_adk_spans(trace: Trace, operation: str) -> list[Span]: def _convert_invoke_span(invoke_span: Span) -> Invocation: - call_llm_spans = _find_children_by_op(invoke_span, "call_llm") - if not call_llm_spans: - raise ValueError(f"invoke_agent span {invoke_span.span_id} has no child call_llm spans") + llm_spans = find_adk_llm_spans_in(invoke_span) + if not llm_spans: + raise ValueError( + f"invoke_agent span {invoke_span.span_id} has no converter-compatible ADK LLM descendants" + ) tool_spans = _find_children_by_op(invoke_span, "execute_tool") - user_content = _extract_user_content(call_llm_spans[0]) - final_response = _extract_final_response(call_llm_spans[-1]) - tool_uses, tool_responses = _extract_tool_trajectory(call_llm_spans, tool_spans) + user_content = _extract_user_content(llm_spans[0]) + final_response = _extract_final_response(llm_spans[-1]) + tool_uses, tool_responses = _extract_tool_trajectory(llm_spans, tool_spans) intermediate_data = IntermediateData( tool_uses=tool_uses, @@ -177,8 +180,8 @@ def _extract_user_content(first_call_llm: Span) -> genai_types.Content: ) llm_request_raw = first_call_llm.get_tag(ADK_LLM_REQUEST, "{}") llm_request = parse_json(llm_request_raw) - for content_dict in llm_request.get("contents", []): - if content_dict.get("role") == "user": + for content_dict in llm_request.get("contents", llm_request.get("Contents", [])): + if content_dict.get("role", content_dict.get("Role")) == "user": return _content_from_dict(content_dict) raise ValueError(f"call_llm span {first_call_llm.span_id}: no user content found in llm_request") @@ -193,7 +196,7 @@ def _extract_final_response(last_call_llm: Span) -> genai_types.Content: ) llm_response_raw = last_call_llm.get_tag(ADK_LLM_RESPONSE, "{}") llm_response = parse_json(llm_response_raw) - content_dict = llm_response.get("content", {}) + content_dict = llm_response.get("content", llm_response.get("Content", {})) if not content_dict: raise ValueError(f"call_llm span {last_call_llm.span_id}: no content in llm_response") logger.warning( @@ -263,12 +266,12 @@ def _extract_function_calls_from_llm_response( llm_response_raw = call_llm.get_tag(ADK_LLM_RESPONSE, "{}") llm_response = parse_json(llm_response_raw) - content_dict = llm_response.get("content", {}) - parts = content_dict.get("parts", []) + content_dict = llm_response.get("content", llm_response.get("Content", {})) + parts = content_dict.get("parts", content_dict.get("Parts", [])) calls = [] for part in parts: - fc_dict = part.get("function_call") + fc_dict = part.get("function_call", part.get("functionCall")) if fc_dict: calls.append( genai_types.FunctionCall( @@ -282,15 +285,15 @@ def _extract_function_calls_from_llm_response( def _content_from_dict(content_dict: dict[str, Any]) -> genai_types.Content: """Build a genai Content from a raw dict. Handles text, function_call, and function_response parts.""" - role = content_dict.get("role", "user") - parts_dicts = content_dict.get("parts", []) + role = content_dict.get("role", content_dict.get("Role", "user")) + parts_dicts = content_dict.get("parts", content_dict.get("Parts", [])) parts: list[genai_types.Part] = [] for p in parts_dicts: - if "text" in p: - parts.append(genai_types.Part(text=p["text"])) - elif "function_call" in p: - fc = p["function_call"] + if "text" in p or "Text" in p: + parts.append(genai_types.Part(text=p.get("text", p.get("Text")))) + elif "function_call" in p or "functionCall" in p: + fc = p.get("function_call", p.get("functionCall")) parts.append( genai_types.Part( function_call=genai_types.FunctionCall( @@ -300,8 +303,8 @@ def _content_from_dict(content_dict: dict[str, Any]) -> genai_types.Content: ) ) ) - elif "function_response" in p: - fr = p["function_response"] + elif "function_response" in p or "functionResponse" in p: + fr = p.get("function_response", p.get("functionResponse")) parts.append( genai_types.Part( function_response=genai_types.FunctionResponse( diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py index e4536da..bd31201 100644 --- a/src/agentevals/extraction.py +++ b/src/agentevals/extraction.py @@ -69,18 +69,19 @@ def extract_user_text_from_attrs(attrs: dict[str, Any]) -> str | None: if llm_request_raw: llm_request = parse_json(llm_request_raw) if isinstance(llm_request, dict): - for content_dict in reversed(llm_request.get("contents", [])): - if content_dict.get("role") != "user": + contents = llm_request.get("contents", llm_request.get("Contents", [])) + for content_dict in reversed(contents): + if content_dict.get("role", content_dict.get("Role")) != "user": continue - parts = content_dict.get("parts", []) - text_parts = [p for p in parts if "text" in p] + parts = content_dict.get("parts", content_dict.get("Parts", [])) + text_parts = [p for p in parts if "text" in p or "Text" in p] if text_parts: - return " ".join(p["text"] for p in text_parts) - for content_dict in llm_request.get("contents", []): - if content_dict.get("role") == "user": - parts = content_dict.get("parts", []) + return " ".join(p.get("text", p.get("Text", "")) for p in text_parts) + for content_dict in contents: + if content_dict.get("role", content_dict.get("Role")) == "user": + parts = content_dict.get("parts", content_dict.get("Parts", [])) if parts: - return " ".join(p.get("text", "") for p in parts if "text" in p) + return " ".join(p.get("text", p.get("Text", "")) for p in parts if "text" in p or "Text" in p) messages_raw = attrs.get(OTEL_GENAI_INPUT_MESSAGES) if messages_raw: @@ -101,12 +102,12 @@ def extract_agent_response_from_attrs(attrs: dict[str, Any]) -> str | None: if llm_response_raw: llm_response = parse_json(llm_response_raw) if isinstance(llm_response, dict): - content_dict = llm_response.get("content", {}) + content_dict = llm_response.get("content", llm_response.get("Content", {})) if content_dict: - parts_dicts = content_dict.get("parts", []) - text_parts = [p for p in parts_dicts if "text" in p] + parts_dicts = content_dict.get("parts", content_dict.get("Parts", [])) + text_parts = [p for p in parts_dicts if "text" in p or "Text" in p] if text_parts: - return " ".join(p["text"] for p in text_parts) + return " ".join(p.get("text", p.get("Text", "")) for p in text_parts) messages_raw = attrs.get(OTEL_GENAI_OUTPUT_MESSAGES) if messages_raw: @@ -392,6 +393,42 @@ def is_adk_scope(span: Span) -> bool: return False +def is_adk_generate_content_llm_span(span: Span) -> bool: + if not ( + span.operation_name.startswith("generate_content") + or span.get_tag(OTEL_GENAI_OP) == "generate_content" + ): + return False + return bool(span.get_tag(ADK_LLM_REQUEST) or span.get_tag(ADK_LLM_RESPONSE)) + + +def is_adk_llm_span(span: Span) -> bool: + return span.operation_name.startswith("call_llm") or is_adk_generate_content_llm_span(span) + + +def find_adk_llm_spans_in(root: Span) -> list[Span]: + call_llm_spans: list[Span] = [] + generate_content_spans: list[Span] = [] + + def collect(span: Span) -> None: + if span.operation_name.startswith("call_llm"): + call_llm_spans.append(span) + elif is_adk_generate_content_llm_span(span): + generate_content_spans.append(span) + + _walk_descendants(root, collect) + sort_key = lambda s: (s.start_time, s.span_id, s.operation_name) + call_llm_spans.sort(key=sort_key) + generate_content_spans.sort(key=sort_key) + return call_llm_spans or generate_content_spans + + +def _walk_descendants(span: Span, visit) -> None: + for child in span.children: + visit(child) + _walk_descendants(child, visit) + + def is_llm_span(span: Span) -> bool: return span.get_tag(OTEL_GENAI_REQUEST_MODEL) is not None @@ -477,10 +514,7 @@ def find_invocation_spans(self, trace: Trace) -> list[Span]: return matches def find_llm_spans_in(self, root: Span) -> list[Span]: - results: list[Span] = [] - self._walk(root, lambda s: s.operation_name.startswith("call_llm"), results) - results.sort(key=lambda s: s.start_time) - return results + return find_adk_llm_spans_in(root) def find_tool_spans_in(self, root: Span) -> list[Span]: results: list[Span] = [] @@ -493,7 +527,7 @@ def classify_span(self, span: Span) -> str | None: return None if span.operation_name.startswith("invoke_agent"): return "invocation" - if span.operation_name.startswith("call_llm"): + if is_adk_llm_span(span): return "llm" if span.operation_name.startswith("execute_tool"): return "tool" diff --git a/tests/test_converter.py b/tests/test_converter.py index 5e6a4e4..ebeb04e 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -186,6 +186,92 @@ def test_convert_traces_multiple(self): assert len(results) == 2 assert all(r.trace_id == "t1" for r in results) + def test_convert_adk_generate_content_llm_spans(self): + invoke = Span( + trace_id="t-gc", + span_id="invoke1", + parent_span_id=None, + operation_name="invoke_agent query_agent", + start_time=1000, + duration=10000, + tags={"gen_ai.operation.name": "invoke_agent"}, + ) + llm_1 = Span( + trace_id="t-gc", + span_id="llm1", + parent_span_id="invoke1", + operation_name="generate_content mockllm-deterministic", + start_time=2000, + duration=1000, + tags={ + "gen_ai.operation.name": "generate_content", + "gcp.vertex.agent.llm_request": json.dumps( + {"Contents": [{"role": "user", "parts": [{"text": "inspect pods"}]}]} + ), + "gcp.vertex.agent.llm_response": json.dumps( + {"Content": {"role": "model", "parts": [{"text": "Calling tools."}]}} + ), + }, + ) + tool_1 = Span( + trace_id="t-gc", + span_id="tool1", + parent_span_id="invoke1", + operation_name="execute_tool list_pods", + start_time=3000, + duration=500, + tags={ + "gen_ai.tool.name": "list_pods", + "gen_ai.tool.call.id": "call_1", + "gcp.vertex.agent.tool_call_args": json.dumps({"namespace": "default"}), + "gcp.vertex.agent.tool_response": json.dumps({"pods": []}), + }, + ) + llm_2 = Span( + trace_id="t-gc", + span_id="llm2", + parent_span_id="invoke1", + operation_name="generate_content mockllm-deterministic", + start_time=4000, + duration=1000, + tags={ + "gen_ai.operation.name": "generate_content", + "gcp.vertex.agent.llm_request": json.dumps({"contents": []}), + "gcp.vertex.agent.llm_response": json.dumps( + {"Content": {"role": "model", "parts": [{"text": "No pods found."}]}} + ), + }, + ) + tool_2 = Span( + trace_id="t-gc", + span_id="tool2", + parent_span_id="invoke1", + operation_name="execute_tool get_events", + start_time=5000, + duration=500, + tags={ + "gen_ai.tool.name": "get_events", + "gen_ai.tool.call.id": "call_2", + "gcp.vertex.agent.tool_call_args": json.dumps({"namespace": "default"}), + "gcp.vertex.agent.tool_response": json.dumps({"events": []}), + }, + ) + invoke.children.extend([llm_1, tool_1, llm_2, tool_2]) + trace = Trace( + trace_id="t-gc", + root_spans=[invoke], + all_spans=[invoke, llm_1, tool_1, llm_2, tool_2], + ) + + result = convert_trace(trace) + + assert result.warnings == [] + assert len(result.invocations) == 1 + inv = result.invocations[0] + assert inv.user_content.parts[0].text == "inspect pods" + assert inv.final_response.parts[0].text == "No pods found." + assert [t.name for t in inv.intermediate_data.tool_uses] == ["list_pods", "get_events"] + def test_no_invoke_agent_warns(self): trace = Trace( trace_id="empty", diff --git a/tests/test_extraction.py b/tests/test_extraction.py index 8686e55..d1b8603 100644 --- a/tests/test_extraction.py +++ b/tests/test_extraction.py @@ -519,6 +519,39 @@ def test_find_llm_spans_in(self): ext = AdkExtractor() assert [s.span_id for s in ext.find_llm_spans_in(root)] == ["llm1"] + def test_find_llm_spans_in_falls_back_to_adk_generate_content(self): + child_llm = _span( + op="generate_content mockllm-deterministic", + tags={ADK_LLM_REQUEST: "{}"}, + span_id="llm1", + ) + child_tool = _span(op="execute_tool search", span_id="tool1") + root = _span(op="invoke_agent a", children=[child_llm, child_tool]) + ext = AdkExtractor() + assert [s.span_id for s in ext.find_llm_spans_in(root)] == ["llm1"] + + def test_find_llm_spans_in_ignores_provider_generate_content_without_adk_payload(self): + child_llm = _span( + op="generate_content gpt-4", + tags={OTEL_GENAI_REQUEST_MODEL: "gpt-4"}, + span_id="llm1", + ) + root = _span(op="invoke_agent a", children=[child_llm]) + ext = AdkExtractor() + assert ext.find_llm_spans_in(root) == [] + + def test_find_llm_spans_in_prefers_call_llm_over_generate_content(self): + call_llm = _span(op="call_llm gemini", span_id="llm1", start_time=20) + generate_content = _span( + op="generate_content gemini", + tags={ADK_LLM_REQUEST: "{}"}, + span_id="llm2", + start_time=10, + ) + root = _span(op="invoke_agent a", children=[generate_content, call_llm]) + ext = AdkExtractor() + assert [s.span_id for s in ext.find_llm_spans_in(root)] == ["llm1"] + def test_find_tool_spans_in(self): child_llm = _span(op="call_llm gemini", span_id="llm1") child_tool = _span(op="execute_tool search", span_id="tool1") @@ -530,6 +563,7 @@ def test_classify_span(self): ext = AdkExtractor() assert ext.classify_span(_span(op="invoke_agent a", tags={OTEL_SCOPE: ADK_SCOPE_VALUE})) == "invocation" assert ext.classify_span(_span(op="call_llm", tags={OTEL_SCOPE: ADK_SCOPE_VALUE})) == "llm" + assert ext.classify_span(_span(op="generate_content", tags={ADK_LLM_REQUEST: "{}"})) == "llm" assert ext.classify_span(_span(op="execute_tool x", tags={OTEL_SCOPE: ADK_SCOPE_VALUE})) == "tool" assert ext.classify_span(_span(op="random")) is None From 603d845182aeffc2240d0db9e59fa53cffb477fe Mon Sep 17 00:00:00 2001 From: erauner Date: Thu, 14 May 2026 12:41:55 -0500 Subject: [PATCH 2/4] Address ADK generate_content review feedback Align ADK LLM span ordering with existing start-time-only extractor conventions. Make missing-LLM conversion diagnostics name call_llm and ADK generate_content compatible span shapes. Trim unreachable inner PascalCase ADK content fallbacks while preserving lower/camelCase function call parsing, and add regression coverage for generate_content functionCall content. Validation: uv run pytest tests/test_extraction.py tests/test_converter.py; uv run pytest -k generate_content --- src/agentevals/converter.py | 15 +++++------ src/agentevals/extraction.py | 5 ++-- tests/test_converter.py | 49 ++++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/agentevals/converter.py b/src/agentevals/converter.py index c5255e1..7543369 100644 --- a/src/agentevals/converter.py +++ b/src/agentevals/converter.py @@ -131,7 +131,8 @@ def _convert_invoke_span(invoke_span: Span) -> Invocation: llm_spans = find_adk_llm_spans_in(invoke_span) if not llm_spans: raise ValueError( - f"invoke_agent span {invoke_span.span_id} has no converter-compatible ADK LLM descendants" + f"invoke_agent span {invoke_span.span_id} has no converter-compatible ADK LLM descendants; " + "expected call_llm or ADK generate_content spans" ) tool_spans = _find_children_by_op(invoke_span, "execute_tool") @@ -181,7 +182,7 @@ def _extract_user_content(first_call_llm: Span) -> genai_types.Content: llm_request_raw = first_call_llm.get_tag(ADK_LLM_REQUEST, "{}") llm_request = parse_json(llm_request_raw) for content_dict in llm_request.get("contents", llm_request.get("Contents", [])): - if content_dict.get("role", content_dict.get("Role")) == "user": + if content_dict.get("role") == "user": return _content_from_dict(content_dict) raise ValueError(f"call_llm span {first_call_llm.span_id}: no user content found in llm_request") @@ -267,7 +268,7 @@ def _extract_function_calls_from_llm_response( llm_response = parse_json(llm_response_raw) content_dict = llm_response.get("content", llm_response.get("Content", {})) - parts = content_dict.get("parts", content_dict.get("Parts", [])) + parts = content_dict.get("parts", []) calls = [] for part in parts: @@ -285,13 +286,13 @@ def _extract_function_calls_from_llm_response( def _content_from_dict(content_dict: dict[str, Any]) -> genai_types.Content: """Build a genai Content from a raw dict. Handles text, function_call, and function_response parts.""" - role = content_dict.get("role", content_dict.get("Role", "user")) - parts_dicts = content_dict.get("parts", content_dict.get("Parts", [])) + role = content_dict.get("role", "user") + parts_dicts = content_dict.get("parts", []) parts: list[genai_types.Part] = [] for p in parts_dicts: - if "text" in p or "Text" in p: - parts.append(genai_types.Part(text=p.get("text", p.get("Text")))) + if "text" in p: + parts.append(genai_types.Part(text=p.get("text"))) elif "function_call" in p or "functionCall" in p: fc = p.get("function_call", p.get("functionCall")) parts.append( diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py index bd31201..1ebf5bf 100644 --- a/src/agentevals/extraction.py +++ b/src/agentevals/extraction.py @@ -417,9 +417,8 @@ def collect(span: Span) -> None: generate_content_spans.append(span) _walk_descendants(root, collect) - sort_key = lambda s: (s.start_time, s.span_id, s.operation_name) - call_llm_spans.sort(key=sort_key) - generate_content_spans.sort(key=sort_key) + call_llm_spans.sort(key=lambda s: s.start_time) + generate_content_spans.sort(key=lambda s: s.start_time) return call_llm_spans or generate_content_spans diff --git a/tests/test_converter.py b/tests/test_converter.py index ebeb04e..fdbb518 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -238,7 +238,20 @@ def test_convert_adk_generate_content_llm_spans(self): "gen_ai.operation.name": "generate_content", "gcp.vertex.agent.llm_request": json.dumps({"contents": []}), "gcp.vertex.agent.llm_response": json.dumps( - {"Content": {"role": "model", "parts": [{"text": "No pods found."}]}} + { + "Content": { + "role": "model", + "parts": [ + { + "functionCall": { + "name": "summarize_pods", + "args": {"namespace": "default"}, + "id": "call_final", + } + } + ], + } + } ), }, ) @@ -269,7 +282,10 @@ def test_convert_adk_generate_content_llm_spans(self): assert len(result.invocations) == 1 inv = result.invocations[0] assert inv.user_content.parts[0].text == "inspect pods" - assert inv.final_response.parts[0].text == "No pods found." + final_call = inv.final_response.parts[0].function_call + assert final_call.name == "summarize_pods" + assert final_call.args == {"namespace": "default"} + assert final_call.id == "call_final" assert [t.name for t in inv.intermediate_data.tool_uses] == ["list_pods", "get_events"] def test_no_invoke_agent_warns(self): @@ -293,6 +309,35 @@ def test_no_invoke_agent_warns(self): assert len(result.warnings) == 1 assert "no invoke_agent" in result.warnings[0] + def test_no_llm_descendants_warns_with_compatible_shapes(self): + invoke = Span( + trace_id="no-llm", + span_id="invoke-no-llm", + parent_span_id=None, + operation_name="invoke_agent test_agent", + start_time=1000, + duration=1000, + tags={ + "otel.scope.name": "gcp.vertex.agent", + "gen_ai.operation.name": "invoke_agent", + }, + ) + trace = Trace( + trace_id="no-llm", + root_spans=[invoke], + all_spans=[invoke], + ) + + result = convert_trace(trace) + + assert result.invocations == [] + assert len(result.warnings) == 1 + warning = result.warnings[0] + assert "invoke-no-llm" in warning + assert "no converter-compatible ADK LLM descendants" in warning + assert "call_llm" in warning + assert "ADK generate_content" in warning + def test_no_tool_spans_fallback_to_llm_response(self): """When no execute_tool spans exist, function_calls should be extracted from call_llm responses instead.""" From 50dc51fb834c680f274539028d5d9536f3378d43 Mon Sep 17 00:00:00 2001 From: erauner Date: Fri, 15 May 2026 11:46:09 -0500 Subject: [PATCH 3/4] Trim ADK extraction casing fallbacks Remove unreachable inner Role, Parts, and Text fallbacks from ADK request/response text extraction while preserving outer Contents/Content handling. Add focused coverage for outer PascalCase ADK payload objects with lower-case inner content fields. Validation: uv run pytest tests/test_extraction.py -k 'ExtractUserText or ExtractAgentResponse'; uv run pytest tests/test_extraction.py tests/test_converter.py --- src/agentevals/extraction.py | 20 ++++++++++---------- tests/test_extraction.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py index 1ebf5bf..01b8339 100644 --- a/src/agentevals/extraction.py +++ b/src/agentevals/extraction.py @@ -71,17 +71,17 @@ def extract_user_text_from_attrs(attrs: dict[str, Any]) -> str | None: if isinstance(llm_request, dict): contents = llm_request.get("contents", llm_request.get("Contents", [])) for content_dict in reversed(contents): - if content_dict.get("role", content_dict.get("Role")) != "user": + if content_dict.get("role") != "user": continue - parts = content_dict.get("parts", content_dict.get("Parts", [])) - text_parts = [p for p in parts if "text" in p or "Text" in p] + parts = content_dict.get("parts", []) + text_parts = [p for p in parts if "text" in p] if text_parts: - return " ".join(p.get("text", p.get("Text", "")) for p in text_parts) + return " ".join(p["text"] for p in text_parts) for content_dict in contents: - if content_dict.get("role", content_dict.get("Role")) == "user": - parts = content_dict.get("parts", content_dict.get("Parts", [])) + if content_dict.get("role") == "user": + parts = content_dict.get("parts", []) if parts: - return " ".join(p.get("text", p.get("Text", "")) for p in parts if "text" in p or "Text" in p) + return " ".join(p.get("text", "") for p in parts if "text" in p) messages_raw = attrs.get(OTEL_GENAI_INPUT_MESSAGES) if messages_raw: @@ -104,10 +104,10 @@ def extract_agent_response_from_attrs(attrs: dict[str, Any]) -> str | None: if isinstance(llm_response, dict): content_dict = llm_response.get("content", llm_response.get("Content", {})) if content_dict: - parts_dicts = content_dict.get("parts", content_dict.get("Parts", [])) - text_parts = [p for p in parts_dicts if "text" in p or "Text" in p] + parts_dicts = content_dict.get("parts", []) + text_parts = [p for p in parts_dicts if "text" in p] if text_parts: - return " ".join(p.get("text", p.get("Text", "")) for p in text_parts) + return " ".join(p["text"] for p in text_parts) messages_raw = attrs.get(OTEL_GENAI_OUTPUT_MESSAGES) if messages_raw: diff --git a/tests/test_extraction.py b/tests/test_extraction.py index d1b8603..52ec812 100644 --- a/tests/test_extraction.py +++ b/tests/test_extraction.py @@ -107,6 +107,18 @@ def test_adk_llm_request_prefers_last_user(self): } assert extract_user_text_from_attrs(attrs) == "Second" + def test_adk_llm_request_outer_contents_pascalcase(self): + attrs = { + ADK_LLM_REQUEST: json.dumps( + { + "Contents": [ + {"role": "user", "parts": [{"text": "Outer PascalCase only"}]}, + ] + } + ) + } + assert extract_user_text_from_attrs(attrs) == "Outer PascalCase only" + def test_genai_content_based(self): attrs = { OTEL_GENAI_INPUT_MESSAGES: json.dumps( @@ -170,6 +182,10 @@ def test_adk_llm_response(self): attrs = {ADK_LLM_RESPONSE: json.dumps({"content": {"parts": [{"text": "ADK response"}]}})} assert extract_agent_response_from_attrs(attrs) == "ADK response" + def test_adk_llm_response_outer_content_pascalcase(self): + attrs = {ADK_LLM_RESPONSE: json.dumps({"Content": {"parts": [{"text": "Outer Content only"}]}})} + assert extract_agent_response_from_attrs(attrs) == "Outer Content only" + def test_genai_content_based(self): attrs = { OTEL_GENAI_OUTPUT_MESSAGES: json.dumps( From 4e79cec3530e086d486acd82a66582fb26c589be Mon Sep 17 00:00:00 2001 From: erauner Date: Fri, 15 May 2026 11:52:55 -0500 Subject: [PATCH 4/4] Apply Ruff formatting Format extraction.py to satisfy CI lint. Validation: uv run ruff check . && uv run ruff format --check .; uv run pytest tests/test_extraction.py tests/test_converter.py --- src/agentevals/extraction.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/agentevals/extraction.py b/src/agentevals/extraction.py index 01b8339..51b0348 100644 --- a/src/agentevals/extraction.py +++ b/src/agentevals/extraction.py @@ -394,10 +394,7 @@ def is_adk_scope(span: Span) -> bool: def is_adk_generate_content_llm_span(span: Span) -> bool: - if not ( - span.operation_name.startswith("generate_content") - or span.get_tag(OTEL_GENAI_OP) == "generate_content" - ): + if not (span.operation_name.startswith("generate_content") or span.get_tag(OTEL_GENAI_OP) == "generate_content"): return False return bool(span.get_tag(ADK_LLM_REQUEST) or span.get_tag(ADK_LLM_RESPONSE))