Skip to content

Commit 9c39e64

Browse files
Merge pull request #145 from erauner12/fix-adk-generate-content-extraction
Support ADK generate_content LLM spans
2 parents 8868017 + 4e79cec commit 9c39e64

4 files changed

Lines changed: 238 additions & 23 deletions

File tree

src/agentevals/converter.py

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
extract_tool_call_from_span,
2424
extract_tool_result_from_span,
2525
extract_user_text_from_attrs,
26+
find_adk_llm_spans_in,
2627
get_extractor,
2728
has_adk_descendant,
2829
is_adk_scope,
@@ -127,15 +128,18 @@ def _find_adk_spans(trace: Trace, operation: str) -> list[Span]:
127128

128129

129130
def _convert_invoke_span(invoke_span: Span) -> Invocation:
130-
call_llm_spans = _find_children_by_op(invoke_span, "call_llm")
131-
if not call_llm_spans:
132-
raise ValueError(f"invoke_agent span {invoke_span.span_id} has no child call_llm spans")
131+
llm_spans = find_adk_llm_spans_in(invoke_span)
132+
if not llm_spans:
133+
raise ValueError(
134+
f"invoke_agent span {invoke_span.span_id} has no converter-compatible ADK LLM descendants; "
135+
"expected call_llm or ADK generate_content spans"
136+
)
133137

134138
tool_spans = _find_children_by_op(invoke_span, "execute_tool")
135139

136-
user_content = _extract_user_content(call_llm_spans[0])
137-
final_response = _extract_final_response(call_llm_spans[-1])
138-
tool_uses, tool_responses = _extract_tool_trajectory(call_llm_spans, tool_spans)
140+
user_content = _extract_user_content(llm_spans[0])
141+
final_response = _extract_final_response(llm_spans[-1])
142+
tool_uses, tool_responses = _extract_tool_trajectory(llm_spans, tool_spans)
139143

140144
intermediate_data = IntermediateData(
141145
tool_uses=tool_uses,
@@ -177,7 +181,7 @@ def _extract_user_content(first_call_llm: Span) -> genai_types.Content:
177181
)
178182
llm_request_raw = first_call_llm.get_tag(ADK_LLM_REQUEST, "{}")
179183
llm_request = parse_json(llm_request_raw)
180-
for content_dict in llm_request.get("contents", []):
184+
for content_dict in llm_request.get("contents", llm_request.get("Contents", [])):
181185
if content_dict.get("role") == "user":
182186
return _content_from_dict(content_dict)
183187
raise ValueError(f"call_llm span {first_call_llm.span_id}: no user content found in llm_request")
@@ -193,7 +197,7 @@ def _extract_final_response(last_call_llm: Span) -> genai_types.Content:
193197
)
194198
llm_response_raw = last_call_llm.get_tag(ADK_LLM_RESPONSE, "{}")
195199
llm_response = parse_json(llm_response_raw)
196-
content_dict = llm_response.get("content", {})
200+
content_dict = llm_response.get("content", llm_response.get("Content", {}))
197201
if not content_dict:
198202
raise ValueError(f"call_llm span {last_call_llm.span_id}: no content in llm_response")
199203
logger.warning(
@@ -263,12 +267,12 @@ def _extract_function_calls_from_llm_response(
263267
llm_response_raw = call_llm.get_tag(ADK_LLM_RESPONSE, "{}")
264268
llm_response = parse_json(llm_response_raw)
265269

266-
content_dict = llm_response.get("content", {})
270+
content_dict = llm_response.get("content", llm_response.get("Content", {}))
267271
parts = content_dict.get("parts", [])
268272

269273
calls = []
270274
for part in parts:
271-
fc_dict = part.get("function_call")
275+
fc_dict = part.get("function_call", part.get("functionCall"))
272276
if fc_dict:
273277
calls.append(
274278
genai_types.FunctionCall(
@@ -288,9 +292,9 @@ def _content_from_dict(content_dict: dict[str, Any]) -> genai_types.Content:
288292
parts: list[genai_types.Part] = []
289293
for p in parts_dicts:
290294
if "text" in p:
291-
parts.append(genai_types.Part(text=p["text"]))
292-
elif "function_call" in p:
293-
fc = p["function_call"]
295+
parts.append(genai_types.Part(text=p.get("text")))
296+
elif "function_call" in p or "functionCall" in p:
297+
fc = p.get("function_call", p.get("functionCall"))
294298
parts.append(
295299
genai_types.Part(
296300
function_call=genai_types.FunctionCall(
@@ -300,8 +304,8 @@ def _content_from_dict(content_dict: dict[str, Any]) -> genai_types.Content:
300304
)
301305
)
302306
)
303-
elif "function_response" in p:
304-
fr = p["function_response"]
307+
elif "function_response" in p or "functionResponse" in p:
308+
fr = p.get("function_response", p.get("functionResponse"))
305309
parts.append(
306310
genai_types.Part(
307311
function_response=genai_types.FunctionResponse(

src/agentevals/extraction.py

Lines changed: 38 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -69,14 +69,15 @@ def extract_user_text_from_attrs(attrs: dict[str, Any]) -> str | None:
6969
if llm_request_raw:
7070
llm_request = parse_json(llm_request_raw)
7171
if isinstance(llm_request, dict):
72-
for content_dict in reversed(llm_request.get("contents", [])):
72+
contents = llm_request.get("contents", llm_request.get("Contents", []))
73+
for content_dict in reversed(contents):
7374
if content_dict.get("role") != "user":
7475
continue
7576
parts = content_dict.get("parts", [])
7677
text_parts = [p for p in parts if "text" in p]
7778
if text_parts:
7879
return " ".join(p["text"] for p in text_parts)
79-
for content_dict in llm_request.get("contents", []):
80+
for content_dict in contents:
8081
if content_dict.get("role") == "user":
8182
parts = content_dict.get("parts", [])
8283
if parts:
@@ -101,7 +102,7 @@ def extract_agent_response_from_attrs(attrs: dict[str, Any]) -> str | None:
101102
if llm_response_raw:
102103
llm_response = parse_json(llm_response_raw)
103104
if isinstance(llm_response, dict):
104-
content_dict = llm_response.get("content", {})
105+
content_dict = llm_response.get("content", llm_response.get("Content", {}))
105106
if content_dict:
106107
parts_dicts = content_dict.get("parts", [])
107108
text_parts = [p for p in parts_dicts if "text" in p]
@@ -392,6 +393,38 @@ def is_adk_scope(span: Span) -> bool:
392393
return False
393394

394395

396+
def is_adk_generate_content_llm_span(span: Span) -> bool:
    """Return True for a ``generate_content`` span that carries an ADK LLM payload.

    A span qualifies when both conditions hold:

    * its operation name starts with ``generate_content`` or its
      ``OTEL_GENAI_OP`` tag equals ``"generate_content"``;
    * it has a truthy ``ADK_LLM_REQUEST`` or ``ADK_LLM_RESPONSE`` tag.
    """
    named_generate_content = span.operation_name.startswith("generate_content")
    if not named_generate_content and span.get_tag(OTEL_GENAI_OP) != "generate_content":
        return False
    # Without an ADK request/response payload the span is not treated as an
    # ADK LLM span, even though it is a generate_content operation.
    if span.get_tag(ADK_LLM_REQUEST):
        return True
    return bool(span.get_tag(ADK_LLM_RESPONSE))
400+
401+
402+
def is_adk_llm_span(span: Span) -> bool:
    """Return True when *span* is a ``call_llm`` span or an ADK ``generate_content`` LLM span."""
    if span.operation_name.startswith("call_llm"):
        return True
    return is_adk_generate_content_llm_span(span)
404+
405+
406+
def find_adk_llm_spans_in(root: Span) -> list[Span]:
    """Return the ADK LLM spans below *root*, ordered by ``start_time``.

    Descendants whose operation name starts with ``call_llm`` take precedence:
    if any exist, only those are returned. Otherwise the descendants accepted
    by ``is_adk_generate_content_llm_span`` are returned (possibly an empty
    list). *root* itself is never part of the result.
    """
    descendants: list[Span] = []
    _walk_descendants(root, descendants.append)

    def started_at(span: Span):
        return span.start_time

    call_llm = sorted(
        (s for s in descendants if s.operation_name.startswith("call_llm")),
        key=started_at,
    )
    generate_content = sorted(
        (
            s
            for s in descendants
            if not s.operation_name.startswith("call_llm") and is_adk_generate_content_llm_span(s)
        ),
        key=started_at,
    )

    if call_llm:
        return call_llm
    return generate_content
420+
421+
422+
def _walk_descendants(span: Span, visit) -> None:
    """Call *visit* on every descendant of *span* in depth-first pre-order.

    *span* itself is not visited. Children are visited in the order they
    appear in ``children``, each one before its own descendants.

    The traversal uses an explicit stack instead of recursion so that very
    deep span trees cannot hit the interpreter's recursion limit.

    Args:
        span: Root of the subtree; only its descendants are visited.
        visit: Callable invoked with each descendant span; its return value
            is ignored.
    """
    # Push children reversed so the first child is popped (visited) first.
    pending = list(reversed(span.children))
    while pending:
        current = pending.pop()
        visit(current)
        pending.extend(reversed(current.children))
426+
427+
395428
def is_llm_span(span: Span) -> bool:
396429
return span.get_tag(OTEL_GENAI_REQUEST_MODEL) is not None
397430

@@ -477,10 +510,7 @@ def find_invocation_spans(self, trace: Trace) -> list[Span]:
477510
return matches
478511

479512
def find_llm_spans_in(self, root: Span) -> list[Span]:
480-
results: list[Span] = []
481-
self._walk(root, lambda s: s.operation_name.startswith("call_llm"), results)
482-
results.sort(key=lambda s: s.start_time)
483-
return results
513+
return find_adk_llm_spans_in(root)
484514

485515
def find_tool_spans_in(self, root: Span) -> list[Span]:
486516
results: list[Span] = []
@@ -493,7 +523,7 @@ def classify_span(self, span: Span) -> str | None:
493523
return None
494524
if span.operation_name.startswith("invoke_agent"):
495525
return "invocation"
496-
if span.operation_name.startswith("call_llm"):
526+
if is_adk_llm_span(span):
497527
return "llm"
498528
if span.operation_name.startswith("execute_tool"):
499529
return "tool"

tests/test_converter.py

Lines changed: 131 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,108 @@ def test_convert_traces_multiple(self):
186186
assert len(results) == 2
187187
assert all(r.trace_id == "t1" for r in results)
188188

189+
def test_convert_adk_generate_content_llm_spans(self):
    """An invocation whose LLM children are ADK ``generate_content`` spans converts cleanly.

    The trace has one invoke_agent span with two generate_content spans
    (PascalCase ``Contents``/``Content`` payloads, camelCase ``functionCall``)
    interleaved with two execute_tool spans.
    """
    trace_id = "t-gc"
    parent_id = "invoke1"

    def llm_span(span_id, start_time, request, response):
        # Both LLM spans share everything except id, start time and payloads.
        return Span(
            trace_id=trace_id,
            span_id=span_id,
            parent_span_id=parent_id,
            operation_name="generate_content mockllm-deterministic",
            start_time=start_time,
            duration=1000,
            tags={
                "gen_ai.operation.name": "generate_content",
                "gcp.vertex.agent.llm_request": json.dumps(request),
                "gcp.vertex.agent.llm_response": json.dumps(response),
            },
        )

    def tool_span(span_id, tool_name, call_id, start_time, response):
        # Both tool spans are called with the same {"namespace": "default"} args.
        return Span(
            trace_id=trace_id,
            span_id=span_id,
            parent_span_id=parent_id,
            operation_name=f"execute_tool {tool_name}",
            start_time=start_time,
            duration=500,
            tags={
                "gen_ai.tool.name": tool_name,
                "gen_ai.tool.call.id": call_id,
                "gcp.vertex.agent.tool_call_args": json.dumps({"namespace": "default"}),
                "gcp.vertex.agent.tool_response": json.dumps(response),
            },
        )

    invoke = Span(
        trace_id=trace_id,
        span_id=parent_id,
        parent_span_id=None,
        operation_name="invoke_agent query_agent",
        start_time=1000,
        duration=10000,
        tags={"gen_ai.operation.name": "invoke_agent"},
    )
    llm_1 = llm_span(
        "llm1",
        2000,
        request={"Contents": [{"role": "user", "parts": [{"text": "inspect pods"}]}]},
        response={"Content": {"role": "model", "parts": [{"text": "Calling tools."}]}},
    )
    tool_1 = tool_span("tool1", "list_pods", "call_1", 3000, response={"pods": []})
    llm_2 = llm_span(
        "llm2",
        4000,
        request={"contents": []},
        response={
            "Content": {
                "role": "model",
                "parts": [
                    {
                        "functionCall": {
                            "name": "summarize_pods",
                            "args": {"namespace": "default"},
                            "id": "call_final",
                        }
                    }
                ],
            }
        },
    )
    tool_2 = tool_span("tool2", "get_events", "call_2", 5000, response={"events": []})

    children = [llm_1, tool_1, llm_2, tool_2]
    invoke.children.extend(children)
    trace = Trace(
        trace_id=trace_id,
        root_spans=[invoke],
        all_spans=[invoke, *children],
    )

    result = convert_trace(trace)

    assert result.warnings == []
    assert len(result.invocations) == 1
    invocation = result.invocations[0]
    assert invocation.user_content.parts[0].text == "inspect pods"

    final_call = invocation.final_response.parts[0].function_call
    assert final_call.name == "summarize_pods"
    assert final_call.args == {"namespace": "default"}
    assert final_call.id == "call_final"

    tool_names = [use.name for use in invocation.intermediate_data.tool_uses]
    assert tool_names == ["list_pods", "get_events"]
290+
189291
def test_no_invoke_agent_warns(self):
190292
trace = Trace(
191293
trace_id="empty",
@@ -207,6 +309,35 @@ def test_no_invoke_agent_warns(self):
207309
assert len(result.warnings) == 1
208310
assert "no invoke_agent" in result.warnings[0]
209311

312+
def test_no_llm_descendants_warns_with_compatible_shapes(self):
    """An invoke_agent span with no LLM descendants yields one descriptive warning.

    The warning must name the span id and both span shapes the converter
    accepts (``call_llm`` and ADK ``generate_content``).
    """
    invoke_tags = {
        "otel.scope.name": "gcp.vertex.agent",
        "gen_ai.operation.name": "invoke_agent",
    }
    invoke = Span(
        trace_id="no-llm",
        span_id="invoke-no-llm",
        parent_span_id=None,
        operation_name="invoke_agent test_agent",
        start_time=1000,
        duration=1000,
        tags=invoke_tags,
    )
    trace = Trace(trace_id="no-llm", root_spans=[invoke], all_spans=[invoke])

    result = convert_trace(trace)

    assert result.invocations == []
    assert len(result.warnings) == 1
    warning = result.warnings[0]
    expected_fragments = (
        "invoke-no-llm",
        "no converter-compatible ADK LLM descendants",
        "call_llm",
        "ADK generate_content",
    )
    for fragment in expected_fragments:
        assert fragment in warning
340+
210341
def test_no_tool_spans_fallback_to_llm_response(self):
211342
"""When no execute_tool spans exist, function_calls should be
212343
extracted from call_llm responses instead."""

tests/test_extraction.py

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,18 @@ def test_adk_llm_request_prefers_last_user(self):
107107
}
108108
assert extract_user_text_from_attrs(attrs) == "Second"
109109

110+
def test_adk_llm_request_outer_contents_pascalcase(self):
    """User text is found when the request uses only the PascalCase ``Contents`` key."""
    user_turn = {"role": "user", "parts": [{"text": "Outer PascalCase only"}]}
    request_json = json.dumps({"Contents": [user_turn]})
    attrs = {ADK_LLM_REQUEST: request_json}
    assert extract_user_text_from_attrs(attrs) == "Outer PascalCase only"
121+
110122
def test_genai_content_based(self):
111123
attrs = {
112124
OTEL_GENAI_INPUT_MESSAGES: json.dumps(
@@ -170,6 +182,10 @@ def test_adk_llm_response(self):
170182
attrs = {ADK_LLM_RESPONSE: json.dumps({"content": {"parts": [{"text": "ADK response"}]}})}
171183
assert extract_agent_response_from_attrs(attrs) == "ADK response"
172184

185+
def test_adk_llm_response_outer_content_pascalcase(self):
    """Response text is found when the response uses only the PascalCase ``Content`` key."""
    response_payload = {"Content": {"parts": [{"text": "Outer Content only"}]}}
    attrs = {ADK_LLM_RESPONSE: json.dumps(response_payload)}
    assert extract_agent_response_from_attrs(attrs) == "Outer Content only"
188+
173189
def test_genai_content_based(self):
174190
attrs = {
175191
OTEL_GENAI_OUTPUT_MESSAGES: json.dumps(
@@ -519,6 +535,39 @@ def test_find_llm_spans_in(self):
519535
ext = AdkExtractor()
520536
assert [s.span_id for s in ext.find_llm_spans_in(root)] == ["llm1"]
521537

538+
def test_find_llm_spans_in_falls_back_to_adk_generate_content(self):
    """With no call_llm child, a generate_content span carrying an ADK request tag is returned."""
    generate_content = _span(
        op="generate_content mockllm-deterministic",
        tags={ADK_LLM_REQUEST: "{}"},
        span_id="llm1",
    )
    tool = _span(op="execute_tool search", span_id="tool1")
    root = _span(op="invoke_agent a", children=[generate_content, tool])

    found = AdkExtractor().find_llm_spans_in(root)

    assert [s.span_id for s in found] == ["llm1"]
548+
549+
def test_find_llm_spans_in_ignores_provider_generate_content_without_adk_payload(self):
    """A generate_content span with only a request-model tag (no ADK payload) is not returned."""
    provider_span = _span(
        op="generate_content gpt-4",
        tags={OTEL_GENAI_REQUEST_MODEL: "gpt-4"},
        span_id="llm1",
    )
    root = _span(op="invoke_agent a", children=[provider_span])

    found = AdkExtractor().find_llm_spans_in(root)

    assert found == []
558+
559+
def test_find_llm_spans_in_prefers_call_llm_over_generate_content(self):
    """When both kinds are present, only the call_llm span is returned, even if it starts later."""
    later_call_llm = _span(op="call_llm gemini", span_id="llm1", start_time=20)
    earlier_generate_content = _span(
        op="generate_content gemini",
        tags={ADK_LLM_REQUEST: "{}"},
        span_id="llm2",
        start_time=10,
    )
    root = _span(
        op="invoke_agent a",
        children=[earlier_generate_content, later_call_llm],
    )

    found = AdkExtractor().find_llm_spans_in(root)

    assert [s.span_id for s in found] == ["llm1"]
570+
522571
def test_find_tool_spans_in(self):
523572
child_llm = _span(op="call_llm gemini", span_id="llm1")
524573
child_tool = _span(op="execute_tool search", span_id="tool1")
@@ -530,6 +579,7 @@ def test_classify_span(self):
530579
ext = AdkExtractor()
531580
assert ext.classify_span(_span(op="invoke_agent a", tags={OTEL_SCOPE: ADK_SCOPE_VALUE})) == "invocation"
532581
assert ext.classify_span(_span(op="call_llm", tags={OTEL_SCOPE: ADK_SCOPE_VALUE})) == "llm"
582+
assert ext.classify_span(_span(op="generate_content", tags={ADK_LLM_REQUEST: "{}"})) == "llm"
533583
assert ext.classify_span(_span(op="execute_tool x", tags={OTEL_SCOPE: ADK_SCOPE_VALUE})) == "tool"
534584
assert ext.classify_span(_span(op="random")) is None
535585

0 commit comments

Comments
 (0)