Skip to content

Commit 372edcd

Browse files
jsondai authored and copybara-github committed
chore: GenAI Client(evals) - update validation and warning messages for intermediate_events
PiperOrigin-RevId: 893138202
1 parent a1924a9 commit 372edcd

File tree

2 files changed

+78
-8
lines changed

2 files changed

+78
-8
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2013,6 +2013,23 @@ def test_has_tool_call_mixed_events(self):
20132013
]
20142014
assert _evals_metric_handlers._has_tool_call(events)
20152015

2016+
def test_has_tool_call_with_agent_event(self):
2017+
events = [
2018+
vertexai_genai_types.evals.AgentEvent(
2019+
author="model",
2020+
content=genai_types.Content(
2021+
parts=[
2022+
genai_types.Part(
2023+
function_call=genai_types.FunctionCall(
2024+
name="search", args={}
2025+
)
2026+
)
2027+
]
2028+
),
2029+
)
2030+
]
2031+
assert _evals_metric_handlers._has_tool_call(events)
2032+
20162033

20172034
@pytest.mark.usefixtures("google_auth_mock")
20182035
class TestRunAgentInternal:
@@ -4892,10 +4909,53 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
48924909
handler._build_request_payload(eval_case, response_index=0)
48934910
mock_warning.assert_called_once_with(
48944911
"Metric 'tool_use_quality_v1' requires tool usage in "
4895-
"'intermediate_events', but no tool usage was found for case %s.",
4912+
"'intermediate_events' or 'agent_data', but no tool usage was found for case %s.",
48964913
"case-no-tool-call",
48974914
)
48984915

4916+
@mock.patch.object(_evals_metric_handlers.logger, "warning")
4917+
def test_build_request_payload_tool_use_quality_v1_with_agent_data_tool_call(
4918+
self, mock_warning, mock_api_client_fixture
4919+
):
4920+
"""Tests that PredefinedMetricHandler does not warn if tool call is in agent_data."""
4921+
metric = vertexai_genai_types.Metric(name="tool_use_quality_v1")
4922+
handler = _evals_metric_handlers.PredefinedMetricHandler(
4923+
module=evals.Evals(api_client_=mock_api_client_fixture), metric=metric
4924+
)
4925+
eval_case = vertexai_genai_types.EvalCase(
4926+
eval_case_id="case-with-agent-data-tool-call",
4927+
prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]),
4928+
responses=[
4929+
vertexai_genai_types.ResponseCandidate(
4930+
response=genai_types.Content(parts=[genai_types.Part(text="Hi")])
4931+
)
4932+
],
4933+
agent_data=vertexai_genai_types.evals.AgentData(
4934+
turns=[
4935+
vertexai_genai_types.evals.ConversationTurn(
4936+
turn_index=0,
4937+
turn_id="turn_0",
4938+
events=[
4939+
vertexai_genai_types.evals.AgentEvent(
4940+
author="model",
4941+
content=genai_types.Content(
4942+
parts=[
4943+
genai_types.Part(
4944+
function_call=genai_types.FunctionCall(
4945+
name="search", args={}
4946+
)
4947+
)
4948+
]
4949+
),
4950+
)
4951+
],
4952+
)
4953+
]
4954+
),
4955+
)
4956+
handler._build_request_payload(eval_case, response_index=0)
4957+
mock_warning.assert_not_called()
4958+
48994959

49004960
@pytest.mark.usefixtures("google_auth_mock")
49014961
class TestRunAdkUserSimulation:

vertexai/_genai/_evals_metric_handlers.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@
4242
T = TypeVar("T", types.Metric, types.MetricSource, types.LLMMetric)
4343

4444

45-
def _has_tool_call(intermediate_events: Optional[list[types.evals.Event]]) -> bool:
46-
"""Checks if any event in intermediate_events has a function call."""
47-
if not intermediate_events:
45+
def _has_tool_call(events: Optional[list[Any]]) -> bool:
46+
"""Checks if any event in events has a function call."""
47+
if not events:
4848
return False
49-
for event in intermediate_events:
50-
if event.content and event.content.parts:
49+
for event in events:
50+
if getattr(event, "content", None) and getattr(event.content, "parts", None):
5151
for part in event.content.parts:
5252
if hasattr(part, "function_call") and part.function_call:
5353
return True
@@ -922,10 +922,20 @@ def _build_request_payload(
922922
)
923923

924924
if self.metric.name == "tool_use_quality_v1":
925-
if not _has_tool_call(eval_case.intermediate_events):
925+
has_tool_call = _has_tool_call(eval_case.intermediate_events)
926+
927+
# Fall back to agent_data when no tool call was found in intermediate_events
928+
agent_data = getattr(eval_case, "agent_data", None)
929+
if not has_tool_call and agent_data:
930+
for turn in agent_data.turns or []:
931+
if _has_tool_call(turn.events):
932+
has_tool_call = True
933+
break
934+
935+
if not has_tool_call:
926936
logger.warning(
927937
"Metric 'tool_use_quality_v1' requires tool usage in "
928-
"'intermediate_events', but no tool usage was found for case %s.",
938+
"'intermediate_events' or 'agent_data', but no tool usage was found for case %s.",
929939
eval_case.eval_case_id,
930940
)
931941

0 commit comments

Comments
 (0)