Skip to content

Commit c3b9f2c

Browse files
committed
chore: fix benchmark script
1 parent 9d2fa24 commit c3b9f2c

1 file changed

Lines changed: 51 additions & 9 deletions

File tree

benchmark/run_weak_model_benchmark.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -570,35 +570,77 @@ async def _run_search_precision(
570570

571571
started = time.perf_counter()
572572
try:
573-
result = await server.call_tool(
574-
"search_tools",
575-
{"query": query, "limit": 3},
576-
)
573+
# Deterministic Tier 3: bypass Agent execution and call the MCP tool directly.
574+
# Newer PydanticAI versions require (ctx, tool) for call_tool(), so use
575+
# direct_call_tool() when available.
576+
if hasattr(server, "direct_call_tool"):
577+
result = await server.direct_call_tool(
578+
"search_tools",
579+
{"query": query, "limit": 3},
580+
)
581+
else:
582+
# Back-compat for older PydanticAI versions.
583+
result = await server.call_tool(
584+
"search_tools",
585+
{"query": query, "limit": 3},
586+
)
577587
duration_s = round(time.perf_counter() - started, 3)
578588

579589
# Parse results from the MCP response
590+
# direct_call_tool() may return structured Python objects (dict/list),
591+
# not an MCP SDK response object with `.content`.
592+
parsed_obj: object | None = None
580593
result_text = ""
581-
if hasattr(result, "content"):
594+
if isinstance(result, (dict, list)):
595+
parsed_obj = result
596+
try:
597+
result_text = json.dumps(result, ensure_ascii=False)
598+
except Exception:
599+
result_text = str(result)
600+
elif hasattr(result, "content"):
582601
for part in result.content:
583602
if hasattr(part, "text"):
584603
result_text += part.text
585604
elif isinstance(result, str):
586605
result_text = result
606+
else:
607+
result_text = str(result)
587608

588609
# Try to parse as JSON to extract tool names
589610
tool_names_found: list[str] = []
590611
try:
591-
parsed = json.loads(result_text)
612+
parsed = parsed_obj if parsed_obj is not None else json.loads(result_text)
592613
if isinstance(parsed, list):
593614
for item in parsed:
594-
name = item.get("proxy_tool_name", "") or item.get("name", "")
615+
if not isinstance(item, dict):
616+
continue
617+
name = (
618+
item.get("proxy_tool_name", "")
619+
or item.get("proxyToolName", "")
620+
or item.get("tool_name", "")
621+
or item.get("toolName", "")
622+
or item.get("name", "")
623+
or item.get("id", "")
624+
)
595625
if name:
596626
tool_names_found.append(name.lower())
597627
elif isinstance(parsed, dict):
598-
results_list = parsed.get("results", parsed.get("tools", []))
628+
results_list = parsed.get(
629+
"results",
630+
parsed.get("hits", parsed.get("tools", parsed.get("items", []))),
631+
)
599632
if isinstance(results_list, list):
600633
for item in results_list:
601-
name = item.get("proxy_tool_name", "") or item.get("name", "")
634+
if not isinstance(item, dict):
635+
continue
636+
name = (
637+
item.get("proxy_tool_name", "")
638+
or item.get("proxyToolName", "")
639+
or item.get("tool_name", "")
640+
or item.get("toolName", "")
641+
or item.get("name", "")
642+
or item.get("id", "")
643+
)
602644
if name:
603645
tool_names_found.append(name.lower())
604646
except (json.JSONDecodeError, TypeError):

0 commit comments

Comments
 (0)