Skip to content

Commit 7f77ce3

Browse files
committed
Address CodeRabbit feedback for Wikipedia tool
Signed-off-by: tamohannes <hovhannes.tamoyan@gmail.com>
1 parent 8e4e6ca commit 7f77ce3

3 files changed

Lines changed: 72 additions & 22 deletions

File tree

docs/agentic_inference/tool_calling.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,4 +372,3 @@ For vLLM, you may need to specify tool calling arguments:
372372
- [`nemo_skills.mcp.servers.python_tool.PythonTool`](https://github.com/NVIDIA-NeMo/Skills/tree/main/nemo_skills/mcp/servers/python_tool.py) - Python code execution
373373
- [`nemo_skills.mcp.servers.exa_tool.ExaTool`](https://github.com/NVIDIA-NeMo/Skills/tree/main/nemo_skills/mcp/servers/exa_tool.py) - Web search via Exa API
374374
- [`nemo_skills.mcp.servers.wikipedia_tool.WikipediaSearchTool`](https://github.com/NVIDIA-NeMo/Skills/tree/main/nemo_skills/mcp/servers/wikipedia_tool.py) - Direct Wikipedia article search and retrieval (no API key required)
375-
- [`nemo_skills.mcp.servers.wikipedia_tool.WikipediaSearchTool`](https://github.com/NVIDIA-NeMo/Skills/tree/main/nemo_skills/mcp/servers/wikipedia_tool.py) - Wikipedia article search and retrieval (no API key required)

nemo_skills/mcp/servers/wikipedia_tool.py

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
"WIKIPEDIA_RATE_LIMIT_LOCK",
7373
os.path.join(tempfile.gettempdir(), "nemo_skills_wikipedia_api_rate_limit.lock"),
7474
)
75+
EXPECTED_HTTP_ERRORS = (httpx.HTTPError, RuntimeError, json.JSONDecodeError)
7576

7677
_cache: dict[str, str] = {}
7778
_request_lock = asyncio.Lock()
@@ -270,7 +271,7 @@ async def wikipedia_search(
270271
async with httpx.AsyncClient(headers=headers) as client:
271272
try:
272273
data = await _http_get_json(client, ACTION_BASE, params=params)
273-
except Exception as e:
274+
except EXPECTED_HTTP_ERRORS as e:
274275
return f"Wikipedia search failed: {e}"
275276

276277
hits = (data.get("query") or {}).get("search") or []
@@ -328,7 +329,7 @@ async def wikipedia_page(
328329
async with httpx.AsyncClient(headers=headers) as client:
329330
try:
330331
data = await _http_get_json(client, ACTION_BASE, params=params)
331-
except Exception as e:
332+
except EXPECTED_HTTP_ERRORS as e:
332333
return f"Wikipedia lookup failed: {e}"
333334

334335
pages = (data.get("query") or {}).get("pages") or []
@@ -383,7 +384,7 @@ async def wikipedia_summary(
383384
async with httpx.AsyncClient(headers=headers) as client:
384385
try:
385386
data = await _http_get_json(client, ACTION_BASE, params=params)
386-
except Exception as e:
387+
except EXPECTED_HTTP_ERRORS as e:
387388
return f"Wikipedia summary failed: {e}"
388389
pages = (data.get("query") or {}).get("pages") or []
389390
if not pages or pages[0].get("missing"):
@@ -427,7 +428,7 @@ async def wikipedia_sections(
427428
"formatversion": "2",
428429
},
429430
)
430-
except Exception as e:
431+
except EXPECTED_HTTP_ERRORS as e:
431432
return f"Wikipedia section listing failed: {e}"
432433
sections = (data.get("parse") or {}).get("sections") or []
433434
if not sections:
@@ -457,7 +458,7 @@ async def wikipedia_query_summary(
457458
return cached
458459
try:
459460
rendered_title, url, extract = await _page_extract(t)
460-
except Exception as e:
461+
except EXPECTED_HTTP_ERRORS as e:
461462
return f"Wikipedia query summary failed: {e}"
462463
if rendered_title is None:
463464
return extract
@@ -498,7 +499,7 @@ async def wikipedia_key_facts(
498499
return cached
499500
try:
500501
rendered_title, url, extract = await _page_extract(t)
501-
except Exception as e:
502+
except EXPECTED_HTTP_ERRORS as e:
502503
return f"Wikipedia key facts failed: {e}"
503504
if rendered_title is None:
504505
return extract
@@ -554,7 +555,7 @@ async def wikipedia_section(
554555
"formatversion": "2",
555556
},
556557
)
557-
except Exception as e:
558+
except EXPECTED_HTTP_ERRORS as e:
558559
return f"Wikipedia section listing failed: {e}"
559560
sections = (sec_data.get("parse") or {}).get("sections") or []
560561
if not sections:
@@ -585,7 +586,7 @@ async def wikipedia_section(
585586
"formatversion": "2",
586587
},
587588
)
588-
except Exception as e:
589+
except EXPECTED_HTTP_ERRORS as e:
589590
return f"Wikipedia section fetch failed: {e}"
590591

591592
# Prefer the rendered HTML stripped (cleaner than raw wikitext).
@@ -619,7 +620,7 @@ async def _suggest_titles(query: str, n: int = 5) -> list[str]:
619620
return []
620621
data = r.json()
621622
return data[1] if isinstance(data, list) and len(data) > 1 else []
622-
except Exception:
623+
except EXPECTED_HTTP_ERRORS:
623624
return []
624625

625626

@@ -641,8 +642,24 @@ def default_config(self) -> dict[str, Any]:
641642
return dict(self._config)
642643

643644
def configure(self, overrides: dict[str, Any] | None = None, context: dict[str, Any] | None = None) -> None:
644-
if overrides:
645-
self._config.update(overrides)
645+
if not overrides:
646+
return
647+
648+
unknown = set(overrides) - set(self._config)
649+
if unknown:
650+
raise ValueError(f"Unknown WikipediaSearchTool override(s): {sorted(unknown)}")
651+
652+
if "num_results" in overrides:
653+
num_results = int(overrides["num_results"])
654+
if num_results < 1 or num_results > 5:
655+
raise ValueError("num_results must be between 1 and 5.")
656+
self._config["num_results"] = num_results
657+
if "max_chars" in overrides:
658+
self._config["max_chars"] = max(200, min(int(overrides["max_chars"]), 1500))
659+
if "max_sections" in overrides:
660+
self._config["max_sections"] = max(1, min(int(overrides["max_sections"]), 50))
661+
if "count" in overrides:
662+
self._config["count"] = max(1, min(int(overrides["count"]), MAX_FACTS))
646663

647664
async def list_tools(self) -> list[dict[str, Any]]:
648665
return [
@@ -696,11 +713,14 @@ async def list_tools(self) -> list[dict[str, Any]]:
696713
},
697714
{
698715
"name": "wikipedia-query-summary",
699-
"description": "Search for a query, then summarize the top result.",
716+
"description": "Return a compact snippet around a query inside a Wikipedia article.",
700717
"input_schema": {
701718
"type": "object",
702-
"properties": {"query": {"type": "string", "description": "Search query."}},
703-
"required": ["query"],
719+
"properties": {
720+
"title": {"type": "string", "description": "Article title."},
721+
"query": {"type": "string", "description": "Term or phrase to locate within the article."},
722+
},
723+
"required": ["title", "query"],
704724
},
705725
},
706726
{
@@ -717,24 +737,22 @@ async def list_tools(self) -> list[dict[str, Any]]:
717737
async def execute(self, tool_name: str, arguments: dict[str, Any], extra_args: dict[str, Any] | None = None):
718738
arguments = dict(arguments or {})
719739
if tool_name == "wikipedia-search":
720-
arguments.setdefault("num_results", self._config.get("num_results", 3))
740+
arguments.setdefault("num_results", self._config["num_results"])
721741
return await wikipedia_search(**arguments)
722742
if tool_name == "wikipedia-page":
723-
arguments.setdefault("max_chars", self._config.get("max_chars", EXTRACT_LIMIT))
724743
return await wikipedia_page(**arguments)
725744
if tool_name == "wikipedia-summary":
726-
arguments.setdefault("max_chars", self._config.get("max_chars", 900))
745+
arguments.setdefault("max_chars", self._config["max_chars"])
727746
return await wikipedia_summary(**arguments)
728747
if tool_name == "wikipedia-sections":
729-
arguments.setdefault("max_sections", self._config.get("max_sections", 40))
748+
arguments.setdefault("max_sections", self._config["max_sections"])
730749
return await wikipedia_sections(**arguments)
731750
if tool_name == "wikipedia-section":
732-
arguments.setdefault("max_chars", self._config.get("max_chars", EXTRACT_LIMIT))
733751
return await wikipedia_section(**arguments)
734752
if tool_name == "wikipedia-query-summary":
735-
arguments.setdefault("max_chars", self._config.get("max_chars", 900))
753+
arguments.setdefault("max_chars", self._config["max_chars"])
736754
return await wikipedia_query_summary(**arguments)
737755
if tool_name == "wikipedia-key-facts":
738-
arguments.setdefault("count", self._config.get("count", 5))
756+
arguments.setdefault("count", self._config["count"])
739757
return await wikipedia_key_facts(**arguments)
740758
return f"Error: unknown tool '{tool_name}'"

tests/test_mcp_clients.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,3 +1043,36 @@ async def test_wikipedia_direct_list_tools(self):
10431043
search_tool = next(t for t in tools if t["name"] == "wikipedia-search")
10441044
assert "query" in search_tool["input_schema"]["properties"]
10451045
assert "num_results" not in search_tool["input_schema"]["properties"]
1046+
1047+
query_summary_tool = next(t for t in tools if t["name"] == "wikipedia-query-summary")
1048+
assert {"title", "query"} <= set(query_summary_tool["input_schema"]["properties"])
1049+
assert set(query_summary_tool["input_schema"]["required"]) == {"title", "query"}
1050+
1051+
@pytest.mark.asyncio
1052+
async def test_wikipedia_execute_dispatch_contracts(self, monkeypatch):
1053+
from nemo_skills.mcp.servers import wikipedia_tool
1054+
from nemo_skills.mcp.servers.wikipedia_tool import WikipediaSearchTool
1055+
1056+
async def fake_page(title):
1057+
return f"page:{title}"
1058+
1059+
async def fake_section(title, section):
1060+
return f"section:{title}:{section}"
1061+
1062+
async def fake_query_summary(title, query, max_chars=700):
1063+
return f"query-summary:{title}:{query}:{max_chars}"
1064+
1065+
monkeypatch.setattr(wikipedia_tool, "wikipedia_page", fake_page)
1066+
monkeypatch.setattr(wikipedia_tool, "wikipedia_section", fake_section)
1067+
monkeypatch.setattr(wikipedia_tool, "wikipedia_query_summary", fake_query_summary)
1068+
1069+
tool = WikipediaSearchTool()
1070+
assert await tool.execute("wikipedia-page", {"title": "Hydrogen"}) == "page:Hydrogen"
1071+
assert (
1072+
await tool.execute("wikipedia-section", {"title": "Hydrogen", "section": "Isotopes"})
1073+
== "section:Hydrogen:Isotopes"
1074+
)
1075+
assert (
1076+
await tool.execute("wikipedia-query-summary", {"title": "Hydrogen", "query": "isotope"})
1077+
== "query-summary:Hydrogen:isotope:2500"
1078+
)

0 commit comments

Comments
 (0)