Add ArXiv search MCP tool with tests and dependency

tamohannes · tamohannes · commit b95bc5d59923 · 2026-04-21T10:35:42.000-07:00
Add ArxivSearchTool as a first-party MCP server for scientific paper
retrieval via the free arXiv API (no API key required). Includes:

- New nemo_skills/mcp/servers/arxiv_tool.py with arxiv-search and
  arxiv-get tools, rate limiting, retry logic, and response caching
- Declare `arxiv` dependency in tools/requirements.txt
- CI: install [tools] extra so MCP tool deps are available in tests
- Tests: config validation, offline stdio subprocess integration
  (list_tools, hide_args), invalid-id handling, and live API tests
- Docs: add ArxivSearchTool to built-in tools reference

Signed-off-by: tamohannes <hovhannes.tamoyan@gmail.com>
Made-with: Cursor
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -44,7 +44,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -e .[dev] --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install -e ".[dev,tools]" --extra-index-url https://download.pytorch.org/whl/cpu
         # Clear pip cache
         pip cache purge || true
     - name: Build Images
diff --git a/docs/agentic_inference/tool_calling.md b/docs/agentic_inference/tool_calling.md
@@ -370,4 +370,5 @@ For vLLM, you may need to specify tool calling arguments:
 ### Built-in Tools
 
 - [`nemo_skills.mcp.servers.python_tool.PythonTool`](https://github.com/NVIDIA-NeMo/Skills/tree/main/nemo_skills/mcp/servers/python_tool.py) - Python code execution
+- [`nemo_skills.mcp.servers.arxiv_tool.ArxivSearchTool`](https://github.com/NVIDIA-NeMo/Skills/tree/main/nemo_skills/mcp/servers/arxiv_tool.py) - ArXiv paper search and retrieval (no API key required)
 - [`nemo_skills.mcp.servers.exa_tool.ExaTool`](https://github.com/NVIDIA-NeMo/Skills/tree/main/nemo_skills/mcp/servers/exa_tool.py) - Web search via Exa API
diff --git a/nemo_skills/mcp/servers/arxiv_tool.py b/nemo_skills/mcp/servers/arxiv_tool.py
@@ -0,0 +1,198 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ArXiv search MCP tool for scientific paper retrieval.
+
+Runs outside the sandbox (no network blocking). Uses the free arXiv API
+via the `arxiv` pip package. No API key required.
+
+Prerequisites:
+    pip install arxiv
+
+Usage:
+    ++tool_modules=[nemo_skills.mcp.servers.arxiv_tool::ArxivSearchTool]
+"""
+
+import hashlib
+import json
+import logging
+import time
+from threading import Lock
+from typing import Annotated
+
+from mcp.server.fastmcp import FastMCP
+from pydantic import Field
+
+from nemo_skills.mcp.tool_providers import MCPClientTool
+
+logger = logging.getLogger(__name__)
+
+# FastMCP server instance; the @mcp.tool functions below register on it.
+mcp = FastMCP(name="arxiv")
+
+# Upper cap applied to the caller-supplied ``max_results`` of arxiv-search.
+MAX_RESULTS = 10
+# Minimum number of seconds between two consecutive ArXiv API calls.
+_REQUEST_INTERVAL = 3.0
+# Retries allowed after the initial attempt (so up to _NUM_RETRIES + 1 calls).
+_NUM_RETRIES = 5
+# Backoff pause in seconds after the first failure; doubled after each further failure.
+_INITIAL_DELAY = 5.0
+# Ceiling in seconds for the backoff pause.
+_MAX_DELAY = 60.0
+# Upper bound on the number of responses kept in ``_cache``.
+_CACHE_MAX_SIZE = 256
+
+# time.monotonic() reading recorded when the most recent API call was released.
+_last_request_time = 0.0
+# Guards ``_last_request_time``.
+_rate_lock = Lock()
+# Process-local response cache: digest from _cache_key() -> formatted result text.
+_cache: dict[str, str] = {}
+
+
+def _rate_limit():
+    """Block until ``_REQUEST_INTERVAL`` seconds have passed since the last ArXiv API call.
+
+    The sleep happens while ``_rate_lock`` is held, so concurrent callers are
+    released one at a time with the full interval between them.
+    """
+    global _last_request_time
+    with _rate_lock:
+        elapsed = time.monotonic() - _last_request_time
+        remaining = _REQUEST_INTERVAL - elapsed
+        if remaining > 0:
+            time.sleep(remaining)
+        # Stamp after any sleep so the next caller measures from the actual release time.
+        _last_request_time = time.monotonic()
+
+
+def _cache_key(*args) -> str:
+    """Return a SHA-256 hex digest identifying ``args``.
+
+    The positional arguments are serialized as one JSON array before hashing,
+    so every argument must be JSON-serializable.
+    """
+    serialized = json.dumps(args, sort_keys=True)
+    digest = hashlib.sha256(serialized.encode())
+    return digest.hexdigest()
+
+
+def _with_retry(fn):
+    """Call ``fn`` until it succeeds, backing off exponentially between failures.
+
+    Every attempt first passes through ``_rate_limit()``. The initial attempt
+    may be followed by up to ``_NUM_RETRIES`` retries; the pause starts at
+    ``_INITIAL_DELAY`` seconds and doubles after each failure, capped at
+    ``_MAX_DELAY``. The exception raised by the final attempt propagates to
+    the caller.
+    """
+    total_attempts = _NUM_RETRIES + 1
+    backoff = _INITIAL_DELAY
+    for attempt_no in range(1, total_attempts + 1):
+        try:
+            _rate_limit()
+            return fn()
+        except Exception as exc:
+            # Out of attempts: let the last error reach the caller unchanged.
+            if attempt_no == total_attempts:
+                raise
+            logger.warning(
+                "ArXiv attempt %d/%d failed: %s — retrying in %.0fs",
+                attempt_no,
+                total_attempts,
+                exc,
+                backoff,
+            )
+            time.sleep(backoff)
+            backoff = min(backoff * 2, _MAX_DELAY)
+
+
+@mcp.tool(name="arxiv-search")
+def arxiv_search(
+    query: Annotated[
+        str, Field(description="Search query for arXiv papers (supports arXiv query syntax: au:, ti:, abs:, cat:).")
+    ],
+    max_results: Annotated[int, Field(description="Maximum number of results to return.")] = 3,
+) -> str:
+    """Search arXiv for scientific papers. Returns titles, abstracts, and URLs."""
+    # NOTE(review): the docstring is left untouched on purpose; it sits on the
+    # tool registration and is presumably what clients see as the tool
+    # description. Implementation notes therefore live in comments.
+    #
+    # Result: one text entry per paper (title, up to five authors, publication
+    # date, URL, abstract cut at 500 characters), entries joined by "---".
+    # Failures are reported as text ("ArXiv search failed: ..."), never raised.
+    import arxiv
+
+    if not query.strip():
+        # Nothing to search for: skip the API round-trip and its retry backoff.
+        return "ArXiv search failed: empty query."
+
+    # Clamp into [1, MAX_RESULTS]; a zero or negative limit is not a usable request size.
+    max_results = max(1, min(max_results, MAX_RESULTS))
+
+    key = _cache_key("search", query, max_results)
+    cached = _cache.get(key)
+    if cached is not None:
+        return cached
+
+    def _format(paper) -> str:
+        # Render one result; long author lists and abstracts are truncated with "...".
+        authors = ", ".join(a.name for a in paper.authors[:5])
+        more_authors = "..." if len(paper.authors) > 5 else ""
+        more_abstract = "..." if len(paper.summary) > 500 else ""
+        return (
+            f"**{paper.title}**\n"
+            f"Authors: {authors}{more_authors}\n"
+            f"Published: {paper.published.strftime('%Y-%m-%d')}\n"
+            f"URL: {paper.entry_id}\n"
+            f"Abstract: {paper.summary[:500]}{more_abstract}\n"
+        )
+
+    def _fetch():
+        # Pacing and retries are driven by _with_retry(), so the client's own are kept minimal.
+        client = arxiv.Client(page_size=max_results, num_retries=1, delay_seconds=0)
+        search = arxiv.Search(
+            query=query,
+            max_results=max_results,
+            sort_by=arxiv.SortCriterion.Relevance,
+        )
+        return [_format(paper) for paper in client.results(search)]
+
+    try:
+        results = _with_retry(_fetch)
+    except Exception as e:
+        # Tool boundary: hand the failure back as text instead of raising.
+        return f"ArXiv search failed: {e}"
+
+    if not results:
+        return "No papers found for this query."
+
+    result_str = "\n---\n".join(results)
+    if _cache and len(_cache) >= _CACHE_MAX_SIZE:
+        # Cache is full: drop the oldest entry (dicts keep insertion order) so
+        # new queries keep being cached instead of being refused forever.
+        _cache.pop(next(iter(_cache)))
+    _cache[key] = result_str
+    return result_str
+
+
+@mcp.tool(name="arxiv-get")
+def arxiv_get(
+    paper_id: Annotated[str, Field(description="arXiv paper ID (e.g. '2301.07041' or '2301.07041v1').")],
+) -> str:
+    """Fetch a specific arXiv paper by ID. Returns full title, authors, abstract, and metadata."""
+    # NOTE(review): the docstring is left untouched on purpose; it sits on the
+    # tool registration and is presumably what clients see as the tool
+    # description. Implementation notes therefore live in comments.
+    #
+    # Result: plain text with title, all authors, published/updated dates,
+    # categories, URL, PDF link and the full abstract. A missing paper or a
+    # failed lookup is reported as text, never raised.
+    import arxiv
+
+    # Normalize surrounding whitespace so padded IDs resolve and share one cache entry.
+    paper_id = paper_id.strip()
+    if not paper_id:
+        # Nothing to look up: skip the API round-trip and its retry backoff.
+        return "ArXiv lookup failed: empty paper ID."
+
+    key = _cache_key("get", paper_id)
+    cached = _cache.get(key)
+    if cached is not None:
+        return cached
+
+    def _fetch():
+        # Pacing and retries are driven by _with_retry(), so the client's own are kept minimal.
+        client = arxiv.Client(page_size=1, num_retries=1, delay_seconds=0)
+        search = arxiv.Search(id_list=[paper_id])
+        return next(client.results(search), None)
+
+    try:
+        paper = _with_retry(_fetch)
+        if paper is None:
+            return f"Paper {paper_id} not found on arXiv."
+        result_str = (
+            f"**{paper.title}**\n"
+            f"Authors: {', '.join(a.name for a in paper.authors)}\n"
+            f"Published: {paper.published.strftime('%Y-%m-%d')}\n"
+            f"Updated: {paper.updated.strftime('%Y-%m-%d')}\n"
+            f"Categories: {', '.join(paper.categories)}\n"
+            f"URL: {paper.entry_id}\n"
+            f"PDF: {paper.pdf_url}\n\n"
+            f"Abstract:\n{paper.summary}"
+        )
+    except Exception as e:
+        # Tool boundary: hand the failure back as text instead of raising.
+        return f"ArXiv lookup failed: {e}"
+
+    if _cache and len(_cache) >= _CACHE_MAX_SIZE:
+        # Cache is full: drop the oldest entry (dicts keep insertion order) so
+        # new lookups keep being cached instead of being refused forever.
+        _cache.pop(next(iter(_cache)))
+    _cache[key] = result_str
+    return result_str
+
+
+class ArxivSearchTool(MCPClientTool):
+    """MCP client tool wired to the arXiv server defined in this module.
+
+    The configuration selects the stdio client, starts the server with
+    ``python -m nemo_skills.mcp.servers.arxiv_tool``, and lists ``max_results``
+    under ``hide_args`` for the ``arxiv-search`` tool.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        server_process = {
+            "command": "python",
+            "args": ["-m", "nemo_skills.mcp.servers.arxiv_tool"],
+        }
+        hidden_arguments = {"arxiv-search": ["max_results"]}
+        config = {
+            "client": "nemo_skills.mcp.clients.MCPStdioClient",
+            "client_params": server_process,
+            "hide_args": hidden_arguments,
+        }
+        self.apply_config_updates(config)
+
+
+def main():
+    """Run the module-level ``mcp`` server on the stdio transport."""
+    mcp.run(transport="stdio")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_mcp_clients.py b/tests/test_mcp_clients.py
@@ -1002,3 +1002,72 @@ async def failing_delete(session_id):
     # Must not raise; session must be removed from the mapping regardless.
     await tool.cleanup_request("req-x")
     assert "req-x" not in tool.requests_to_sessions
+
+
+# ── ArXiv tool tests ─────────────────────────────────────────────────────
+
+
+class TestArxivTool:
+    """Tests for the ArXiv MCP tool: config, live API calls, and stdio integration.
+
+    Tests that reach the arXiv API carry the ``live`` marker; the stdio tests
+    only launch the local server subprocess and list its tools.
+    """
+
+    def test_arxiv_tool_config(self):
+        from nemo_skills.mcp.servers.arxiv_tool import ArxivSearchTool
+
+        tool = ArxivSearchTool()
+        assert tool._config["client"] == "nemo_skills.mcp.clients.MCPStdioClient"
+        assert "-m" in tool._config["client_params"]["args"]
+        assert "nemo_skills.mcp.servers.arxiv_tool" in tool._config["client_params"]["args"]
+
+    @pytest.mark.live
+    def test_arxiv_search_live(self):
+        from nemo_skills.mcp.servers.arxiv_tool import arxiv_search
+
+        result = arxiv_search("quantum entanglement", max_results=2)
+        # The tool reports failures as "ArXiv search failed: ...", so check for
+        # that prefix rather than for the word "Error".
+        assert not result.startswith("ArXiv search failed")
+        assert "**" in result
+
+    @pytest.mark.live
+    def test_arxiv_get_live(self):
+        from nemo_skills.mcp.servers.arxiv_tool import arxiv_get
+
+        result = arxiv_get("2301.07041")
+        assert not result.startswith("ArXiv lookup failed")
+        assert "not found" not in result.lower()
+        assert "Abstract" in result
+
+    @pytest.mark.live
+    def test_arxiv_get_invalid_id(self):
+        # Marked live: the lookup goes to the arXiv API, and without network
+        # access the tool's retry backoff would stall the test for minutes.
+        from nemo_skills.mcp.servers.arxiv_tool import arxiv_get
+
+        result = arxiv_get("0000.00000")
+        assert "not found" in result.lower() or "failed" in result.lower()
+
+    @pytest.mark.asyncio
+    async def test_arxiv_stdio_list_tools(self):
+        """Launch ArxivSearchTool over a real stdio subprocess and verify tool listing."""
+        from nemo_skills.mcp.servers.arxiv_tool import ArxivSearchTool
+
+        tool = ArxivSearchTool()
+        tool.configure()
+        try:
+            tools = await tool.list_tools()
+            tool_names = {t["name"] for t in tools}
+            assert "arxiv-search" in tool_names
+            assert "arxiv-get" in tool_names
+        finally:
+            # Always shut the tool down, even when an assertion above fails.
+            await tool.shutdown()
+
+    @pytest.mark.asyncio
+    async def test_arxiv_stdio_hide_args(self):
+        """Verify hide_args removes max_results from the listed schema."""
+        from nemo_skills.mcp.servers.arxiv_tool import ArxivSearchTool
+
+        tool = ArxivSearchTool()
+        tool.configure()
+        try:
+            tools = await tool.list_tools()
+            search_tool = next(t for t in tools if t["name"] == "arxiv-search")
+            schema_props = search_tool["input_schema"]["properties"]
+            assert "query" in schema_props
+            assert "max_results" not in schema_props
+
+            get_tool = next(t for t in tools if t["name"] == "arxiv-get")
+            assert "paper_id" in get_tool["input_schema"]["properties"]
+        finally:
+            # Always shut the tool down, even when an assertion above fails.
+            await tool.shutdown()
diff --git a/tools/requirements.txt b/tools/requirements.txt
@@ -5,6 +5,7 @@
 # import time by the current implementation, but it excludes model-client
 # dependencies such as litellm and openai.
 
+arxiv
 httpx
 math-verify[antlr4_9_3]
 mcp