Skip to content

Commit dbd3bb7

Browse files
authored
Merge pull request #12 from Imaging-Plaza/feature/deep-wiki-mcp
Feature/deep wiki mcp
2 parents 6ec6585 + 22ac4a0 commit dbd3bb7

9 files changed

Lines changed: 804 additions & 503 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,17 @@ All notable changes to this project will be documented in this file.
55
## [Unreleased]
66

77
### Added
8+
- **DeepWiki MCP integration**: The repository info tool now uses the DeepWiki MCP server (https://mcp.deepwiki.com/sse) as its primary source for GitHub repository documentation. DeepWiki provides fast, pre-indexed documentation access without API rate limits.
9+
- Automatic fallback to `repocards` library (replacing previous direct GitHub API implementation) when DeepWiki is unavailable or times out, ensuring robust repository information retrieval for both indexed and newly-created repositories.
10+
11+
### Changed
12+
- Updated `pydantic-ai` dependency to include MCP support via `pydantic-ai[mcp]` extra.
13+
- Enhanced `RepoSummaryOutput` schema to include `source` field indicating whether data came from "deepwiki" or "repocards".
14+
- Repository info tool logs now track data source (DeepWiki vs repocards) for observability.
15+
- Replaced previous direct GitHub API implementation with `repocards` library as the fallback mechanism for repository information retrieval.
816
- **YAML Model Configuration**: New `config.yaml` file for flexible model configuration supporting OpenAI, EPFL inference server, and any OpenAI-compatible API endpoints.
917
- **Multi-Model Support**: Different models can now be configured for the agent (main reasoning & tool selection).
1018
- **Configuration Module**: New `utils/config.py` with Pydantic models for type-safe configuration loading and validation.
11-
12-
### Changed
1319
- **Model Initialization**: Agent now uses configuration from `config.yaml`.
1420
- **API Client Creation**: OpenAI clients now support custom `base_url` for alternative API endpoints (EPFL, custom deployments).
1521
- **Dependency**: Added `pyyaml` to `pyproject.toml` dependencies.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dependencies = [
1111
"pydantic>=2",
1212
"sentence-transformers",
1313
"openai>=1.30.0",
14-
"pydantic-ai",
14+
"pydantic-ai[mcp]",
1515
"requests",
1616
"python-dotenv",
1717
"gradio==5.42.0",
@@ -23,6 +23,7 @@ dependencies = [
2323
"imageio",
2424
"rdflib",
2525
"sparqlwrapper",
26+
"repocards",
2627
"pyyaml",
2728
]
2829

src/ai_agent/agent/agent.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
from utils.utils import _best_runnable_link
1414
from utils.config import get_config
1515
from .models import AgentToolSelection, ToolRunLog
16-
from .tools.repo_info_tool import tool_repo_summary, RepoSummaryInput, coerce_github_url_or_none
16+
from .tools.repo_info_tool import tool_repo_summary, RepoSummaryInput
1717
from .tools.rerank_tool import tool_rerank, RerankInput
1818
from .tools.search_tool import tool_search_tools, SearchToolsInput
1919
from .tools.gradio_space_tool import tool_run_example, RunExampleInput
20-
from .utils import AgentState, limit_tool_calls, cap_prepare
20+
from .utils import AgentState, limit_tool_calls, cap_prepare, coerce_github_url_or_none
2121

2222
log = logging.getLogger("agent.core")
2323

@@ -103,7 +103,7 @@ async def rerank(ctx: RunContext[AgentState], query: str, candidate_names: List[
103103
# return out.model_dump(mode="python")
104104

105105
@agent.tool(retries=0, prepare=cap_prepare)
106-
@limit_tool_calls("repo_info", cap=3)
106+
@limit_tool_calls("repo_info", cap=6)
107107
async def repo_info(ctx: RunContext[AgentState], url: str):
108108
norm_url = coerce_github_url_or_none(url)
109109
if not norm_url:
@@ -117,8 +117,13 @@ async def repo_info(ctx: RunContext[AgentState], url: str):
117117
return payload
118118

119119
try:
120-
out = tool_repo_summary(RepoSummaryInput(url=norm_url))
121-
ctx.deps.tool_calls.append({"tool": "repo_info", "url": norm_url, "truncated": out.truncated})
120+
out = await tool_repo_summary(RepoSummaryInput(url=norm_url))
121+
ctx.deps.tool_calls.append({
122+
"tool": "repo_info",
123+
"url": norm_url,
124+
"truncated": out.truncated,
125+
"source": out.source
126+
})
122127
return out.model_dump(mode="python")
123128
except Exception as e:
124129
ctx.deps.tool_calls.append({"tool": "repo_info", "url": norm_url, "error": str(e)})
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from __future__ import annotations
2+
3+
import asyncio
4+
import logging
5+
from typing import Optional
6+
7+
from pydantic import BaseModel
8+
from pydantic_ai.mcp import MCPServerSSE
9+
10+
from .utils import _clip
11+
from ..utils import _coerce_owner_repo_ref
12+
13+
log = logging.getLogger("agent.deepwiki")
14+
15+
# DeepWiki MCP server endpoint (SSE transport)
16+
DEEPWIKI_SSE_URL = "https://mcp.deepwiki.com/sse"
17+
18+
# Timeout for DeepWiki operations (seconds)
19+
DEEPWIKI_TIMEOUT = 60
20+
21+
22+
23+
class DeepWikiInput(BaseModel):
    """Request model for a DeepWiki lookup."""

    # Repository reference: a GitHub repository URL or an "owner/repo" string.
    url: str
26+
27+
28+
class DeepWikiContentsOutput(BaseModel):
    """Result model for a read_wiki_contents lookup."""

    # Only `success` is required; the remaining fields fall back to defaults.
    success: bool
    # Documentation text when the lookup produced content, otherwise None.
    contents: Optional[str] = None
    # Failure description when the lookup did not succeed, otherwise None.
    error: Optional[str] = None
    # Flag reporting whether `contents` was shortened.
    truncated: bool = False
34+
35+
36+
async def get_wiki_contents(input: DeepWikiInput) -> DeepWikiContentsOutput:
    """
    Fetch repo docs from DeepWiki MCP (SSE) and return a clipped string
    to keep LLM token usage under control.

    Args:
        input: Wrapper holding the repository reference (GitHub URL or
            ``owner/repo``). The name shadows the builtin but is kept so
            keyword callers keep working.

    Returns:
        A ``DeepWikiContentsOutput``. On success, ``contents`` is the stripped
        text after ``_clip`` and ``truncated`` is the flag ``_clip`` returned.
        Timeouts, connection/tool errors and empty responses are reported as
        ``success=False`` with ``error`` set instead of being raised.

    Raises:
        Anything raised by ``_coerce_owner_repo_ref`` propagates unchanged,
        because that call sits outside the ``try`` block.
    """
    owner, repo, _ = _coerce_owner_repo_ref(input.url)
    repo_name = f"{owner}/{repo}"

    async def _read_wiki():
        # Open the session and call the tool inside one coroutine so that the
        # timeout below bounds connection setup as well as the tool call.
        server = MCPServerSSE(DEEPWIKI_SSE_URL)
        async with server:
            return await server.direct_call_tool(
                "read_wiki_contents", {"repoName": repo_name}
            )

    try:
        result = await asyncio.wait_for(_read_wiki(), timeout=DEEPWIKI_TIMEOUT)

        # The tool result may be a list of parts or a bare string when there
        # is only one text part; accept both. Non-string parts are ignored.
        if isinstance(result, str):
            parts = [result]
        elif isinstance(result, (list, tuple)):
            parts = [p for p in result if isinstance(p, str)]
        else:
            parts = []

        text = "\n".join(parts).strip()
        if text:
            clipped_text, truncated = _clip(text)
            return DeepWikiContentsOutput(
                success=True,
                contents=clipped_text,
                truncated=truncated,
            )

        return DeepWikiContentsOutput(success=False, error="No content returned from DeepWiki")

    except asyncio.TimeoutError:
        log.warning("DeepWiki timed out after %ss for %s", DEEPWIKI_TIMEOUT, repo_name)
        return DeepWikiContentsOutput(
            success=False,
            error=f"DeepWiki request timed out after {DEEPWIKI_TIMEOUT}s",
        )
    except Exception as e:
        # Best-effort boundary: the failure is reported in the output object
        # rather than raised.
        log.error("Failed to get wiki contents for %s: %s", repo_name, e)
        return DeepWikiContentsOutput(
            success=False,
            error=f"Failed to connect to DeepWiki: {str(e)}",
        )
79+
80+
81+
__all__ = [
82+
"get_wiki_contents",
83+
"DeepWikiInput",
84+
"DeepWikiContentsOutput",
85+
]

0 commit comments

Comments
 (0)