Skip to content

Commit 418af7a

Browse files
committed
MCP documentation: Use resource index from cratedb-about
The `cratedb-about` package [1,2] includes a knowledge outline file in YAML format, `cratedb-outline.yaml`, and also provides a Python API to read and query it.

[1] https://pypi.org/project/cratedb-about
[2] https://github.com/crate/about
1 parent 42d4d18 commit 418af7a

File tree

6 files changed

+71
-34
lines changed

6 files changed

+71
-34
lines changed

cratedb_mcp/__main__.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,24 @@
22
import httpx
33
from mcp.server.fastmcp import FastMCP
44

5-
from .knowledge import DOCUMENTATION_INDEX, Queries
6-
from .settings import DOCS_CACHE_TTL, HTTP_URL
5+
from .knowledge import DocumentationIndex, Queries, documentation_url_permitted
6+
from .settings import DOCS_CACHE_TTL, HTTP_TIMEOUT, HTTP_URL
77

88
# Configure Hishel, an httpx client with caching.
99
# Define one hour of caching time.
1010
controller = hishel.Controller(allow_stale=True)
1111
storage = hishel.SQLiteStorage(ttl=DOCS_CACHE_TTL)
1212
client = hishel.CacheClient(controller=controller, storage=storage)
1313

14+
# Load CrateDB documentation outline.
15+
documentation_index = DocumentationIndex()
16+
1417
# Create FastMCP application object.
1518
mcp = FastMCP("cratedb-mcp")
1619

1720

1821
def query_cratedb(query: str) -> list[dict]:
19-
return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}).json()
22+
return httpx.post(f'{HTTP_URL}/_sql', json={'stmt': query}, timeout=HTTP_TIMEOUT).json()
2023

2124

2225
@mcp.tool(description="Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that"
@@ -27,17 +30,17 @@ def query_sql(query: str):
2730
return query_cratedb(query)
2831

2932
@mcp.tool(description='Gets an index with CrateDB documentation links to fetch, should download docs'
30-
' before answering questions. Has documentation name, description and link.')
33+
' before answering questions. Has documentation title, description, and link.')
3134
def get_cratedb_documentation_index():
32-
return DOCUMENTATION_INDEX
35+
return documentation_index.items()
3336

3437
@mcp.tool(description='Downloads the latest CrateDB documentation piece by link.'
3538
' Only used to download CrateDB docs.')
3639
def fetch_cratedb_docs(link: str):
37-
"""Fetches a CrateDB documentation link from GitHub raw content."""
38-
if 'https://raw.githubusercontent.com/crate/crate/' not in link:
39-
raise ValueError('Only github cratedb links can be fetched.')
40-
return client.get(link).text
40+
"""Fetches a CrateDB documentation link."""
41+
if not documentation_url_permitted(link):
42+
raise ValueError(f'Link is not permitted: {link}')
43+
return client.get(link, timeout=HTTP_TIMEOUT).text
4144

4245
@mcp.tool(description="Returns an aggregation of all CrateDB's schema, tables and their metadata")
4346
def get_table_metadata() -> list[dict]:

cratedb_mcp/knowledge.py

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# ruff: noqa: E501
2+
import cachetools
3+
from cratedb_about import CrateDbKnowledgeOutline
4+
25

36
class Queries:
47
TABLES_METADATA = """
@@ -83,20 +86,38 @@ class Queries:
8386
ORDER BY severity DESC"""
8487

8588

86-
# 'description' is very important, it gives context to the LLMs to properly decide which one to use.
87-
DOCUMENTATION_INDEX = [
88-
# TODO: Add all there are.
89-
{
90-
"name": "about/overview",
91-
"description": "The most important factual and technical information about CrateDB per medium-sized (~300kB) llms.txt context file.",
92-
"link": "https://cdn.crate.io/about/v1/llms.txt"},
93-
{
94-
"name": "scalar functions",
95-
"description": "documentation about specific scalar/methods/functions for CrateDB SQL",
96-
"link": "https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst"},
97-
{
98-
"name": "optimize query 101",
99-
"description": "documentation about optimizing CrateDB SQL statements",
100-
"link": "https://raw.githubusercontent.com/crate/cratedb-guide/9ab661997d7704ecbb63af9c3ee33535957e24e6/docs/performance/optimization.rst"
101-
}
102-
]
89+
class DocumentationIndex:
90+
"""
91+
Define documentation sections supplied to the MCP server.
92+
Load knowledge outline from YAML file and read all items.
93+
94+
The `description` attribute is very important, it gives context
95+
to the LLM to properly decide which one to use.
96+
97+
Canonical source: https://github.com/crate/about/blob/main/src/cratedb_about/outline/cratedb-outline.yaml
98+
99+
Examples:
100+
```yaml
101+
- title: "CrateDB SQL functions"
102+
link: https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt
103+
description: The reference documentation about all SQL functions CrateDB provides.
104+
105+
- title: "Guide: CrateDB query optimization"
106+
link: https://cratedb.com/docs/guide/_sources/performance/optimization.rst.txt
107+
description: Essential principles for optimizing queries in CrateDB while avoiding the most common pitfalls.
108+
```
109+
"""
110+
111+
def __init__(self):
112+
self.outline = CrateDbKnowledgeOutline.load()
113+
114+
@cachetools.cached(cache={})
115+
def items(self):
116+
return self.outline.find_items().to_dict()
117+
118+
119+
def documentation_url_permitted(url: str) -> bool:
120+
return (
121+
url.startswith("https://cratedb.com/") or
122+
url.startswith("https://github.com/crate") or
123+
url.startswith("https://raw.githubusercontent.com/crate"))

cratedb_mcp/settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@
1212
# TODO: Add software test after refactoring away from module scope.
1313
warnings.warn(f"Environment variable `CRATEDB_MCP_DOCS_CACHE_TTL` invalid: {e}. "
1414
f"Using default value: {DOCS_CACHE_TTL}.", category=UserWarning, stacklevel=2)
15+
16+
# HTTP timeout in seconds, applied to all outgoing HTTP requests.
17+
HTTP_TIMEOUT = 10.0

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ dynamic = [
2020
"version",
2121
]
2222
dependencies = [
23-
"cratedb-about==0.0.3",
23+
"cachetools<6",
24+
"cratedb-about==0.0.4",
2425
"hishel<0.2",
2526
"mcp[cli]>=1.5.0",
2627
]

tests/test_knowledge.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1-
from cratedb_mcp.knowledge import DOCUMENTATION_INDEX, Queries
1+
from cratedb_mcp.knowledge import DocumentationIndex, Queries
22

33

44
def test_documentation_index():
5-
assert len(DOCUMENTATION_INDEX) == 3
6-
assert DOCUMENTATION_INDEX[1]["name"] == "scalar functions"
7-
assert DOCUMENTATION_INDEX[2]["name"] == "optimize query 101"
5+
documentation_index = DocumentationIndex()
6+
titles = [item["title"] for item in documentation_index.items()]
7+
assert len(titles) >= 50
8+
assert "CrateDB database" in titles
9+
assert "CrateDB features" in titles
10+
assert "CrateDB SQL reference: Scalar functions" in titles
11+
assert "Guide: CrateDB query optimization" in titles
812

913

1014
def test_queries():

tests/test_mcp.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,20 @@ def test_get_documentation_index():
1515

1616
def test_fetch_docs_forbidden():
1717
with pytest.raises(ValueError) as ex:
18-
fetch_cratedb_docs("https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt")
19-
assert ex.match("Only github cratedb links can be fetched")
18+
fetch_cratedb_docs("https://example.com")
19+
assert ex.match("Link is not permitted: https://example.com")
2020

2121

22-
def test_fetch_docs_permitted():
22+
def test_fetch_docs_permitted_github():
2323
response = fetch_cratedb_docs("https://raw.githubusercontent.com/crate/crate/refs/heads/5.10/docs/general/builtins/scalar-functions.rst")
2424
assert "initcap" in response
2525

2626

27+
def test_fetch_docs_permitted_cratedb_com():
28+
response = fetch_cratedb_docs("https://cratedb.com/docs/crate/reference/en/latest/_sources/general/builtins/scalar-functions.rst.txt")
29+
assert "initcap" in response
30+
31+
2732
def test_query_sql_forbidden():
2833
with pytest.raises(ValueError) as ex:
2934
assert "RelationUnknown" in str(query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id"))

0 commit comments

Comments
 (0)