Skip to content

Commit 61998a0

Browse files
authored
Add SerpApi engine resources (#24)
* Add engine resources and docs updates
* Add engine generator and refresh engine schemas
* Address formatting and sanitizer feedback
1 parent 101c016 commit 61998a0

File tree

113 files changed

+53648
-545
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+53648
-545
lines changed

Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@ WORKDIR /app
77
COPY pyproject.toml /app/
88
COPY README.md /app/
99
COPY src /app/src
10+
COPY engines /app/engines
11+
COPY build-engines.py /app/build-engines.py
1012

1113
RUN uv sync
1214

1315
ENV PATH="/app/.venv/bin:$PATH"
1416

17+
RUN python /app/build-engines.py
18+
1519
EXPOSE 8000
1620

1721
CMD ["python", "src/server.py"]

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ A Model Context Protocol (MCP) server implementation that integrates with [SerpA
88
## Features
99

1010
- **Multi-Engine Search**: Google, Bing, Yahoo, DuckDuckGo, YouTube, eBay, and [more](https://serpapi.com/search-engine-apis)
11+
- **Engine Resources**: Per-engine parameter schemas available via MCP resources (see Search Tool)
1112
- **Real-time Weather Data**: Location-based weather with forecasts via search queries
1213
- **Stock Market Data**: Company financials and market data through search integration
1314
- **Dynamic Result Processing**: Automatically detects and formats different result types
@@ -68,6 +69,7 @@ curl "https://mcp.serpapi.com/mcp" -H "Authorization: Bearer your_key" -d '...'
6869
## Search Tool
6970

7071
The MCP server has one main Search Tool that supports all SerpApi engines and result types. You can find all available parameters on the [SerpApi API reference](https://serpapi.com/search-api).
72+
Engine parameter schemas are also exposed as MCP resources: `serpapi://engines` (index) and `serpapi://engines/<engine>`.
7173

7274
The parameters you can provide are specific for each API engine. Some sample parameters are provided below:
7375

@@ -87,7 +89,7 @@ The parameters you can provide are specific for each API engine. Some sample par
8789
{"name": "search", "arguments": {"params": {"q": "detailed search"}, "mode": "complete"}}
8890
```
8991

90-
**Supported Engines:** Google, Bing, Yahoo, DuckDuckGo, YouTube, eBay, and more.
92+
**Supported Engines:** Google, Bing, Yahoo, DuckDuckGo, YouTube, eBay, and more (see `serpapi://engines`).
9193

9294
**Result Types:** Answer boxes, organic results, news, images, shopping - automatically detected and formatted.
9395

@@ -100,6 +102,9 @@ uv sync && uv run src/server.py
100102
# Docker
101103
docker build -t serpapi-mcp . && docker run -p 8000:8000 serpapi-mcp
102104

105+
# Regenerate engine resources (Playground scrape)
106+
python build-engines.py
107+
103108
# Testing with MCP Inspector
104109
npx @modelcontextprotocol/inspector
105110
# Configure: URL mcp.serpapi.com/YOUR_KEY/mcp, Transport "Streamable HTTP transport"

build-engines.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env python3
2+
"""Build SerpApi engine parameter data for MCP usage."""
3+
4+
from __future__ import annotations
5+
6+
import html
7+
import json
8+
from pathlib import Path
9+
from urllib.request import Request, urlopen
10+
11+
from bs4 import BeautifulSoup
12+
from markdownify import markdownify
13+
14+
# Playground page whose embedded React props carry every engine's parameters.
PLAYGROUND_URL = "https://serpapi.com/playground"
# Engines deliberately skipped when generating resource files.
EXCLUDED_ENGINES = {
    "google_scholar_profiles",
    "google_light_fast",
    "google_lens_image_sources",
}
# Parameter metadata keys that are kept; all other keys are discarded.
PARAM_KEEP_KEYS = {"html", "type", "options", "required"}
# Directory that receives the generated <engine>.json files.
OUTPUT_DIR = Path("engines")
# Timeout (seconds) applied to the playground HTTP request.
TIMEOUT_SECONDS = 30
# User-Agent header sent with the playground request.
USER_AGENT = "Mozilla/5.0"
25+
26+
def html_to_markdown(value: str) -> str:
    """Convert an HTML fragment to markdown with normalized whitespace.

    Anchor tags are stripped so only their text survives, and every run
    of whitespace is collapsed into a single space.
    """
    unescaped = html.unescape(value)
    converted = markdownify(unescaped, strip=["a"])
    return " ".join(converted.split())
30+
31+
32+
def normalize_options(options: list[object]) -> list[object]:
    """Normalize option values, simplifying [value, label] pairs where possible.

    A ``[value, label]`` pair is kept intact only when the label adds
    information: the label exists, the value is numeric (or a digit
    string), and the label differs from the value. Otherwise the pair is
    collapsed to just its value; non-list entries pass through untouched.
    """
    result: list[object] = []
    for entry in options:
        # Anything that is not a non-empty list is passed through as-is.
        if not (isinstance(entry, list) and entry):
            result.append(entry)
            continue
        value = entry[0]
        label = entry[1] if len(entry) > 1 else None
        numeric_like = isinstance(value, (int, float)) or (
            isinstance(value, str) and value.isdigit()
        )
        keep_pair = label is not None and numeric_like and value != label
        result.append(entry if keep_pair else value)
    return result
53+
54+
55+
def fetch_props(url: str) -> dict[str, object]:
    """Download the playground page and return its embedded React props.

    Raises RuntimeError when no element carrying a ``data-react-props``
    attribute can be located in the fetched HTML.
    """
    request = Request(url, headers={"User-Agent": USER_AGENT})
    with urlopen(request, timeout=TIMEOUT_SECONDS) as response:
        page = response.read().decode("utf-8", errors="ignore")
    document = BeautifulSoup(page, "html.parser")
    holder = document.find(attrs={"data-react-props": True})
    if not holder:
        raise RuntimeError("Failed to locate data-react-props in playground HTML.")
    # The props attribute is HTML-escaped JSON; unescape before parsing.
    return json.loads(html.unescape(holder["data-react-props"]))
65+
66+
67+
def normalize_engine(engine: str, payload: dict[str, object]) -> dict[str, object]:
    """Normalize an engine payload, extracting relevant parameter metadata.

    Walks every parameter group in *payload*, keeps only the keys listed
    in PARAM_KEEP_KEYS, converts option lists and HTML descriptions, and
    splits the results into engine-specific params versus the common
    ``serpapi_parameters`` group.
    """
    engine_params: dict[str, dict[str, object]] = {}
    shared_params: dict[str, dict[str, object]] = {}

    groups = payload.items() if isinstance(payload, dict) else []
    for group_name, group in groups:
        if not isinstance(group, dict):
            continue
        raw_params = group.get("parameters")
        if not isinstance(raw_params, dict):
            continue
        for param_name, param in raw_params.items():
            if not isinstance(param, dict):
                continue
            kept = {key: value for key, value in param.items() if key in PARAM_KEEP_KEYS}
            options = kept.get("options")
            if isinstance(options, list):
                kept["options"] = normalize_options(options)
            # Replace raw HTML help text with a markdown "description".
            html_value = kept.pop("html", None)
            if isinstance(html_value, str):
                kept["description"] = html_to_markdown(html_value)
            if not kept:
                continue
            kept["group"] = group_name
            target = shared_params if group_name == "serpapi_parameters" else engine_params
            target[param_name] = kept

    return {
        "engine": engine,
        "params": engine_params,
        "common_params": shared_params,
    }
97+
98+
99+
def main() -> int:
    """Fetch playground data and write one JSON file per engine.

    Returns 0 on success. Raises RuntimeError when the playground props
    lack a 'parameters' map.
    """
    props = fetch_props(PLAYGROUND_URL)
    parameters = props.get("parameters")
    if not isinstance(parameters, dict):
        raise RuntimeError("Playground props missing 'parameters' map.")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    written: list[str] = []

    for engine, payload in sorted(parameters.items()):
        # Skip non-string keys, excluded engines, and malformed payloads.
        if not isinstance(engine, str) or engine in EXCLUDED_ENGINES:
            continue
        if not isinstance(payload, dict):
            continue
        document = json.dumps(
            normalize_engine(engine, payload), indent=2, ensure_ascii=False
        )
        (OUTPUT_DIR / f"{engine}.json").write_text(document, encoding="utf-8")
        written.append(engine)

    print(f"Wrote {len(written)} engine files to {OUTPUT_DIR}")
    return 0
121+
122+
123+
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)