Skip to content

Commit 15f161e

Browse files
Panniantong and claude authored
fix(reddit,bilibili): switch to rdt-cli and add bili-cli support (#235)
Reddit: Exa crawling had chronic CRAWL_LIVECRAWL_TIMEOUT issues. rdt-cli (304 stars, public-clis) works without login — search, read full posts, and comments all verified. Massive improvement. Bilibili: add bili-cli (590 stars) as optional enhanced backend for hot/rank/search/feed. yt-dlp remains for video metadata + subtitles. Also fix UA string (was "agent-reach/1.0", now proper browser UA). 75 tests passing. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 794455c commit 15f161e

File tree

6 files changed

+1694
-76
lines changed

6 files changed

+1694
-76
lines changed

agent_reach/channels/bilibili.py

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
"""Bilibili — video via yt-dlp, search via /x/web-interface API."""
2+
"""Bilibili — video via yt-dlp, search/browse via bili-cli or API."""
33

44
import json
55
import os
@@ -8,7 +8,7 @@
88
import urllib.request
99
from .base import Channel
1010

11-
_UA = "agent-reach/1.0"
11+
_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
1212
_TIMEOUT = 10
1313
_SEARCH_API = "https://api.bilibili.com/x/web-interface/search/all/v2?keyword=test&page=1"
1414

@@ -24,23 +24,10 @@ def _search_api_ok() -> bool:
2424
return False
2525

2626

27-
def _bilisearch_ok() -> bool:
28-
"""Return True if yt-dlp bilisearch works without 412."""
29-
try:
30-
result = subprocess.run(
31-
["yt-dlp", "--flat-playlist", "--no-download", "-j",
32-
"bilisearch1:test"],
33-
capture_output=True, text=True, timeout=_TIMEOUT,
34-
)
35-
return result.returncode == 0
36-
except Exception:
37-
return False
38-
39-
4027
class BilibiliChannel(Channel):
4128
name = "bilibili"
42-
description = "B站视频和字幕"
43-
backends = ["yt-dlp", "B站搜索 API"]
29+
description = "B站视频、字幕和搜索"
30+
backends = ["yt-dlp", "bili-cli (可选)", "B站搜索 API"]
4431
tier = 1
4532

4633
def can_handle(self, url: str) -> bool:
@@ -53,28 +40,27 @@ def check(self, config=None):
5340
return "off", "yt-dlp 未安装。安装:pip install yt-dlp"
5441

5542
proxy = (config.get("bilibili_proxy") if config else None) or os.environ.get("BILIBILI_PROXY")
56-
57-
# 检测搜索 API 连通性
58-
api_ok = _search_api_ok()
59-
# 检测 yt-dlp bilisearch 是否 412
60-
ytdlp_search_ok = _bilisearch_ok()
43+
has_bili_cli = bool(shutil.which("bili"))
6144

6245
parts = []
6346

6447
# 视频读取状态
6548
if proxy:
6649
parts.append("视频读取:yt-dlp(代理已配置)")
6750
else:
68-
parts.append("视频读取:yt-dlp(本地环境,服务器可能需要代理)")
51+
parts.append("视频读取:yt-dlp")
6952

70-
# 搜索状态
71-
if api_ok:
72-
parts.append("搜索:B站 API 可用(/x/web-interface/search/all/v2)")
53+
# bili-cli 增强
54+
if has_bili_cli:
55+
parts.append("搜索/热门/排行:bili-cli 可用")
7356
else:
74-
parts.append("搜索:B站 API 不可达,搜索功能可能受限")
75-
76-
if not ytdlp_search_ok:
77-
parts.append("提示:yt-dlp bilisearch 不可用(可能 HTTP 412 反爬),搜索将走 B站 API")
78-
79-
status = "ok" if api_ok else "warn"
57+
# 检测搜索 API 连通性
58+
api_ok = _search_api_ok()
59+
if api_ok:
60+
parts.append("搜索:B站 API 可用")
61+
else:
62+
parts.append("搜索:B站 API 不可达")
63+
parts.append("提示:安装 bili-cli 可解锁热门/排行/动态:pipx install bilibili-cli")
64+
65+
status = "ok" if has_bili_cli or _search_api_ok() else "warn"
8066
return status, "。".join(parts)

agent_reach/channels/reddit.py

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,15 @@
11
# -*- coding: utf-8 -*-
2-
"""Reddit — search and read via Exa (no direct Reddit API needed)."""
2+
"""Reddit — search and read via rdt-cli (public-clis/rdt-cli)."""
33

44
import shutil
55
import subprocess
66
from .base import Channel
77

88

9-
def _exa_available() -> bool:
10-
"""Return True if mcporter is installed and Exa MCP is configured."""
11-
mcporter = shutil.which("mcporter")
12-
if not mcporter:
13-
return False
14-
try:
15-
r = subprocess.run(
16-
[mcporter, "config", "list"], capture_output=True,
17-
encoding="utf-8", errors="replace", timeout=5
18-
)
19-
return "exa" in r.stdout.lower()
20-
except Exception:
21-
return False
22-
23-
249
class RedditChannel(Channel):
2510
name = "reddit"
26-
description = "Reddit 帖子和评论(通过 Exa 搜索和阅读)"
27-
backends = ["Exa via mcporter"]
11+
description = "Reddit 帖子和评论"
12+
backends = ["rdt-cli"]
2813
tier = 0
2914

3015
def can_handle(self, url: str) -> bool:
@@ -33,10 +18,14 @@ def can_handle(self, url: str) -> bool:
3318
return "reddit.com" in d or "redd.it" in d
3419

3520
def check(self, config=None):
36-
if _exa_available():
37-
return "ok", "通过 Exa 搜索和阅读 Reddit 内容(免费,无需代理)"
21+
rdt = shutil.which("rdt")
22+
if rdt:
23+
return "ok", (
24+
"rdt-cli 可用(搜索帖子、阅读全文、查看评论,无需登录)"
25+
)
3826
return "off", (
39-
"需要 mcporter + Exa MCP。安装:\n"
40-
" npm install -g mcporter\n"
41-
" mcporter config add exa https://mcp.exa.ai/mcp"
27+
"需要安装 rdt-cli:\n"
28+
" pipx install rdt-cli\n"
29+
"或:\n"
30+
" uv tool install rdt-cli"
4231
)

agent_reach/cli.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,9 @@ def _cmd_install(args):
243243
# Environment-specific advice
244244
if env == "server":
245245
print()
246-
print("Tip: Reddit and Bilibili block server IPs.")
247-
print(" Reddit search still works via Exa (free).")
248-
print(" For full access: agent-reach configure proxy http://user:pass@ip:port")
246+
print("Tip: Bilibili may block server IPs.")
247+
print(" Reddit: rdt-cli works without proxy (pipx install rdt-cli).")
248+
print(" For Bilibili full access: agent-reach configure proxy http://user:pass@ip:port")
249249
print(" Cheap option: https://www.webshare.io ($1/month)")
250250

251251
# Test channels
@@ -1000,7 +1000,7 @@ def _cmd_configure(args):
10001000
if args.key == "proxy":
10011001
config.set("bilibili_proxy", value)
10021002
print(f"✅ Proxy configured for Bilibili!")
1003-
print(" Note: Reddit 已改为通过 Exa 访问,无需代理。")
1003+
print(" Note: Reddit 已改为通过 rdt-cli 访问,无需代理。")
10041004

10051005
elif args.key == "twitter-cookies":
10061006
# Accept two formats:
@@ -1433,9 +1433,9 @@ def _cmd_setup():
14331433
print(" 跳过。公开 API 也能用")
14341434
print()
14351435

1436-
# Step 3: Reddit — no config needed (uses Exa)
1437-
print("【信息】Reddit — 通过 Exa 搜索和阅读,无需配置")
1438-
print(" 搜索和阅读 Reddit 内容已通过 Exa 自动完成,免费无需代理。")
1436+
# Step 3: Reddit — rdt-cli
1437+
print("【信息】Reddit — 通过 rdt-cli 搜索和阅读,无需配置")
1438+
print(" 安装:pipx install rdt-cli")
14391439
print()
14401440

14411441
# Step 4: Groq (Whisper)

agent_reach/skill/SKILL.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,11 @@ twitter search "query" --limit 10
6363
# YouTube/B站字幕
6464
yt-dlp --write-sub --skip-download -o "/tmp/%(id)s" "URL"
6565

66-
# Reddit 搜索(通过 Exa,免费无需代理)
67-
mcporter call 'exa.web_search_exa(query: "query", numResults: 5, includeDomains: ["reddit.com"])'
66+
# Reddit 搜索
67+
rdt search "query" --limit 10
6868

69-
# Reddit 读帖(通过 Exa)
70-
mcporter call 'exa.crawling_exa(urls: ["https://www.reddit.com/r/.../comments/.../"], maxCharacters: 10000)'
69+
# Reddit 读帖 + 评论
70+
rdt read POST_ID
7171

7272
# V2EX 热门
7373
curl -s "https://www.v2ex.com/api/topics/hot.json" -H "User-Agent: agent-reach/1.0"

agent_reach/skill/references/social.md

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -142,20 +142,23 @@ user = ch.get_user("Livid")
142142

143143
> **节点列表**: https://www.v2ex.com/planes
144144
145-
## Reddit (通过 Exa)
145+
## Reddit (rdt-cli)
146146

147-
Reddit 封锁了几乎所有非浏览器访问(包括代理 IP)。搜索和阅读全部通过 Exa 完成,免费且无需代理。
147+
```bash
148+
# 搜索帖子
149+
rdt search "query" --limit 10
148150

149-
### 搜索 Reddit 内容
151+
# 读帖子全文 + 评论
152+
rdt read POST_ID
150153

151-
```bash
152-
mcporter call 'exa.web_search_exa(query: "your search query", numResults: 5, includeDomains: ["reddit.com"])'
153-
```
154+
# 浏览 subreddit
155+
rdt sub python --limit 20
154156

155-
### 阅读完整帖子和评论
157+
# 浏览热门
158+
rdt popular --limit 10
156159

157-
```bash
158-
mcporter call 'exa.crawling_exa(urls: ["https://www.reddit.com/r/SUBREDDIT/comments/POST_ID/TITLE/"], maxCharacters: 10000)'
160+
# 浏览 /r/all
161+
rdt all --limit 10
159162
```
160163

161-
> **零配置**: 只需安装 Exa MCP(`agent-reach install --env=auto` 自动完成)。无需代理,无需 API Key
164+
> **安装**: `pipx install rdt-cli`。无需登录即可搜索和阅读

0 commit comments

Comments
 (0)