Skip to content

Commit c5f4536

Browse files
committed
Fix LobeHub skill package resources
1 parent bdebca3 commit c5f4536

38 files changed

Lines changed: 4766 additions & 9 deletions

.claude-plugin/plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "mineru",
3-
"version": "3.3.0",
3+
"version": "3.3.1",
44
"description": "Parse PDF / Office / image files into clean Markdown via MinerU — zero-dependency, AI-Native, auto-routing between the free Agent API and the token-gated Standard API, with 15 content-tool delivery sinks.",
55
"author": {
66
"name": "Nebutra",

.github/workflows/publish-skill.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ jobs:
4949
uses: astral-sh/setup-uv@v5
5050
- name: Smoke test CLI via uv (PEP 723 inline script, zero-install)
5151
run: uv run --script scripts/mineru.py --version
52+
- name: Build self-contained marketplace skill
53+
run: python scripts/build_skill_package.py --out dist/marketplace-skill
5254

5355
publish-clawhub:
5456
name: Publish to ClawHub (OpenClaw)
@@ -73,8 +75,9 @@ jobs:
7375
CLAWHUB_DISABLE_TELEMETRY: "1"
7476
if: ${{ env.CLAWHUB_TOKEN != '' }}
7577
run: |
78+
python scripts/build_skill_package.py --out dist/marketplace-skill
7679
clawhub login --token "$CLAWHUB_TOKEN"
77-
clawhub skill publish "$PWD/skills/mineru" \
80+
clawhub skill publish "$PWD/dist/marketplace-skill" \
7881
--slug mineru-skill \
7982
--name "MinerU PDF Parser" \
8083
--version "${GITHUB_REF_NAME#v}" \

.github/workflows/release.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ jobs:
3737
- name: Create skill package
3838
run: |
3939
mkdir -p dist
40+
python3 scripts/build_skill_package.py --out dist/marketplace-skill
4041
zip -r dist/mineru-skill.zip \
4142
SKILL.md \
43+
dist/marketplace-skill/ \
4244
skills/mineru/ \
4345
.claude-plugin/ \
4446
scripts/ \

SKILL.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ name: mineru
33
description: "An AI-Native skill for parsing PDF / Office / image files into clean Markdown with MinerU — a fast, zero-config document parser for AI agents. Works with NO token via the lightweight Agent API and auto-upgrades to the Standard API (token) for large files, batches, and DOCX/HTML/LaTeX export. Use when: (1) Converting PDF/Word/PPT/Excel/image to Markdown, (2) Extracting text, tables, formulas, or running OCR on scanned docs, (3) Batch-parsing a folder in parallel, (4) Piping parsed Markdown straight back to an agent or into Obsidian."
44
homepage: https://mineru.net
55
metadata:
6+
author: Nebutra
7+
version: "3.3.1"
8+
argument-hint: <pdf-file-or-url>
69
openclaw:
710
emoji: "📄"
811
requires:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "mineru-skill"
7-
version = "3.3.0"
7+
version = "3.3.1"
88
description = "AI-Native, zero-dependency CLI that parses PDF/Office/image files into clean Markdown via MinerU (Agent + Standard APIs, auto-routing)"
99
readme = "README.md"
1010
license = "MIT"

scripts/build_skill_package.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/usr/bin/env python3
2+
"""Build a self-contained marketplace skill directory."""
3+
4+
from __future__ import annotations
5+
6+
import argparse
7+
import shutil
8+
from pathlib import Path
9+
10+
11+
# Repository root: this script is expected to live one directory below it.
ROOT = Path(__file__).resolve().parents[1]
DEFAULT_OUTPUT = ROOT / "dist" / "marketplace-skill"

# Files and directories, relative to the repository root, that are copied
# into the package.
PACKAGE_PATHS = [
    "SKILL.md",
    "scripts",
    "references",
    "LICENSE",
    "README.md",
    "README_CN.md",
]

# Anything matching these is left out when a directory is copied.
EXCLUDED_DIRS = {"__pycache__", ".pytest_cache", ".ruff_cache"}
EXCLUDED_SUFFIXES = {".pyc", ".pyo"}
EXCLUDED_FILES = {"build_skill_package.py"}


def _copy_file(src: Path, dst: Path) -> None:
    """Copy *src* to *dst*, creating the destination's parent directories."""
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)


def _copy_tree(src: Path, dst: Path) -> None:
    """Recursively copy directory *src* into *dst*, honouring the exclusions.

    Entries located under a directory named in ``EXCLUDED_DIRS`` are skipped,
    as are files named in ``EXCLUDED_FILES`` or ending in one of
    ``EXCLUDED_SUFFIXES``. Directories are created even when they end up empty.
    """
    # Create the destination up front so a source directory without any
    # copied entries still appears in the package.
    dst.mkdir(parents=True, exist_ok=True)
    for path in src.rglob("*"):
        rel = path.relative_to(src)
        if not EXCLUDED_DIRS.isdisjoint(rel.parts):
            continue
        target = dst / rel
        if path.is_dir():
            target.mkdir(parents=True, exist_ok=True)
            continue
        if path.name in EXCLUDED_FILES or path.suffix in EXCLUDED_SUFFIXES:
            continue
        _copy_file(path, target)


def _ensure_safe_output(output: Path, root: Path, sources: list[Path]) -> None:
    """Raise ``ValueError`` if wiping *output* would destroy or nest the inputs.

    The output directory is deleted before every build, so it must not be the
    repository root, one of its ancestors, a package source, or a directory
    that contains or lies inside a package source.
    """
    resolved = output.resolve()
    resolved_root = root.resolve()
    if resolved == resolved_root or resolved in resolved_root.parents:
        raise ValueError(
            f"Refusing to use {output} as output: it contains the repository root"
        )
    for src in sources:
        resolved_src = src.resolve()
        if (
            resolved == resolved_src
            or resolved_src in resolved.parents
            or resolved in resolved_src.parents
        ):
            raise ValueError(
                f"Refusing to use {output} as output: it overlaps package source {src}"
            )


def build_package(
    output: Path = DEFAULT_OUTPUT,
    *,
    root: Path = ROOT,
    package_paths: list[str] | None = None,
) -> Path:
    """Rebuild the package directory at *output* and return *output*.

    Args:
        output: Directory to (re)create. Any existing content is removed.
        root: Directory the package paths are resolved against.
        package_paths: Paths relative to *root* to copy; defaults to
            ``PACKAGE_PATHS``.

    Raises:
        FileNotFoundError: If a package path does not exist under *root*.
        ValueError: If *output* overlaps *root* or a package source.

    Both checks run before anything is deleted, so a failed build leaves a
    previously built *output* untouched.
    """
    rel_paths = list(PACKAGE_PATHS if package_paths is None else package_paths)
    sources = [root / rel_path for rel_path in rel_paths]

    missing = [rel for rel, src in zip(rel_paths, sources) if not src.exists()]
    if missing:
        raise FileNotFoundError(
            f"Missing package source(s) under {root}: {', '.join(missing)}"
        )
    _ensure_safe_output(output, root, sources)

    if output.exists():
        shutil.rmtree(output)
    output.mkdir(parents=True, exist_ok=True)

    for rel_path, src in zip(rel_paths, sources):
        dst = output / rel_path
        if src.is_dir():
            _copy_tree(src, dst)
        else:
            _copy_file(src, dst)

    return output


def main(argv: list[str] | None = None) -> None:
    """Parse ``--out``, build the package and print a one-line summary.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Build a self-contained marketplace skill.")
    parser.add_argument("--out", type=Path, default=DEFAULT_OUTPUT, help="Output directory")
    args = parser.parse_args(argv)

    try:
        output = build_package(args.out)
    except (FileNotFoundError, ValueError) as exc:
        # Exit with a short message and a non-zero status instead of a traceback.
        raise SystemExit(f"error: {exc}") from exc

    file_count = sum(1 for path in output.rglob("*") if path.is_file())
    print(f"Built marketplace skill package at {output} ({file_count} files)")


if __name__ == "__main__":
    main()

scripts/mineru.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
from pathlib import Path, PurePosixPath
5454
from typing import Optional
5555

56-
__version__ = "3.3.0"
56+
__version__ = "3.3.1"
5757

5858
# --------------------------------------------------------------------------- #
5959
# Constants (kept in sync with https://mineru.net/apiManage/docs)

skills/mineru/SKILL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: mineru
33
description: An AI-Native skill for parsing PDF / Office / image files into Markdown with MinerU — a fast, zero-config document parser for AI agents. Works with NO token via the Agent API and auto-upgrades to the Standard API (token) for large files, batches, and DOCX/HTML/LaTeX export. Use when converting PDF/Word/PPT/Excel/image documents, extracting text/tables/formulas, running OCR, or batch processing.
44
metadata:
55
author: Nebutra
6-
version: "3.3.0"
6+
version: "3.3.1"
77
argument-hint: <pdf-file-or-url>
88
---
99

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
# MinerU API Reference
2+
3+
Official docs: https://mineru.net/apiManage/docs · Token: https://mineru.net/apiManage/token
4+
5+
MinerU exposes **two** document-parsing APIs. This skill auto-routes between them.
6+
7+
| | 🎯 Standard API | ⚡ Agent API (lightweight) |
8+
|---|---|---|
9+
| Base URL | `https://mineru.net/api/v4` | `https://mineru.net/api/v1/agent` |
10+
| Token | **required** (`Bearer`) | **none** (IP rate-limited) |
11+
| Models | `pipeline` / `vlm` / `MinerU-HTML` | fixed lightweight `pipeline` |
12+
| File size | ≤ 200 MB | ≤ 10 MB |
13+
| Pages | ≤ 200 | ≤ 20 |
14+
| Batch | ≤ 50 per request | single file only |
15+
| Output | zip (Markdown + JSON, optional DOCX/HTML/LaTeX) | Markdown only (CDN link) |
16+
| Designed for | high-accuracy / complex / batch | AI-agent / quick / no-login |
17+
18+
Free Standard-API quota: **1000 pages/day at highest priority** (overflow is lower priority).
19+
20+
---
21+
22+
## Authentication (Standard API)
23+
24+
```
25+
Authorization: Bearer YOUR_API_TOKEN
26+
```
27+
28+
Get a token at https://mineru.net/apiManage/token.
29+
30+
> **Response envelopes.** Business endpoints return `{"code":0,"data":{…},"msg":"ok"}`.
31+
> The auth/gateway layer returns a *different* shape on failure:
32+
> `{"success":false,"msgCode":"A0202","msg":"user authenticate failed"}`.
33+
> Clients must handle both — this skill maps `msgCode` to the same error hints.
34+
35+
---
36+
37+
## Standard API endpoints (`/api/v4`)
38+
39+
### Single URL — `POST /extract/task`
40+
41+
```json
42+
{
43+
"url": "https://example.com/doc.pdf",
44+
"model_version": "vlm",
45+
"is_ocr": false,
46+
"enable_formula": true,
47+
"enable_table": true,
48+
"language": "ch",
49+
"page_ranges": "1-10",
50+
"extra_formats": ["docx", "html"],
51+
"data_id": "my-document"
52+
}
53+
```
54+
Response → `{ "code": 0, "data": { "task_id": "…" } }`. HTML inputs require `model_version: "MinerU-HTML"`.
55+
56+
### Get task result — `GET /extract/task/{task_id}`
57+
58+
```json
59+
{ "code": 0, "data": { "task_id": "", "state": "done", "full_zip_url": "https://…", "err_msg": "" } }
60+
```
61+
62+
### Batch local upload — `POST /file-urls/batch`
63+
64+
Returns signed upload URLs; PUT each file (no `Content-Type`). Up to **50** files / request.
65+
66+
```json
67+
{ "files": [ { "name": "doc.pdf", "data_id": "doc" } ], "model_version": "vlm" }
68+
```
69+
Response → `{ "code": 0, "data": { "batch_id": "…", "file_urls": ["https://…"] } }`.
70+
71+
### Batch URL — `POST /extract/task/batch`
72+
73+
```json
74+
{ "files": [ { "url": "https://…/doc.pdf", "data_id": "doc" } ], "model_version": "vlm" }
75+
```
76+
77+
### Batch results — `GET /extract-results/batch/{batch_id}`
78+
79+
```json
80+
{ "code": 0, "data": { "batch_id": "", "extract_result": [
81+
{ "file_name": "doc.pdf", "state": "done", "full_zip_url": "https://…" }
82+
] } }
83+
```
84+
85+
---
86+
87+
## Agent API endpoints (`/api/v1/agent`) — no token
88+
89+
### URL — `POST /parse/url`
90+
91+
```json
92+
{ "url": "https://…/doc.pdf", "language": "ch", "enable_table": true, "is_ocr": false, "enable_formula": true, "page_range": "1-10" }
93+
```
94+
`page_range` accepts `from-to` or a single page only (no commas). Returns `{ "code": 0, "data": { "task_id": "…" } }`.
95+
96+
### File — `POST /parse/file`
97+
98+
```json
99+
{ "file_name": "doc.pdf", "language": "ch" }
100+
```
101+
Response → `{ "data": { "task_id": "…", "file_url": "https://oss…" } }`; PUT the file to `file_url`.
102+
103+
### Result — `GET /parse/{task_id}`
104+
105+
```json
106+
{ "code": 0, "data": { "task_id": "", "state": "done", "markdown_url": "https://cdn…/full.md" } }
107+
```
108+
109+
---
110+
111+
## Task states
112+
113+
`pending` (queued) · `running` (parsing) · `converting` (format conversion) ·
114+
`uploading` (downloading source, Agent) · `waiting-file` (awaiting upload) ·
115+
`done` (complete) · `failed` (error).
116+
117+
---
118+
119+
## Parameters
120+
121+
| Parameter | Type | Default | Notes |
122+
|-----------|------|---------|-------|
123+
| `model_version` | string | `pipeline` | `pipeline`, `vlm` (recommended), `MinerU-HTML` (HTML only) |
124+
| `is_ocr` | bool | `false` | OCR for scanned docs (pipeline/vlm) |
125+
| `enable_formula` | bool | `true` | Formula recognition |
126+
| `enable_table` | bool | `true` | Table recognition |
127+
| `language` | string | `ch` | OCR language (see official `language` table) |
128+
| `page_ranges` | string | all | Standard: `"2,4-6"`; Agent uses `page_range` instead: one `from-to` range or a single page (e.g. `"1-10"`), no commas |
129+
| `extra_formats` | array | `[]` | `docx` / `html` / `latex` (Standard only) |
130+
| `data_id` | string | — | `[A-Za-z0-9_.-]`, ≤ 128 chars |
131+
| `no_cache` | bool | `false` | Bypass URL cache (Standard) |
132+
| `cache_tolerance` | int | `900` | Cache TTL seconds (Standard) |
133+
134+
---
135+
136+
## Limits
137+
138+
| | Standard | Agent |
139+
|---|---|---|
140+
| File size | 200 MB | 10 MB |
141+
| Pages | 200 | 20 |
142+
| Batch | 50 / request | 1 |
143+
| Quota | 1000 pages/day priority | IP rate-limited (HTTP 429) |
144+
145+
Supported types: PDF, images (png/jpg/jpeg/jp2/webp/gif/bmp), Doc(x), Ppt(x), Xls(x); HTML is Standard-only.
146+
147+
---
148+
149+
## Error codes
150+
151+
| Code | Meaning |
152+
|------|---------|
153+
| `A0202` | Invalid token |
154+
| `A0211` | Token expired |
155+
| `-500` | Parameter error |
156+
| `-10001` / `-10002` | Service error / invalid params |
157+
| `-60002` | Unsupported file format |
158+
| `-60003` / `-60004` | File read failed / empty file |
159+
| `-60005` | File too large (> 200 MB) |
160+
| `-60006` | Too many pages (> 200) |
161+
| `-60008` | File read timeout (URL unreachable) |
162+
| `-60010` | Parse failed |
163+
| `-60015` / `-60016` | File / format conversion failed |
164+
| `-60018` | Daily quota reached |
165+
| `-60022` | Web page read failed (rate-limited) |
166+
| **Agent API** | |
167+
| `-30001` | Exceeds Agent 10 MB limit → use Standard API |
168+
| `-30002` | Unsupported file type for Agent |
169+
| `-30003` | Exceeds Agent 20-page limit → use Standard API or `--pages` |
170+
| `-30004` | Invalid request parameters |

0 commit comments

Comments
 (0)