Nebutra
diff --git a/‎.claude-plugin/plugin.json‎
Lines changed: 1 addition & 1 deletion b/‎.claude-plugin/plugin.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/publish-skill.yml‎
Lines changed: 4 additions & 1 deletion b/‎.github/workflows/publish-skill.yml‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/release.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎SKILL.md‎
Lines changed: 3 additions & 0 deletions b/‎SKILL.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts/build_skill_package.py‎
Lines changed: 76 additions & 0 deletions b/‎scripts/build_skill_package.py‎
Lines changed: 76 additions & 0 deletions
diff --git a/‎scripts/mineru.py‎
Lines changed: 1 addition & 1 deletion b/‎scripts/mineru.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎skills/mineru/SKILL.md‎
Lines changed: 1 addition & 1 deletion b/‎skills/mineru/SKILL.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎skills/mineru/references/api_reference.md‎
Lines changed: 170 additions & 0 deletions b/‎skills/mineru/references/api_reference.md‎
Lines changed: 170 additions & 0 deletions
@@ -1,6 +1,6 @@
 {
   "name": "mineru",
-  "version": "3.3.0",
+  "version": "3.3.1",
   "description": "Parse PDF / Office / image files into clean Markdown via MinerU — zero-dependency, AI-Native, auto-routing between the free Agent API and the token-gated Standard API, with 15 content-tool delivery sinks.",
   "author": {
     "name": "Nebutra",
 
@@ -49,6 +49,8 @@ jobs:
         uses: astral-sh/setup-uv@v5
       - name: Smoke test CLI via uv (PEP 723 inline script, zero-install)
         run: uv run --script scripts/mineru.py --version
+      - name: Build self-contained marketplace skill
+        run: python scripts/build_skill_package.py --out dist/marketplace-skill
 
   publish-clawhub:
     name: Publish to ClawHub (OpenClaw)
@@ -73,8 +75,9 @@ jobs:
           CLAWHUB_DISABLE_TELEMETRY: "1"
         if: ${{ env.CLAWHUB_TOKEN != '' }}
         run: |
+          python scripts/build_skill_package.py --out dist/marketplace-skill
           clawhub login --token "$CLAWHUB_TOKEN"
-          clawhub skill publish "$PWD/skills/mineru" \
+          clawhub skill publish "$PWD/dist/marketplace-skill" \
             --slug mineru-skill \
             --name "MinerU PDF Parser" \
             --version "${GITHUB_REF_NAME#v}" \
 
@@ -37,8 +37,10 @@ jobs:
       - name: Create skill package
         run: |
           mkdir -p dist
+          python3 scripts/build_skill_package.py --out dist/marketplace-skill
           zip -r dist/mineru-skill.zip \
             SKILL.md \
+            dist/marketplace-skill/ \
             skills/mineru/ \
             .claude-plugin/ \
             scripts/ \
 
@@ -3,6 +3,9 @@ name: mineru
 description: "An AI-Native skill for parsing PDF / Office / image files into clean Markdown with MinerU — a fast, zero-config document parser for AI agents. Works with NO token via the lightweight Agent API and auto-upgrades to the Standard API (token) for large files, batches, and DOCX/HTML/LaTeX export. Use when: (1) Converting PDF/Word/PPT/Excel/image to Markdown, (2) Extracting text, tables, formulas, or running OCR on scanned docs, (3) Batch-parsing a folder in parallel, (4) Piping parsed Markdown straight back to an agent or into Obsidian."
 homepage: https://mineru.net
 metadata:
+  author: Nebutra
+  version: "3.3.1"
+  argument-hint: <pdf-file-or-url>
   openclaw:
     emoji: "📄"
     requires:
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mineru-skill"
-version = "3.3.0"
+version = "3.3.1"
 description = "AI-Native, zero-dependency CLI that parses PDF/Office/image files into clean Markdown via MinerU (Agent + Standard APIs, auto-routing)"
 readme = "README.md"
 license = "MIT"
 
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""Build a self-contained marketplace skill directory."""
+
+from __future__ import annotations
+
+import argparse
+import shutil
+from pathlib import Path
+
+
+# Parent of the directory that holds this script; every packaged path is resolved against it.
+ROOT = Path(__file__).resolve().parents[1]
+# Output location used when --out is not given.
+DEFAULT_OUTPUT = ROOT / "dist" / "marketplace-skill"
+
+# Files and directories (relative to ROOT) copied into the package at the same relative path.
+PACKAGE_PATHS = [
+    "SKILL.md",
+    "scripts",
+    "references",
+    "LICENSE",
+    "README.md",
+    "README_CN.md",
+]
+
+# Filters applied while copying directory trees: directory names skipped wholesale,
+# file suffixes skipped, and individual file names skipped.
+EXCLUDED_DIRS = {"__pycache__", ".pytest_cache", ".ruff_cache"}
+EXCLUDED_SUFFIXES = {".pyc", ".pyo"}
+EXCLUDED_FILES = {"build_skill_package.py"}
+
+
+def _copy_file(src: Path, dst: Path) -> None:
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    shutil.copy2(src, dst)
+
+
+def _copy_tree(src: Path, dst: Path) -> None:
+    for path in src.rglob("*"):
+        rel = path.relative_to(src)
+        if any(part in EXCLUDED_DIRS for part in rel.parts):
+            continue
+        target = dst / rel
+        if path.is_dir():
+            target.mkdir(parents=True, exist_ok=True)
+            continue
+        if path.name in EXCLUDED_FILES:
+            continue
+        if path.suffix in EXCLUDED_SUFFIXES:
+            continue
+        _copy_file(path, target)
+
+
+def build_package(output: Path = DEFAULT_OUTPUT) -> Path:
+    if output.exists():
+        shutil.rmtree(output)
+    output.mkdir(parents=True, exist_ok=True)
+
+    for rel_path in PACKAGE_PATHS:
+        src = ROOT / rel_path
+        dst = output / rel_path
+        if src.is_dir():
+            _copy_tree(src, dst)
+        else:
+            _copy_file(src, dst)
+
+    return output
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Build a self-contained marketplace skill.")
+    parser.add_argument("--out", type=Path, default=DEFAULT_OUTPUT, help="Output directory")
+    args = parser.parse_args()
+
+    output = build_package(args.out)
+    file_count = sum(1 for path in output.rglob("*") if path.is_file())
+    print(f"Built marketplace skill package at {output} ({file_count} files)")
+
+
+# Run the CLI only when executed as a script, so importing this module has no side effects.
+if __name__ == "__main__":
+    main()
@@ -53,7 +53,7 @@
 from pathlib import Path, PurePosixPath
 from typing import Optional
 
-__version__ = "3.3.0"
+__version__ = "3.3.1"
 
 # --------------------------------------------------------------------------- #
 # Constants (kept in sync with https://mineru.net/apiManage/docs)
 
@@ -3,7 +3,7 @@ name: mineru
 description: An AI-Native skill for parsing PDF / Office / image files into Markdown with MinerU — a fast, zero-config document parser for AI agents. Works with NO token via the Agent API and auto-upgrades to the Standard API (token) for large files, batches, and DOCX/HTML/LaTeX export. Use when converting PDF/Word/PPT/Excel/image documents, extracting text/tables/formulas, running OCR, or batch processing.
 metadata:
   author: Nebutra
-  version: "3.3.0"
+  version: "3.3.1"
   argument-hint: <pdf-file-or-url>
 ---
 
 
@@ -0,0 +1,170 @@
+# MinerU API Reference
+
+Official docs: https://mineru.net/apiManage/docs · Token: https://mineru.net/apiManage/token
+
+MinerU exposes **two** document-parsing APIs. This skill auto-routes between them.
+
+| | 🎯 Standard API | ⚡ Agent API (lightweight) |
+|---|---|---|
+| Base URL | `https://mineru.net/api/v4` | `https://mineru.net/api/v1/agent` |
+| Token | **required** (`Bearer`) | **none** (IP rate-limited) |
+| Models | `pipeline` / `vlm` / `MinerU-HTML` | fixed lightweight `pipeline` |
+| File size | ≤ 200 MB | ≤ 10 MB |
+| Pages | ≤ 200 | ≤ 20 |
+| Batch | ≤ 50 per request | single file only |
+| Output | zip (Markdown + JSON, optional DOCX/HTML/LaTeX) | Markdown only (CDN link) |
+| Designed for | high-accuracy / complex / batch | AI-agent / quick / no-login |
+
+Free Standard-API quota: **1000 pages/day at highest priority** (overflow is lower priority).
+
+---
+
+## Authentication (Standard API)
+
+```
+Authorization: Bearer YOUR_API_TOKEN
+```
+
+Get a token at https://mineru.net/apiManage/token.
+
+> **Response envelopes.** Business endpoints return `{"code":0,"data":{…},"msg":"ok"}`.
+> The auth/gateway layer returns a *different* shape on failure:
+> `{"success":false,"msgCode":"A0202","msg":"user authenticate failed"}`.
+> Clients must handle both — this skill maps `msgCode` to the same error hints.
+
+---
+
+## Standard API endpoints (`/api/v4`)
+
+### Single URL — `POST /extract/task`
+
+```json
+{
+  "url": "https://example.com/doc.pdf",
+  "model_version": "vlm",
+  "is_ocr": false,
+  "enable_formula": true,
+  "enable_table": true,
+  "language": "ch",
+  "page_ranges": "1-10",
+  "extra_formats": ["docx", "html"],
+  "data_id": "my-document"
+}
+```
+Response → `{ "code": 0, "data": { "task_id": "…" } }`. HTML inputs require `model_version: "MinerU-HTML"`.
+
+### Get task result — `GET /extract/task/{task_id}`
+
+```json
+{ "code": 0, "data": { "task_id": "…", "state": "done", "full_zip_url": "https://…", "err_msg": "" } }
+```
+
+### Batch local upload — `POST /file-urls/batch`
+
+Returns signed upload URLs; PUT each file (no `Content-Type`). Up to **50** files / request.
+
+```json
+{ "files": [ { "name": "doc.pdf", "data_id": "doc" } ], "model_version": "vlm" }
+```
+Response → `{ "code": 0, "data": { "batch_id": "…", "file_urls": ["https://…"] } }`.
+
+### Batch URL — `POST /extract/task/batch`
+
+```json
+{ "files": [ { "url": "https://…/doc.pdf", "data_id": "doc" } ], "model_version": "vlm" }
+```
+
+### Batch results — `GET /extract-results/batch/{batch_id}`
+
+```json
+{ "code": 0, "data": { "batch_id": "…", "extract_result": [
+  { "file_name": "doc.pdf", "state": "done", "full_zip_url": "https://…" }
+] } }
+```
+
+---
+
+## Agent API endpoints (`/api/v1/agent`) — no token
+
+### URL — `POST /parse/url`
+
+```json
+{ "url": "https://…/doc.pdf", "language": "ch", "enable_table": true, "is_ocr": false, "enable_formula": true, "page_range": "1-10" }
+```
+`page_range` accepts `from-to` or a single page only (no commas). Returns `{ "code": 0, "data": { "task_id": "…" } }`.
+
+### File — `POST /parse/file`
+
+```json
+{ "file_name": "doc.pdf", "language": "ch" }
+```
+Response → `{ "data": { "task_id": "…", "file_url": "https://oss…" } }`; PUT the file to `file_url`.
+
+### Result — `GET /parse/{task_id}`
+
+```json
+{ "code": 0, "data": { "task_id": "…", "state": "done", "markdown_url": "https://cdn…/full.md" } }
+```
+
+---
+
+## Task states
+
+`pending` (queued) · `running` (parsing) · `converting` (format conversion) ·
+`uploading` (downloading source, Agent) · `waiting-file` (awaiting upload) ·
+`done` (complete) · `failed` (error).
+
+---
+
+## Parameters
+
+| Parameter | Type | Default | Notes |
+|-----------|------|---------|-------|
+| `model_version` | string | `pipeline` | `pipeline`, `vlm` (recommended), `MinerU-HTML` (HTML only) |
+| `is_ocr` | bool | `false` | OCR for scanned docs (pipeline/vlm) |
+| `enable_formula` | bool | `true` | Formula recognition |
+| `enable_table` | bool | `true` | Table recognition |
+| `language` | string | `ch` | OCR language (see official `language` table) |
+| `page_ranges` | string | all | Standard: `"2,4-6"`; Agent `page_range`: `"1-10"` only |
+| `extra_formats` | array | `[]` | `docx` / `html` / `latex` (Standard only) |
+| `data_id` | string | – | `[A-Za-z0-9_.-]`, ≤ 128 chars |
+| `no_cache` | bool | `false` | Bypass URL cache (Standard) |
+| `cache_tolerance` | int | `900` | Cache TTL seconds (Standard) |
+
+---
+
+## Limits
+
+| | Standard | Agent |
+|---|---|---|
+| File size | 200 MB | 10 MB |
+| Pages | 200 | 20 |
+| Batch | 50 / request | 1 |
+| Quota | 1000 pages/day priority | IP rate-limited (HTTP 429) |
+
+Supported types: PDF, images (png/jpg/jpeg/jp2/webp/gif/bmp), Doc(x), Ppt(x), Xls(x); HTML is Standard-only.
+
+---
+
+## Error codes
+
+| Code | Meaning |
+|------|---------|
+| `A0202` | Invalid token |
+| `A0211` | Token expired |
+| `-500` | Parameter error |
+| `-10001` / `-10002` | Service error / invalid params |
+| `-60002` | Unsupported file format |
+| `-60003` / `-60004` | File read failed / empty file |
+| `-60005` | File too large (> 200 MB) |
+| `-60006` | Too many pages (> 200) |
+| `-60008` | File read timeout (URL unreachable) |
+| `-60010` | Parse failed |
+| `-60015` / `-60016` | File / format conversion failed |
+| `-60018` | Daily quota reached |
+| `-60022` | Web page read failed (rate-limited) |
+| **Agent API** | |
+| `-30001` | Exceeds Agent 10 MB limit → use Standard API |
+| `-30002` | Unsupported file type for Agent |
+| `-30003` | Exceeds Agent 20-page limit → use Standard API or `--pages` |
+| `-30004` | Invalid request parameters |
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "mineru",`
`3`		`- "version": "3.3.0",`
	`3`	`+ "version": "3.3.1",`
`4`	`4`	`"description": "Parse PDF / Office / image files into clean Markdown via MinerU — zero-dependency, AI-Native, auto-routing between the free Agent API and the token-gated Standard API, with 15 content-tool delivery sinks.",`
`5`	`5`	`"author": {`
`6`	`6`	`"name": "Nebutra",`