11#!/usr/bin/env python3
2- """Static check: forbid relative-up (``..``) markdown links inside ``docs/``.
2+ """Static check: forbid markdown links inside ``docs/`` that VitePress can't resolve.
33
44Why this exists
55---------------
66``docs/`` ships through VitePress, which serves pages from ``docs/`` as the
7- deploy root. Any markdown link target that escapes the doc root with
8- ``..`` (e.g. ``[foo](../../static/foo.js)``) breaks the VitePress build:
9- the path resolves outside the docs site and breaks deploy on every push.
10-
11- We've fixed it more than once — a previous round of "just one ../static
12- link, this once" cost a doc-pipeline cleanup PR. This lint exists so the
13- next attempt fails CI before merge.
7+ deploy root and runs a dead-link check at build time: every markdown link is
8+ resolved as a site page, and any target it can't resolve fails the build
9+ (``[vitepress] N dead link(s) found`` → ``exit 1``). A broken link therefore
10+ breaks deploy on every push. This lint catches the two recurring forms
11+ *before* merge, so the build doesn't have to be the thing that notices.
1412
1513What it flags
1614-------------
17- Markdown link patterns whose target starts with ``..`` (any number of
18- parent segments) inside any ``.md`` file under ``docs/``:
15+ Both forms are markdown inline links (``](target)``) outside fenced code
16+ blocks, inside any built ``.md`` file under ``docs/``:
17+
18+ 1. **Relative-up** — target starts with ``..`` and escapes the doc root::
19+
20+ [text](../foo)
21+ [text](../../static/foo.js)
22+
23+ We've fixed this more than once — a previous "just one ../static link,
24+ this once" cost a doc-pipeline cleanup PR.
25+
26+ 2. **Source-file** — a *relative* link to a repo source file (``.py``,
27+ ``.ts``, … optionally with a ``:line`` anchor) that has no doc page::
1928
20- [text](../foo)
21- [text](../../bar/baz.md)
22- [text](.../weird) # leading ``..`` covers this too
29+ [text](utils/token_tracker.py)
30+ [text](main_routers/system_router.py:194)
2331
24- Other ``..`` text (shell commands inside fenced code blocks, prose
25- mentions, etc.) is NOT flagged — only the ``](...)`` link target form.
32+ VitePress resolves these against the current doc dir (e.g.
33+ ``docs/design/security/main_routers/...``), finds nothing, and aborts.
34+ This is the form that broke the build in the telemetry / local-mutation
35+ design docs — the ``..`` rule above missed it because the target has no
36+ leading ``..``.
37+
38+ Absolute URLs (``http(s)://…`` incl. GitHub ``blob`` links), site-absolute
39+ paths (``/logo.jpg``), ``mailto:`` / ``tel:`` links, and in-page anchors
40+ (``#section``) are fine and never flagged. ``..`` text outside the ``](...)``
41+ link form (shell snippets, prose) is not flagged either.
42+
43+ Build-scope parity
44+ ------------------
45+ Only files VitePress actually builds are inspected:
46+ - ``node_modules/`` is skipped (third-party READMEs, never deployed).
47+ - The README translations in ``SRC_EXCLUDE`` are skipped to mirror the
48+ ``srcExclude`` list in ``docs/.vitepress/config.ts`` — keep the two in
49+ sync if that list changes.
2650
2751Suppression
2852-----------
29- None. If you genuinely need to reference a non-docs file, either inline
30- the path as code (`` `static/foo.js` ``) without a link, or move the
31- content into ``docs/``. A per-line escape hatch would defeat the purpose.
53+ None. If you genuinely need to reference a non-docs file, do one of: inline
54+ the path as code (`` `utils/token_tracker.py:194` ``) without a link, use a
55+ full GitHub URL (``https://github.com/.../blob/main/utils/token_tracker.py``),
56+ or move the content into ``docs/``. A per-line escape hatch would defeat the
57+ purpose.
3258
3359Run
3460---
4470REPO_ROOT = Path (__file__ ).resolve ().parent .parent
4571DOCS_DIR = REPO_ROOT / "docs"
4672
47- # Match a markdown inline link whose target starts with "..".
48- # - Captures the link text and the offending target so the error is actionable.
49- # - Only the URL form ``](...)`` matters; collapsed/reference-style links
50- # (``[foo][bar]`` + a separate definition) aren't a vitepress hazard.
51- LINK_PATTERN = re .compile (r"\]\((\.\.[^)]*)\)" )
73+ # Mirror `srcExclude` in docs/.vitepress/config.ts — these aren't built, so a
74+ # broken link in them can't break deploy. Keep in sync if that list changes.
75+ SRC_EXCLUDE = {"README_en.md" , "README_ja.md" , "README_ru.md" }
76+
77+ # Any markdown inline link target. Reference-style links (``[foo][bar]``) and
78+ # image-only refs aren't a vitepress page-resolution hazard, so only the URL
79+ # form ``](...)`` matters.
80+ LINK_PATTERN = re .compile (r"\]\(([^)]+)\)" )
81+
82+ # Source-file extensions a doc might wrongly link to as if it were a page.
83+ # A trailing ``:line`` / ``:line-line`` anchor (our code-reference convention)
84+ # is part of the same hazard, so allow it after the extension.
85+ SRC_FILE_PATTERN = re .compile (
86+ r"\.(?:py|js|mjs|cjs|ts|tsx|jsx|vue|css|scss|sass|less|html?|sh|bash|zsh"
87+ r"|bat|cmd|ps1|go|rs|rb|java|kt|swift|c|cc|cpp|cxx|h|hpp|toml|ini|cfg"
88+ r"|conf|ya?ml|sql|env)(?::\d+(?:-\d+)?)?$" ,
89+ re .IGNORECASE ,
90+ )
91+
92+ # Targets that resolve fine and must never be flagged.
93+ _SAFE_PREFIXES = ("http://" , "https://" , "mailto:" , "tel:" , "#" , "/" )
94+
95+
96+ def _classify (target : str ) -> str | None :
97+ """Return a violation kind for an offending link target, else ``None``."""
98+ if target .startswith (_SAFE_PREFIXES ):
99+ return None
100+ if target .startswith (".." ):
101+ return "relative-up"
102+ # Strip a query/fragment before testing the file extension.
103+ path_part = re .split (r"[?#]" , target , maxsplit = 1 )[0 ]
104+ if SRC_FILE_PATTERN .search (path_part ):
105+ return "source-file"
106+ return None
52107
53108
54109def main () -> int :
@@ -57,8 +112,12 @@ def main() -> int:
57112 # haven't created the folder yet.
58113 return 0
59114
60- failures : list [tuple [Path , int , str ]] = []
115+ failures : list [tuple [Path , int , str , str ]] = []
61116 for md_path in sorted (DOCS_DIR .rglob ("*.md" )):
117+ if "node_modules" in md_path .parts :
118+ continue
119+ if md_path .name in SRC_EXCLUDE :
120+ continue
62121 try :
63122 text = md_path .read_text (encoding = "utf-8" )
64123 except Exception as e :
@@ -78,24 +137,26 @@ def main() -> int:
78137 if in_fence :
79138 continue
80139 for m in LINK_PATTERN .finditer (line ):
81- failures .append ((md_path , lineno , m .group (1 )))
140+ target = m .group (1 ).strip ()
141+ kind = _classify (target )
142+ if kind is not None :
143+ failures .append ((md_path , lineno , target , kind ))
82144
83145 if not failures :
84146 return 0
85147
86148 rel = lambda p : p .resolve ().relative_to (REPO_ROOT ).as_posix ()
87- print ("Forbidden relative-up markdown links inside docs/:" , file = sys .stderr )
88- for path , lineno , target in failures :
89- print (
90- f" { rel (path )} :{ lineno } -> ({ target } )" ,
91- file = sys .stderr ,
92- )
149+ print ("Unresolvable markdown links inside docs/:" , file = sys .stderr )
150+ for path , lineno , target , kind in failures :
151+ print (f" [{ kind } ] { rel (path )} :{ lineno } -> ({ target } )" , file = sys .stderr )
93152 print (
94- "\n VitePress builds docs/ as the site root; any markdown link target "
95- "starting with '..' resolves outside the site and breaks deploy.\n "
153+ "\n VitePress builds docs/ as the site root and dead- link-checks every "
154+ "link; the targets above don't resolve to a doc page and break deploy.\n "
96155 "Fix: drop the link wrapper and inline the path as code, e.g.\n "
97- " [foo/bar.js](../../foo/bar.js) -> `foo/bar.js`\n "
98- " or move the referenced content into docs/." ,
156+ " [utils/token_tracker.py:194](utils/token_tracker.py) -> `utils/token_tracker.py:194`\n "
157+ " [foo/bar.js](../../foo/bar.js) -> `foo/bar.js`\n "
158+ "or use a full GitHub URL (https://github.com/.../blob/main/<path>), "
159+ "or move the referenced content into docs/." ,
99160 file = sys .stderr ,
100161 )
101162 return 1
0 commit comments