-
Notifications
You must be signed in to change notification settings - Fork 220
Expand file tree
/
Copy pathgenerate_sitemap.py
More file actions
154 lines (113 loc) · 4.46 KB
/
generate_sitemap.py
File metadata and controls
154 lines (113 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python3
"""Generate a deterministic sitemap.xml for the static docs site."""
from __future__ import annotations
import subprocess
from pathlib import Path
from urllib.parse import quote, urljoin
from xml.sax.saxutils import escape
# Directory containing this script; every scan and every generated file is
# resolved relative to it.
DOCS_ROOT = Path(__file__).resolve().parent
# Origin that every sitemap URL is built on.
BASE_URL = "https://commonware.xyz"
# HTML file names that collect_html() leaves out of the sitemap.
EXCLUDED_FILES = {"template.html"}
# Path components that cause collect_html() to skip a file.
EXCLUDED_DIRS = {".venv"}
# Non-HTML files that main() appends to the sitemap after the collected pages.
EXTRA_FILES = ["llms.txt", "robots.txt"]
def get_versions() -> list[str]:
    """Return up to three of the newest ``v*`` git tags, newest first.

    Tags are listed by git in the repository at ``DOCS_ROOT.parent`` and
    ordered by descending version (``--sort=-v:refname``).

    Raises:
        RuntimeError: If no tag matching ``v*`` exists.
        subprocess.CalledProcessError: If the git command exits non-zero.
    """
    command = [
        "git",
        "-C",
        str(DOCS_ROOT.parent),
        "tag",
        "-l",
        "v*",
        "--sort=-v:refname",
    ]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    # Drop empty entries: an empty stdout still splits into [""].
    tags = list(filter(None, completed.stdout.strip().split("\n")))
    if len(tags) == 0:
        raise RuntimeError("No version tags found")
    return tags[:3]
def collect_html() -> list[Path]:
    """Return the sorted, ``DOCS_ROOT``-relative HTML paths for the sitemap.

    A file is left out when its name is in ``EXCLUDED_FILES``, when it lives
    under the top-level ``code`` directory, or when any component of its
    relative path starts with ``.`` or is listed in ``EXCLUDED_DIRS``.
    """

    def _is_listed(rel: Path) -> bool:
        # Decide whether one relative HTML path belongs in the sitemap.
        components = rel.parts
        if rel.name in EXCLUDED_FILES or components[0] == "code":
            return False
        return not any(
            piece.startswith(".") or piece in EXCLUDED_DIRS for piece in components
        )

    candidates = (found.relative_to(DOCS_ROOT) for found in DOCS_ROOT.rglob("*.html"))
    return sorted(rel for rel in candidates if _is_listed(rel))
# File suffixes that collect_code() includes from a versioned code directory.
CODE_EXTENSIONS = {".md", ".rs", ".toml"}
def collect_code(version: str) -> list[Path]:
    """Return the sorted, ``DOCS_ROOT``-relative code files for one version.

    Scans ``DOCS_ROOT / "code" / version`` recursively and keeps regular files
    whose suffix is in ``CODE_EXTENSIONS``. A file is skipped when the third
    component of its relative path (the entry directly below the version
    directory) is ``docs``, or when any component starts with ``.``.

    Returns an empty list when the version directory does not exist.
    """
    version_root = DOCS_ROOT / "code" / version
    if not version_root.exists():
        return []

    def _wanted(candidate: Path) -> bool:
        # Decide whether one entry under the version directory is published.
        if not (candidate.is_file() and candidate.suffix in CODE_EXTENSIONS):
            return False
        components = candidate.relative_to(DOCS_ROOT).parts
        if components[2] == "docs":
            return False
        return all(not piece.startswith(".") for piece in components)

    return sorted(
        entry.relative_to(DOCS_ROOT)
        for entry in version_root.rglob("*")
        if _wanted(entry)
    )
def build_url(rel: Path, base_url: str) -> str:
    """Convert a relative path to an absolute, percent-encoded URL.

    Args:
        rel: Path of the file relative to the site root.
        base_url: Site origin, with or without a trailing slash.

    Returns:
        ``base_url`` with exactly one trailing slash when ``rel`` is
        ``index.html``; otherwise the base joined with the POSIX form of
        ``rel``, with a trailing ``.html`` suffix removed.
    """
    normalized = base_url.rstrip("/") + "/"
    if rel == Path("index.html"):
        return normalized
    # Use extensionless URLs so sitemap entries match pretty URL redirects.
    if rel.suffix == ".html":
        rel = rel.with_suffix("")
    # Percent-encode the path before joining. Unencoded, a space gives an
    # invalid <loc>, "?" or "#" in a file name is parsed as a query/fragment,
    # and a first segment such as "a:b" is parsed by urljoin as a URL scheme.
    # quote() leaves "/", letters, digits and "_.-~" untouched, so ordinary
    # paths produce the same URL as before.
    return urljoin(normalized, quote(rel.as_posix()))
def write_sitemap(urls: list[str]) -> None:
    """Write ``sitemap.xml`` into ``DOCS_ROOT`` with one ``<url>`` per entry.

    Each URL is XML-escaped before being placed in its ``<loc>`` element. The
    file is UTF-8 encoded and ends with a single newline.
    """
    entry_lines = [
        line
        for address in urls
        for line in (" <url>", f" <loc>{escape(address)}</loc>", " </url>")
    ]
    document_lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
        *entry_lines,
        "</urlset>",
    ]
    target = DOCS_ROOT / "sitemap.xml"
    target.write_text("\n".join(document_lines) + "\n", encoding="utf-8")
def write_llms_txt(versions: list[str]) -> None:
    """Write ``llms.txt`` into ``DOCS_ROOT`` listing the versioned code paths.

    ``versions[0]`` is treated as the latest version: it is marked
    ``(latest)`` in the list and used for the README link. ``versions`` must
    be non-empty.
    """
    latest = versions[0]
    older = versions[1:]
    bullets = [f"- /code/{latest}/ (latest)"]
    bullets.extend(f"- /code/{tag}/" for tag in older)
    versions_block = "\n".join(bullets)
    content = f"""# Commonware Library
> Source code is mirrored at versioned paths under /code/. These paths are
> not browseable directories. Use [sitemap.xml](/sitemap.xml) to discover
> all available files (.rs, .md, .toml). If a file is not in the sitemap,
> it does not exist.
Start with [README.md](/code/{latest}/README.md) for an overview.
## MCP Server
An MCP (Model Context Protocol) server is available at https://mcp.commonware.xyz
for AI assistants that support MCP (Claude Code, Cursor, etc.). The server
provides tools to search code, list crates, and fetch files directly.
## Versions
{versions_block}
"""
    destination = DOCS_ROOT / "llms.txt"
    destination.write_text(content, encoding="utf-8")
def main() -> None:
    """Regenerate ``llms.txt`` and ``sitemap.xml`` from git tags and files on disk.

    The sitemap lists the HTML pages first, then the code files of each
    version in the order returned by ``get_versions()``, then ``EXTRA_FILES``.
    """
    versions = get_versions()

    # llms.txt advertises the same versioned paths the sitemap enumerates.
    write_llms_txt(versions)

    # Gather every relative path first, then turn them into URLs in one pass.
    relative_paths = list(collect_html())
    for version in versions:
        relative_paths.extend(collect_code(version))
    urls = [build_url(rel, BASE_URL) for rel in relative_paths]

    base_with_slash = BASE_URL.rstrip("/") + "/"
    urls.extend(urljoin(base_with_slash, extra) for extra in EXTRA_FILES)

    write_sitemap(urls)
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()