From c1b79a8be3f47ea6e60348ec97725bdb6b0bfa72 Mon Sep 17 00:00:00 2001 From: angpt Date: Tue, 2 Jun 2026 15:05:40 -0500 Subject: [PATCH] docs: redirect GitHub Pages docs to GitBook --- .github/workflows/pages-redirect.yml | 79 ++++++ .gitignore | 3 +- scripts/generate_github_pages_redirects.py | 315 +++++++++++++++++++++ tests/docs/test_github_pages_redirects.py | 172 +++++++++++ 4 files changed, 568 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/pages-redirect.yml create mode 100644 scripts/generate_github_pages_redirects.py create mode 100644 tests/docs/test_github_pages_redirects.py diff --git a/.github/workflows/pages-redirect.yml b/.github/workflows/pages-redirect.yml new file mode 100644 index 0000000000..76831fa989 --- /dev/null +++ b/.github/workflows/pages-redirect.yml @@ -0,0 +1,79 @@ +name: GitHub Pages Redirects + +on: + push: + branches: [main] + paths: + - 'docs/**' + - 'scripts/publish_docs.py' + - 'scripts/validate_docs.py' + - 'scripts/generate_github_pages_redirects.py' + - 'tests/docs/**' + - '.github/workflows/pages-redirect.yml' + pull_request: + paths: + - 'docs/**' + - 'scripts/publish_docs.py' + - 'scripts/validate_docs.py' + - 'scripts/generate_github_pages_redirects.py' + - 'tests/docs/**' + - '.github/workflows/pages-redirect.yml' + workflow_dispatch: + +jobs: + redirect-site: + permissions: + contents: write + runs-on: ubuntu-latest + steps: + - name: Check out the repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Run redirect generator tests + run: python -m unittest discover -s tests/docs -p 'test_*.py' + + - name: Build GitBook site + run: python scripts/publish_docs.py + + - name: Validate site output + run: python scripts/validate_docs.py + + - name: Build GitHub Pages redirects + run: python scripts/generate_github_pages_redirects.py + + - name: Upload generated redirect site as PR artifact + if: github.event_name == 'pull_request' + uses: actions/upload-artifact@v4 + with: + name: github-pages-redirect-preview + path: gh-pages-redirect/ + + - name: Deploy to gh-pages branch + if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} + run: | + git config user.name 'github-actions[bot]' + git config user.email 'github-actions[bot]@users.noreply.github.com' + + mv gh-pages-redirect/ /tmp/gh-pages-redirect/ + + git fetch origin gh-pages || true + if git rev-parse --verify origin/gh-pages >/dev/null 2>&1; then + git checkout gh-pages + else + git checkout --orphan gh-pages + git rm -rf . + fi + + rsync -a --delete --exclude='.git' /tmp/gh-pages-redirect/ . + git add -A + if ! git diff --cached --quiet; then + git commit -m "docs: update GitHub Pages redirects from ${{ github.sha }}" + git push origin gh-pages + fi diff --git a/.gitignore b/.gitignore index f6bb268f69..67f8aa4c1a 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,5 @@ __init__.py uv.lock # docs build output (pushed to gitbook-docs branch by CI) -/site \ No newline at end of file +/site +/gh-pages-redirect diff --git a/scripts/generate_github_pages_redirects.py b/scripts/generate_github_pages_redirects.py new file mode 100644 index 0000000000..27f1571990 --- /dev/null +++ b/scripts/generate_github_pages_redirects.py @@ -0,0 +1,315 @@ +"""Build a redirect-only GitHub Pages site for legacy llamafile docs URLs. + +Usage: + python scripts/generate_github_pages_redirects.py +""" + +from __future__ import annotations + +import argparse +import html +import json +import shutil +import sys +from pathlib import Path +from urllib.parse import quote + +DEFAULT_SOURCE_DIR = Path("site") +DEFAULT_OUTPUT_DIR = Path("gh-pages-redirect") +DEFAULT_BASE_URL = "https://docs.mozilla.ai/llamafile/" +DEFAULT_PAGES_BASE_PATH = "/llamafile" +PASSTHROUGH_DIRS = ("images",) +GITHUB_DOCS_BASE_URL = "https://github.com/mozilla-ai/llamafile/blob/main/docs/" + +# Legacy GitHub Pages routes to the current GitBook URLs. +LEGACY_GITBOOK_ROUTES = { + "": "", + "quickstart": "getting-started/quickstart", + "example_llamafiles": "getting-started/pre-built-llamafiles", + "pre-built-llamafiles": "getting-started/pre-built-llamafiles", + "running_llamafile": "using-llamafile/running_llamafile", + "creating_llamafiles": "using-llamafile/creating_llamafiles", + "source_installation": "using-llamafile/source_installation", + "building_dlls": "using-llamafile/building_dlls", + "technical_details": "reference/technical_details", + "support": "reference/support", + "troubleshooting": "reference/troubleshooting", + "whisperfile": "whisperfile", + "whisperfile/getting-started": "whisperfile/getting-started", + "whisperfile/packaging": "whisperfile/packaging", + "whisperfile/gpu": "whisperfile/gpu", + "whisperfile/translate": "whisperfile/translate", + "whisperfile/server": "whisperfile/server", +} + +# These pages used to exist on GitHub Pages but are no longer published to GitBook. +LEGACY_GITHUB_DOC_PATHS = { + "AGENTS": "AGENTS.md", + "commands/build": "commands/build.md", + "commands/check": "commands/check.md", + "commands/clean": "commands/clean.md", + "skills/llamafile/SKILL": "skills/llamafile/SKILL.md", + "skills/llamafile/architecture": "skills/llamafile/architecture.md", + "skills/llamafile/building": "skills/llamafile/building.md", + "skills/llamafile/development": "skills/llamafile/development.md", + "skills/llamafile/testing": "skills/llamafile/testing.md", + "skills/llamafile/update_llamacpp": "skills/llamafile/update_llamacpp.md", +} + + +def normalize_base_url(base_url: str) -> str: + """Return the destination docs URL with a single trailing slash.""" + return base_url.rstrip("/") + "/" + + +def normalize_pages_base_path(pages_base_path: str) -> str: + """Return the GitHub Pages project prefix with a leading slash.""" + stripped = pages_base_path.strip("/") + return f"/{stripped}" if stripped else "/" + + +def legacy_route_aliases(route: str) -> set[str]: + """Return route aliases for common slug variations.""" + if not route: + return set() + + route_parts = route.split("/") + last_segment = route_parts[-1] + aliases = set() + + for alias_segment in { + last_segment.replace("-", "_"), + last_segment.replace("_", "-"), + last_segment.lower(), + }: + if alias_segment and alias_segment != last_segment: + aliases.add("/".join([*route_parts[:-1], alias_segment])) + + return aliases + + +def build_target_url(base_url: str, route: str) -> str: + """Build an internal GitBook redirect destination for a route.""" + if not route: + return base_url + + normalized_route = quote(route.strip("/"), safe="/") + return f"{base_url}{normalized_route}/" + + +def build_github_doc_url(doc_path: str) -> str: + """Build a GitHub URL for a docs file that is no longer on GitBook.""" + normalized_path = quote(doc_path.strip("/"), safe="/") + return f"{GITHUB_DOCS_BASE_URL}{normalized_path}" + + +def collect_redirect_targets(base_url: str) -> dict[str, str]: + """Collect route-to-target mappings for legacy Pages URLs.""" + routes: dict[str, str] = {} + + for route, target_route in LEGACY_GITBOOK_ROUTES.items(): + routes[route] = build_target_url(base_url, target_route) + for alias_route in legacy_route_aliases(route): + routes.setdefault(alias_route, routes[route]) + + for route, doc_path in LEGACY_GITHUB_DOC_PATHS.items(): + routes[route] = build_github_doc_url(doc_path) + for alias_route in legacy_route_aliases(route): + routes.setdefault(alias_route, routes[route]) + + return routes + + +def serialize_json_for_script(value: object, *, sort_keys: bool = False) -> str: + """Serialize JSON safely for embedding inside an HTML script tag.""" + serialized = json.dumps(value, ensure_ascii=True, sort_keys=sort_keys) + return ( + serialized.replace("<", "\\u003c") + .replace(">", "\\u003e") + .replace("&", "\\u0026") + ) + + +def redirect_page_html(target_url: str) -> str: + """Build a simple HTML redirect page.""" + escaped_target_url = html.escape(target_url, quote=True) + serialized_target_url = serialize_json_for_script(target_url) + + return f""" + + + + Redirecting... + + + + + + +

This documentation moved to {escaped_target_url}.

+ + +""" + + +def not_found_page_html(base_url: str, pages_base_path: str, redirect_targets: dict[str, str]) -> str: + """Build a smart 404 page that redirects legacy paths when possible.""" + escaped_base_url = html.escape(base_url, quote=True) + serialized_base_url = serialize_json_for_script(base_url) + serialized_pages_base_path = serialize_json_for_script(pages_base_path) + serialized_redirect_targets = serialize_json_for_script(redirect_targets, sort_keys=True) + + return f""" + + + + Redirecting... + + + + + +

This documentation moved to {escaped_base_url}.

+ + +""" + + +def write_file(path: Path, content: str) -> None: + """Write a UTF-8 text file, creating parent directories when needed.""" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + + +def copy_passthrough_assets(source_dir: Path, output_dir: Path) -> None: + """Copy assets that should remain directly accessible.""" + for directory_name in PASSTHROUGH_DIRS: + source_path = source_dir / directory_name + if not source_path.exists(): + continue + + shutil.copytree(source_path, output_dir / directory_name) + + +def build_redirect_site( + source_dir: Path, + output_dir: Path, + base_url: str, + pages_base_path: str, +) -> int: + """Build the redirect site and return the number of redirect pages.""" + normalized_base_url = normalize_base_url(base_url) + normalized_pages_base_path = normalize_pages_base_path(pages_base_path) + + if output_dir.exists(): + shutil.rmtree(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + redirect_targets = collect_redirect_targets(normalized_base_url) + redirect_count = 0 + + for route, target_url in sorted(redirect_targets.items()): + redirect_html = redirect_page_html(target_url) + + if route: + directory_redirect_path = output_dir / route / "index.html" + file_redirect_path = output_dir / Path(route).with_suffix(".html") + write_file(directory_redirect_path, redirect_html) + write_file(file_redirect_path, redirect_html) + redirect_count += 2 + else: + write_file(output_dir / "index.html", redirect_html) + redirect_count += 1 + + write_file( + output_dir / "404.html", + not_found_page_html(normalized_base_url, normalized_pages_base_path, redirect_targets), + ) + write_file(output_dir / ".nojekyll", "") + copy_passthrough_assets(source_dir, output_dir) + + return redirect_count + + +def parse_args() -> argparse.Namespace: + """Parse CLI arguments.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--source-dir", + type=Path, + default=DEFAULT_SOURCE_DIR, + help=f"Built GitBook site input directory. Defaults to {DEFAULT_SOURCE_DIR}.", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=DEFAULT_OUTPUT_DIR, + help=f"Redirect site output directory. Defaults to {DEFAULT_OUTPUT_DIR}.", + ) + parser.add_argument( + "--base-url", + default=DEFAULT_BASE_URL, + help=f"Destination docs base URL. Defaults to {DEFAULT_BASE_URL}.", + ) + parser.add_argument( + "--pages-base-path", + default=DEFAULT_PAGES_BASE_PATH, + help=f"GitHub Pages project path prefix. Defaults to {DEFAULT_PAGES_BASE_PATH}.", + ) + return parser.parse_args() + + +def main() -> int: + """Build the redirect site for legacy GitHub Pages routes.""" + args = parse_args() + + if not args.source_dir.exists(): + print(f"Input directory does not exist: {args.source_dir}", file=sys.stderr) + return 1 + + redirect_count = build_redirect_site( + source_dir=args.source_dir, + output_dir=args.output_dir, + base_url=args.base_url, + pages_base_path=args.pages_base_path, + ) + print(f"Done - {redirect_count} redirect pages written to {args.output_dir}/") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/docs/test_github_pages_redirects.py b/tests/docs/test_github_pages_redirects.py new file mode 100644 index 0000000000..ec58826ff9 --- /dev/null +++ b/tests/docs/test_github_pages_redirects.py @@ -0,0 +1,172 @@ +import importlib.util +import runpy +import tempfile +import unittest +from pathlib import Path +from unittest import mock + +REPO_ROOT = Path(__file__).resolve().parents[2] +SCRIPT_PATH = REPO_ROOT / "scripts" / "generate_github_pages_redirects.py" + + +def load_redirect_generator_module(): + spec = importlib.util.spec_from_file_location("generate_github_pages_redirects", SCRIPT_PATH) + assert spec is not None + assert spec.loader is not None + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def write_sample_site(source_dir: Path, *, include_images: bool = True) -> None: + source_dir.mkdir(parents=True, exist_ok=True) + + if include_images: + (source_dir / "images").mkdir(parents=True) + (source_dir / "images" / "logo.png").write_bytes(b"png") + + +class RedirectSiteTests(unittest.TestCase): + def setUp(self) -> None: + self.tempdir = tempfile.TemporaryDirectory() + self.tmp_path = Path(self.tempdir.name) + self.source_dir = self.tmp_path / "site" + self.output_dir = self.tmp_path / "gh-pages-redirect" + self.redirect_generator = load_redirect_generator_module() + + def tearDown(self) -> None: + self.tempdir.cleanup() + + def test_generate_github_pages_redirects(self) -> None: + write_sample_site(self.source_dir) + + redirect_count = self.redirect_generator.build_redirect_site( + source_dir=self.source_dir, + output_dir=self.output_dir, + base_url=self.redirect_generator.DEFAULT_BASE_URL, + pages_base_path=self.redirect_generator.DEFAULT_PAGES_BASE_PATH, + ) + + root_redirect = (self.output_dir / "index.html").read_text(encoding="utf-8") + quickstart_redirect = (self.output_dir / "quickstart" / "index.html").read_text(encoding="utf-8") + legacy_slug_redirect = (self.output_dir / "example-llamafiles" / "index.html").read_text(encoding="utf-8") + github_redirect = (self.output_dir / "AGENTS" / "index.html").read_text(encoding="utf-8") + not_found_redirect = (self.output_dir / "404.html").read_text(encoding="utf-8") + + self.assertGreater(redirect_count, 0) + self.assertIn("https://docs.mozilla.ai/llamafile/", root_redirect) + self.assertIn("https://docs.mozilla.ai/llamafile/getting-started/quickstart/", quickstart_redirect) + self.assertIn( + "https://docs.mozilla.ai/llamafile/getting-started/pre-built-llamafiles/", + legacy_slug_redirect, + ) + self.assertIn("https://github.com/mozilla-ai/llamafile/blob/main/docs/AGENTS.md", github_redirect) + self.assertTrue((self.output_dir / "quickstart.html").exists()) + self.assertTrue((self.output_dir / "agents" / "index.html").exists()) + self.assertTrue((self.output_dir / ".nojekyll").exists()) + self.assertEqual((self.output_dir / "images" / "logo.png").read_bytes(), b"png") + self.assertIn('const pagesBasePath = "/llamafile";', not_found_redirect) + self.assertIn('"quickstart": "https://docs.mozilla.ai/llamafile/getting-started/quickstart/"', not_found_redirect) + + def test_serialize_json_for_script_escapes_html_sensitive_characters(self) -> None: + serialized = self.redirect_generator.serialize_json_for_script( + { + "target": "https://docs.mozilla.ai/llamafile/?q=&x=1", + } + ) + + self.assertIn("\\u003c/script\\u003e", serialized) + self.assertIn("\\u003cunsafe\\u003e", serialized) + self.assertIn("\\u0026x=1", serialized) + self.assertNotIn("", serialized) + + def test_build_redirect_site_replaces_existing_output_and_skips_missing_images(self) -> None: + write_sample_site(self.source_dir, include_images=False) + self.output_dir.mkdir(parents=True) + (self.output_dir / "stale.txt").write_text("old\n", encoding="utf-8") + + redirect_count = self.redirect_generator.build_redirect_site( + source_dir=self.source_dir, + output_dir=self.output_dir, + base_url="https://docs.mozilla.ai/llamafile", + pages_base_path="llamafile", + ) + + self.assertGreater(redirect_count, 0) + self.assertFalse((self.output_dir / "stale.txt").exists()) + self.assertTrue((self.output_dir / "index.html").exists()) + self.assertFalse((self.output_dir / "images").exists()) + + def test_parse_args_and_main_success(self) -> None: + custom_output_dir = self.tmp_path / "custom-output" + write_sample_site(self.source_dir, include_images=False) + + argv = [ + "generate_github_pages_redirects.py", + "--source-dir", + str(self.source_dir), + "--output-dir", + str(custom_output_dir), + "--base-url", + "https://docs.mozilla.ai/llamafile", + "--pages-base-path", + "llamafile", + ] + + with mock.patch("sys.argv", argv): + args = self.redirect_generator.parse_args() + + self.assertEqual(args.source_dir, self.source_dir) + self.assertEqual(args.output_dir, custom_output_dir) + self.assertEqual(args.base_url, "https://docs.mozilla.ai/llamafile") + self.assertEqual(args.pages_base_path, "llamafile") + + with mock.patch("sys.argv", argv): + with mock.patch("sys.stdout", new_callable=lambda: __import__("io").StringIO()) as stdout: + exit_code = self.redirect_generator.main() + + self.assertEqual(exit_code, 0) + self.assertIn("Done - ", stdout.getvalue()) + self.assertTrue((custom_output_dir / "404.html").exists()) + + def test_main_returns_error_for_missing_source(self) -> None: + missing_source = self.tmp_path / "missing-site" + + argv = [ + "generate_github_pages_redirects.py", + "--source-dir", + str(missing_source), + "--output-dir", + str(self.output_dir), + ] + + with mock.patch("sys.argv", argv): + with mock.patch("sys.stderr", new_callable=lambda: __import__("io").StringIO()) as stderr: + exit_code = self.redirect_generator.main() + + self.assertEqual(exit_code, 1) + self.assertIn(f"Input directory does not exist: {missing_source}", stderr.getvalue()) + + def test_script_entrypoint_exits_with_main_status(self) -> None: + entrypoint_output_dir = self.tmp_path / "entrypoint-output" + write_sample_site(self.source_dir, include_images=False) + + argv = [ + str(SCRIPT_PATH), + "--source-dir", + str(self.source_dir), + "--output-dir", + str(entrypoint_output_dir), + ] + + with mock.patch("sys.argv", argv): + with self.assertRaises(SystemExit) as exc_info: + runpy.run_path(str(SCRIPT_PATH), run_name="__main__") + + self.assertEqual(exc_info.exception.code, 0) + self.assertTrue((entrypoint_output_dir / "index.html").exists()) + + +if __name__ == "__main__": + unittest.main()