|
| 1 | +#!/usr/bin/env node |
/**
 * Post-build script: fix LLM-generated URLs to use Docusaurus slugs.
 * The docusaurus-plugin-llms plugin builds URLs from file paths (e.g. network/1-getting-started),
 * but our docs use the frontmatter `slug` (e.g. getting-started). This script rewrites
 * the generated llms*.txt files so that URLs match the actual site routes.
 *
 * Run after `npm run build` (add it as a "postbuild" script or call it from the build script).
 */
| 10 | + |
| 11 | +import fs from "fs"; |
| 12 | +import path from "path"; |
| 13 | +import { fileURLToPath } from "url"; |
| 14 | + |
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Parent of this script's directory; the docs/ and build/ folders are looked up beneath it.
const rootDir = path.join(__dirname, "..");
const [docsDir, buildDir] = ["docs", "build"].map((name) =>
  path.resolve(rootDir, name),
);

// Origin that prefixes every doc URL in the generated llms*.txt files.
const SITE_BASE = "https://dev.flare.network";
| 21 | + |
/**
 * Escape every regular-expression metacharacter in `s` so the result can be
 * embedded in a RegExp source and match the input literally.
 *
 * @param {string} s - Text to escape.
 * @returns {string} `s` with each metacharacter preceded by a backslash.
 */
function escapeRe(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
| 25 | + |
/**
 * Read the `slug` (preferred) or `id` value from a doc file's frontmatter,
 * i.e. the first `---` … `---` block at the very start of the file.
 *
 * Only single-line scalar values are recognised, optionally wrapped in single
 * or double quotes. A `slug:` key with an empty value is treated as absent, so
 * the `id` (if any) is used instead.
 *
 * @param {string} filePath - Path of the .md/.mdx file to read.
 * @returns {string|null} The trimmed slug or id, or null when the file has no
 *   frontmatter or neither key carries a value.
 * @throws {Error} Whatever `fs.readFileSync` throws if the file is unreadable.
 */
function getSlugOrIdFromFile(filePath) {
  // Drop a UTF-8 BOM so the opening `---` still anchors at offset 0.
  const raw = fs.readFileSync(filePath, "utf8").replace(/^\uFEFF/, "");
  const frontMatter = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!frontMatter) return null;
  const block = frontMatter[1];

  // Use [ \t]* rather than \s* around the value: \s also matches newlines, so
  // an empty `slug:` would otherwise swallow the following line as its value.
  const slugMatch = block.match(
    /^slug:[ \t]*["']?([^"'\r\n]*)["']?[ \t]*$/m,
  );
  const slug = slugMatch ? slugMatch[1].trim() : "";
  if (slug) return slug;

  const idMatch = block.match(/^id:[ \t]*["']?([^"'\s]+)["']?[ \t]*$/m);
  const id = idMatch ? idMatch[1].trim() : "";
  return id || null;
}
| 40 | + |
/**
 * Recursively collect every .md / .mdx file under `dir`.
 *
 * Paths are returned relative to `baseDir` and always use "/" as the
 * separator, so they can be compared with and embedded in URL paths on any
 * platform (Node's path functions also accept "/" on Windows).
 *
 * @param {string} dir - Directory to scan.
 * @param {string} baseDir - Directory the returned paths are relative to.
 * @param {string[]} [list=[]] - Accumulator the matches are appended to.
 * @returns {string[]} `list`, with one entry per Markdown file found.
 * @throws {Error} Whatever `fs.readdirSync` throws if a directory is unreadable.
 */
function collectDocPaths(dir, baseDir, list = []) {
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      collectDocPaths(fullPath, baseDir, list);
    } else if (/\.(md|mdx)$/i.test(entry.name)) {
      // path.relative uses the OS separator; normalise it to "/".
      list.push(path.relative(baseDir, fullPath).split(path.sep).join("/"));
    }
  }
  return list;
}
| 57 | + |
/**
 * Build the list of [wrongPath, rightPath] pairs used for URL replacement.
 *
 *   wrongPath = doc path derived from the file location, without extension
 *               (e.g. network/1-getting-started)
 *   rightPath = path the site actually serves (e.g. network/getting-started)
 *
 * Resolution rules:
 *   - A slug starting with "/" is absolute: it replaces the whole path.
 *   - Any other slug/id is resolved relative to the doc's directory.
 *   - Without a slug/id, the leading "N-" is stripped from the file name.
 *   - A leading "N-" is stripped from every directory segment as well.
 *
 * Both paths use "/" separators and carry no leading slash; rightPath is ""
 * for the site root. Docs whose path is already correct are omitted.
 *
 * NOTE(review): category index docs (index / README) are not special-cased.
 *
 * @returns {Array<[string, string]>} Replacement pairs, one per doc path.
 */
function buildReplacements() {
  // "3-governance" -> "governance"; a name that is only a prefix ("3-") is kept.
  const stripNumberPrefix = (segment) =>
    segment.replace(/^\d+-/, "") || segment;

  const replacements = [];
  const seen = new Set();

  for (const rel of collectDocPaths(docsDir, docsDir)) {
    const slug = getSlugOrIdFromFile(path.join(docsDir, rel));

    // Work on a "/"-separated path regardless of the host OS.
    const urlRel = rel.split(path.sep).join("/");
    const ext = path.posix.extname(urlRel);
    // Guard the empty-extension case: slice(0, -0) would yield "".
    const wrongPath = ext ? urlRel.slice(0, -ext.length) : urlRel;
    if (seen.has(wrongPath)) continue;

    const dirSegments = wrongPath.split("/");
    const lastSegment = dirSegments.pop();
    // "" for top-level docs (path.dirname would give ".", yielding "./slug").
    const cleanDir = dirSegments.map(stripNumberPrefix).join("/");

    let rightPath;
    if (slug && slug.startsWith("/")) {
      // Absolute slug: drop the leading slash so the URL has no "//".
      rightPath = slug.replace(/^\/+/, "");
    } else {
      const leaf = slug || stripNumberPrefix(lastSegment);
      rightPath = path.posix.join(cleanDir, leaf);
      if (rightPath === ".") rightPath = "";
    }

    if (rightPath !== wrongPath) {
      seen.add(wrongPath);
      replacements.push([wrongPath, rightPath]);
    }
  }
  return replacements;
}
| 110 | + |
/**
 * Rewrite every known wrong doc URL in `filePath` to its correct form.
 *
 * All replacements are applied in a single pass over the original text, so a
 * URL produced by one replacement is never rewritten again by another. A wrong
 * URL is only matched when it is not followed by "/", "-" or a word character:
 * that leaves child paths ("…/1-overview/child") and longer sibling names
 * ("…/1-overview-extra") untouched. The file is written back, and a message is
 * logged, only if something changed.
 *
 * @param {string} filePath - Path of the generated text file to fix in place.
 * @param {Array<[string, string]>} replacements - [wrongPath, rightPath] pairs,
 *   relative to SITE_BASE and without a leading slash.
 * @returns {void}
 * @throws {Error} Whatever `fs.readFileSync` / `fs.writeFileSync` throw.
 */
function fixFile(filePath, replacements) {
  // An empty alternation would match the empty string everywhere.
  if (replacements.length === 0) return;

  const rightUrlByWrongUrl = new Map();
  for (const [wrongPath, rightPath] of replacements) {
    const wrongUrl = `${SITE_BASE}/${wrongPath}`;
    // First pair wins if the same wrong path is listed twice.
    if (!rightUrlByWrongUrl.has(wrongUrl)) {
      rightUrlByWrongUrl.set(wrongUrl, `${SITE_BASE}/${rightPath}`);
    }
  }

  // Longest first, so the most specific URL is tried before its prefixes.
  const alternatives = [...rightUrlByWrongUrl.keys()]
    .sort((a, b) => b.length - a.length)
    .map(escapeRe);
  const wrongUrlRe = new RegExp(
    `(?:${alternatives.join("|")})(?![\\w/-])`,
    "g",
  );

  const original = fs.readFileSync(filePath, "utf8");
  // A replacer function keeps "$" sequences in the target URL literal.
  const updated = original.replace(wrongUrlRe, (wrongUrl) =>
    rightUrlByWrongUrl.get(wrongUrl),
  );
  if (updated === original) return;

  fs.writeFileSync(filePath, updated, "utf8");
  console.log("[fix-llms-urls] Updated:", path.relative(rootDir, filePath));
}
| 129 | + |
/**
 * Entry point: apply the doc-path corrections to every regular file directly
 * inside the build directory whose name starts with "llms" and ends with ".txt".
 * Logs and returns early when the build directory does not exist or when there
 * are no corrections to apply.
 */
function main() {
  const hasBuildDir = fs.existsSync(buildDir);
  if (!hasBuildDir) {
    console.warn("[fix-llms-urls] build/ not found, skipping.");
    return;
  }

  const pathFixes = buildReplacements();
  if (pathFixes.length === 0) {
    console.log("[fix-llms-urls] No slug-based path corrections needed.");
    return;
  }

  for (const entry of fs.readdirSync(buildDir, { withFileTypes: true })) {
    const { name } = entry;
    const isLlmsTxt =
      entry.isFile() && name.endsWith(".txt") && name.startsWith("llms");
    if (!isLlmsTxt) continue;
    fixFile(path.join(buildDir, name), pathFixes);
  }
}
| 149 | + |
| 150 | +main(); |
0 commit comments