|
| 1 | +#!/usr/bin/env node |
| 2 | +/** |
| 3 | + * Post-build script to append static API documentation to llms.txt files. |
| 4 | + * |
| 5 | + * This script: |
| 6 | + * 1. Finds HTML files in static/aztec-nr-api/devnet |
| 7 | + * 2. Converts them to markdown-like text |
| 8 | + * 3. Appends the content to build/llms-full.txt |
| 9 | + * 4. Adds links to build/llms.txt |
| 10 | + */ |
| 11 | + |
| 12 | +const fs = require("fs"); |
| 13 | +const path = require("path"); |
| 14 | + |
// Build output folder and static asset folder, both located one level above this script.
const BUILD_DIR = path.resolve(__dirname, "..", "build");
const STATIC_DIR = path.resolve(__dirname, "..", "static");

// Version list read from developer_versions.json (per the original note, the
// same file docusaurus.config.js loads).
const developerVersions = require("../developer_versions.json");

// First listed version whose name contains "devnet"; undefined when none does.
const devnetVersion = developerVersions.find((version) => version.includes("devnet"));
if (!devnetVersion) {
  console.warn("Warning: No devnet version found in developer_versions.json");
}

// API doc sets to publish. `dir` is relative to STATIC_DIR; the folder is
// literally named "devnet" regardless of the concrete version string.
const API_DIRS = [
  {
    name: "Aztec.nr API Reference",
    dir: "aztec-nr-api/devnet",
    description: `Auto-generated API documentation for Aztec.nr (${devnetVersion || "devnet"})`,
  },
];
| 35 | + |
// Replacement text for each entity htmlToText decodes, keyed by the characters
// between "&" and ";".
const HTML_ENTITY_TEXT = {
  nbsp: " ",
  lt: "<",
  gt: ">",
  amp: "&",
  quot: '"',
  "#39": "'",
};

/**
 * Convert an HTML document into markdown-like plain text.
 *
 * Only the contents of the first <main> element are used when one is present
 * (this drops sidebar/navigation markup); otherwise the whole input is used.
 * Headings h1-h4, code blocks, inline code, links, list items, paragraphs and
 * <br> are rewritten to markdown equivalents, every remaining tag is removed,
 * a small set of HTML entities is decoded, and whitespace is normalised.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string} Trimmed markdown-like text.
 */
function htmlToText(html) {
  // Keep only the <main> content to avoid sidebar/navigation redundancy.
  const mainMatch = html.match(/<main[^>]*>([\s\S]*?)<\/main>/i);
  const content = mainMatch ? mainMatch[1] : html;

  return (
    content
      // Remove the breadcrumb div (first div that starts with an "aztec-nr" link).
      .replace(/<div><a[^>]*>aztec-nr<\/a>[\s\S]*?<\/div>/i, "")
      // Remove script and style elements entirely.
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
      // Remove HTML comments.
      .replace(/<!--[\s\S]*?-->/g, "")
      // Convert headers to markdown.
      .replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, "\n# $1\n")
      .replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, "\n## $1\n")
      .replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, "\n### $1\n")
      .replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, "\n#### $1\n")
      // Convert code blocks, then any remaining inline code.
      .replace(/<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi, "\n```\n$1\n```\n")
      .replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, "`$1`")
      // Convert links to [text](href).
      .replace(/<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, "[$2]($1)")
      // Convert list items.
      .replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, "- $1\n")
      // Convert paragraphs.
      .replace(/<p[^>]*>([\s\S]*?)<\/p>/gi, "\n$1\n")
      // Convert line breaks.
      .replace(/<br\s*\/?>/gi, "\n")
      // Remove remaining HTML tags. This runs before entity decoding so that
      // escaped text such as "&lt;T&gt;" is not mistaken for a tag.
      .replace(/<[^>]+>/g, "")
      // Decode common HTML entities in a single pass so each entity is decoded
      // exactly once ("&amp;quot;" becomes the text "&quot;", not a quote).
      .replace(/&(nbsp|lt|gt|amp|quot|#39);/g, (_match, name) => HTML_ENTITY_TEXT[name])
      // Normalize whitespace.
      .replace(/\n{3,}/g, "\n\n")
      .replace(/[ \t]+/g, " ")
      .trim()
  );
}
| 85 | + |
/**
 * Recursively collect the paths of all ".html" files under a directory.
 *
 * Entries of each directory are visited in name order (code-unit comparison)
 * so the result does not depend on the order the filesystem reports them in,
 * which keeps the generated output reproducible between machines.
 *
 * @param {string} dir - Directory to walk. A path that does not exist yields no files.
 * @param {string[]} [files] - Accumulator that matches are pushed onto.
 * @returns {string[]} The `files` array, with each match appended as `dir` joined with its relative location.
 */
function findHtmlFiles(dir, files = []) {
  if (!fs.existsSync(dir)) {
    return files;
  }

  const entries = fs
    .readdirSync(dir, { withFileTypes: true })
    .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      findHtmlFiles(fullPath, files);
    } else if (entry.name.endsWith(".html")) {
      files.push(fullPath);
    }
  }

  return files;
}
| 107 | + |
/**
 * Build the site-relative URL for a file stored under the static directory.
 *
 * @param {string} filePath - Path of the file on disk.
 * @param {string} staticDir - Directory the URL should be relative to.
 * @returns {string} The relative path with a leading "/" and with every
 *   backslash replaced by a forward slash.
 */
function getUrlPath(filePath, staticDir) {
  const segments = path.relative(staticDir, filePath).split("\\");
  return `/${segments.join("/")}`;
}
| 116 | + |
/**
 * Sort file paths by importance: Aztec-specific content first, std library last.
 *
 * A path's priority is taken from the first entry of the priority table whose
 * name appears in the path as a complete directory segment; paths matching no
 * entry get the default priority. Both "/" and "\" are accepted as separators,
 * so the ordering is the same on Windows as on POSIX systems.
 *
 * The array is sorted in place and also returned. Equal-priority paths keep
 * their incoming relative order on engines with a stable Array.prototype.sort
 * (required since ES2019).
 *
 * @param {string[]} files - File paths to order.
 * @returns {string[]} The same array instance, sorted.
 */
function sortByImportance(files) {
  const priority = {
    noir_aztec: 0,
    protocol_types: 1,
    address_note: 2,
    balance_set: 2,
    field_note: 2,
    uint_note: 2,
    poseidon: 2,
    compressed_string: 2,
    sha256: 2,
    std: 3,
  };
  const DEFAULT_PRIORITY = 2; // Used for directories not listed above.
  const priorityEntries = Object.entries(priority);

  // Each path is classified once; the comparator then only reads the cache.
  const cache = new Map();
  const getPriority = (filePath) => {
    let value = cache.get(filePath);
    if (value === undefined) {
      const normalized = filePath.replace(/\\/g, "/");
      const match = priorityEntries.find(([dir]) => normalized.includes(`/${dir}/`));
      value = match ? match[1] : DEFAULT_PRIORITY;
      cache.set(filePath, value);
    }
    return value;
  };

  return files.sort((a, b) => getPriority(a) - getPriority(b));
}
| 144 | + |
/**
 * Append the static API reference to the built llms.txt files.
 *
 * For every entry in API_DIRS whose directory exists under STATIC_DIR and
 * contains HTML files:
 *  - build/llms.txt receives a section with links to at most 100 "key" pages
 *    (index pages and fn./struct./trait. pages);
 *  - build/llms-full.txt receives the text of every page whose converted
 *    content is longer than 100 characters.
 *
 * Exits the process with status 1 when build/llms.txt does not exist. When no
 * API documentation is found at all, both files are left untouched instead of
 * receiving an empty "API Reference Documentation" header.
 */
function main() {
  const llmsTxtPath = path.join(BUILD_DIR, "llms.txt");
  const llmsFullTxtPath = path.join(BUILD_DIR, "llms-full.txt");

  // The links file must already have been produced by the site build.
  if (!fs.existsSync(llmsTxtPath)) {
    console.error("Error: build/llms.txt not found. Run the build first.");
    process.exit(1);
  }

  const MAX_LINKS_PER_SECTION = 100;

  // Key pages are the only ones linked from llms.txt, to keep it small.
  // Separators are normalised first so the check also works for
  // backslash-separated paths.
  const isKeyPage = (file) => {
    const normalized = file.replace(/\\/g, "/");
    return (
      normalized.endsWith("index.html") ||
      normalized.includes("/fn.") ||
      normalized.includes("/struct.") ||
      normalized.includes("/trait.")
    );
  };

  let totalFiles = 0;
  let sectionsAdded = 0;
  let linksSection = "\n\n# API Reference Documentation\n\n";
  let fullContentSection = "\n\n---\n\n# API Reference Documentation\n\n";

  for (const apiDir of API_DIRS) {
    const dirPath = path.join(STATIC_DIR, apiDir.dir);

    if (!fs.existsSync(dirPath)) {
      console.log(`Skipping ${apiDir.name}: directory not found`);
      continue;
    }

    const htmlFiles = sortByImportance(findHtmlFiles(dirPath));
    console.log(`Found ${htmlFiles.length} HTML files in ${apiDir.dir}`);

    if (htmlFiles.length === 0) {
      continue;
    }
    sectionsAdded++;

    // Section header, identical in both output files.
    const sectionHeader = `## ${apiDir.name}\n\n${apiDir.description}\n\n`;
    linksSection += sectionHeader;
    fullContentSection += sectionHeader;

    // Links: key pages only, capped per section.
    const keyFiles = htmlFiles.filter(isKeyPage);
    for (const file of keyFiles.slice(0, MAX_LINKS_PER_SECTION)) {
      const urlPath = getUrlPath(file, STATIC_DIR);
      const fileName = path.basename(file, ".html");
      linksSection += `- [${fileName}](${urlPath})\n`;
    }
    if (keyFiles.length > MAX_LINKS_PER_SECTION) {
      linksSection += `- ... and ${keyFiles.length - MAX_LINKS_PER_SECTION} more files\n`;
    }
    linksSection += "\n";

    // Full content: every page. A page that cannot be read or converted is
    // reported and skipped so the remaining pages are still processed.
    for (const file of htmlFiles) {
      try {
        const text = htmlToText(fs.readFileSync(file, "utf-8"));

        // Only include pages with meaningful content.
        if (text.length > 100) {
          const urlPath = getUrlPath(file, STATIC_DIR);
          fullContentSection += `### ${urlPath}\n\n`;
          fullContentSection += `${text}\n\n---\n\n`;
          totalFiles++;
        }
      } catch (err) {
        console.error(`Error processing ${file}: ${err.message}`);
      }
    }
  }

  if (sectionsAdded === 0) {
    console.log("No API reference documentation found; llms.txt files left unchanged");
    return;
  }

  // Append in place; appendFileSync creates llms-full.txt when it is missing.
  fs.appendFileSync(llmsTxtPath, linksSection);
  console.log(`Updated llms.txt with API reference links`);

  fs.appendFileSync(llmsFullTxtPath, fullContentSection);
  console.log(`Updated llms-full.txt with ${totalFiles} API reference files`);
}

main();
0 commit comments