Skip to content

Commit 41207ab

Browse files
authored
Add API route to export full documentation as text (#11)
1 parent e59ea96 commit 41207ab

File tree

1 file changed

+195
-0
lines changed

1 file changed

+195
-0
lines changed

src/pages/llms-full.txt.ts

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
import { promises as fs } from 'node:fs';
2+
import { join } from 'node:path';
3+
import type { APIRoute } from 'astro';
4+
5+
// Directory that holds the documentation sources. process.cwd() is the
// project root while Astro builds, so the content folder is resolved from it.
const docsDir = join(process.cwd(), 'src/content/docs');

// Astro route option: generate this endpoint ahead of time.
export const prerender = true;
9+
10+
// Documents to export, in sidebar order. Each inner list is one sidebar
// group; the groups are flattened into a single ordered list of paths
// relative to the docs content directory.
const docOrder: string[] = [
  // Getting Started
  ['index.mdx', 'quickstart.mdx', 'working-with-sprites.mdx'],
  // Concepts
  [
    'concepts/lifecycle.mdx',
    'concepts/services.mdx',
    'concepts/networking.mdx',
    'concepts/checkpoints.mdx',
  ],
  // CLI
  ['cli/installation.mdx', 'cli/authentication.mdx', 'cli/commands.mdx'],
  // SDKs
  ['sdks/javascript.mdx', 'sdks/go.mdx'],
  // API
  ['api/rest.mdx'],
  // Reference
  [
    'reference/base-images.mdx',
    'reference/configuration.mdx',
    'reference/billing.mdx',
  ],
].flat();
35+
36+
// Top-level headings for the export, keyed by the document that opens each
// sidebar group. The heading is written before that document.
const sections: Record<string, string> = Object.fromEntries<string>([
  ['index.mdx', '# Getting Started'],
  ['concepts/lifecycle.mdx', '# Concepts'],
  ['cli/installation.mdx', '# CLI'],
  ['sdks/javascript.mdx', '# SDKs'],
  ['api/rest.mdx', '# API'],
  ['reference/base-images.mdx', '# Reference'],
]);
45+
46+
/** Metadata read from a document's frontmatter block. */
interface DocMeta {
  /** Page title; 'Untitled' when the frontmatter has no usable title. */
  title: string;
  /** Optional one-line summary of the page. */
  description?: string;
}

/**
 * Splits a document into its frontmatter metadata and the remaining body.
 *
 * Only single-line `title:` and `description:` entries are understood; this
 * is a deliberately small key/value reader, not a YAML parser. Both LF and
 * CRLF line endings are accepted, and the closing `---` may be the last line
 * of the file.
 *
 * @param content Raw file contents, optionally starting with a `---` block.
 * @returns The parsed metadata and the text after the frontmatter. When no
 *   frontmatter is found, the title is 'Untitled' and the body is the
 *   unchanged input.
 */
function extractFrontmatter(content: string): { meta: DocMeta; body: string } {
  const match = content.match(
    /^\uFEFF?---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)([\s\S]*)$/,
  );

  if (!match) {
    return { meta: { title: 'Untitled' }, body: content };
  }

  const frontmatter = match[1] ?? '';
  const body = match[2] ?? '';
  const meta: DocMeta = { title: 'Untitled' };

  // Strip quotes only when the value is wrapped in a matching pair, so an
  // apostrophe or quote that merely starts or ends the text is preserved.
  const unquote = (raw: string): string => {
    const value = raw.trim();
    const first = value.charAt(0);
    const isQuoted =
      value.length >= 2 &&
      (first === '"' || first === "'") &&
      value.endsWith(first);
    return isQuoted ? value.slice(1, -1) : value;
  };

  for (const line of frontmatter.split(/\r?\n/)) {
    const field = line.match(/^(title|description):\s*(.*)$/);
    if (!field) {
      continue;
    }

    const value = unquote(field[2] ?? '');
    if (!value) {
      // An empty value keeps the default title / leaves description unset.
      continue;
    }

    if (field[1] === 'title') {
      meta.title = value;
    } else {
      meta.description = value;
    }
  }

  return { meta, body };
}
75+
76+
/**
 * Converts the MDX body of a documentation page into plain Markdown text.
 *
 * Steps, in order:
 *  1. Fenced code blocks and inline code spans are set aside so that none of
 *     the rewrites below can alter code samples.
 *  2. MDX `import` statements are removed.
 *  3. `<Tabs>` groups are flattened into `**label:**` sections.
 *  4. Self-closing components are dropped, `<Callout>` wrappers are unwrapped,
 *     and any remaining capitalized component tags are removed.
 *  5. Site-relative Markdown links are made absolute.
 *  6. Runs of blank lines are collapsed and the result is trimmed.
 *  7. The code set aside in step 1 is restored verbatim.
 *
 * @param content MDX source without its frontmatter.
 * @returns The cleaned text.
 */
function cleanMdxContent(content: string): string {
  // Code is replaced by NUL-delimited placeholders, which none of the
  // patterns below can match, and restored at the very end.
  const protectedCode: string[] = [];
  const protect = (code: string): string => {
    protectedCode.push(code);
    return `\u0000${protectedCode.length - 1}\u0000`;
  };

  // Fenced blocks first (``` or ~~~, optionally indented), then inline spans.
  content = content.replace(
    /^[ \t]*(`{3,}|~{3,})[^`\n]*\n[\s\S]*?^[ \t]*\1[`~]*[ \t]*$/gm,
    (block) => protect(block),
  );
  content = content.replace(/`[^`\n\u0000]+`/g, (span) => protect(span));

  // Remove MDX import statements. A quoted module specifier is required so
  // ordinary prose that begins with the word "import" is left alone; imports
  // shown inside code samples are already protected above.
  content = content.replace(
    /^[ \t]*import\s+(?:[\w\s{},*$]+?\s+from\s+)?(['"])[^'"\n]+\1;?[ \t]*\n?/gm,
    '',
  );

  // Flatten Tabs groups: each TabItem becomes "**label:**" plus its content.
  content = content.replace(
    /<Tabs(?:\s[^>]*)?>[\s\S]*?<\/Tabs>/g,
    (tabsBlock) => {
      // A tab's content runs up to the next <TabItem or the closing </Tabs>.
      const tabItemRegex =
        /<TabItem[^>]*label="([^"]*)"[^>]*>([\s\S]*?)(?=<TabItem|<\/Tabs>)/g;
      const rendered: string[] = [];

      for (const item of tabsBlock.matchAll(tabItemRegex)) {
        const label = item[1] ?? '';
        // Drop the tab's own closing tag, if it was captured with the content.
        const tabBody = (item[2] ?? '').replace(/<\/TabItem>\s*$/, '').trim();
        if (tabBody) {
          rendered.push(`**${label}:**\n${tabBody}`);
        }
      }

      return rendered.join('\n\n');
    },
  );

  // Remove self-closing JSX/MDX components (like <Callout ... />).
  content = content.replace(/<[A-Z][a-zA-Z]*\s+[^>]*\/>/g, '');

  // Unwrap Callout components: keep the inner text, drop the wrapper.
  content = content.replace(
    /<Callout[^>]*>([\s\S]*?)<\/Callout>/g,
    (_wrapper: string, inner: string) => inner.trim(),
  );

  // Remove any remaining component tags (opening and closing).
  content = content.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
  content = content.replace(/<\/[A-Z][a-zA-Z]*>/g, '');

  // Make site-relative links absolute: [text](/path) -> full URL.
  // Protocol-relative targets (//host/...) are not site-relative; skip them.
  content = content.replace(
    /\[([^\]]+)\]\(\/(?!\/)([^)]*)\)/g,
    (_link: string, text: string, path: string) =>
      `[${text}](https://docs.sprites.dev/${path})`,
  );

  // Collapse excessive blank lines, then trim the edges.
  content = content.replace(/\n{4,}/g, '\n\n\n').trim();

  // Put the protected code back exactly as it was written.
  return content.replace(
    /\u0000(\d+)\u0000/g,
    (token: string, index: string) => protectedCode[Number(index)] ?? token,
  );
}
136+
137+
/**
 * Builds the public documentation URL for a content file path.
 *
 * The `.mdx` / `.md` extension is dropped, and an `index` file maps to its
 * containing directory (the site root for a top-level `index`). Every
 * non-root URL ends with a trailing slash.
 *
 * @param slug Path relative to the docs content directory, using forward
 *   slashes (for example `cli/commands.mdx`).
 * @returns The absolute URL on docs.sprites.dev.
 */
function slugToUrl(slug: string): string {
  const path = slug
    .replace(/\.mdx?$/, '')
    // Strip a final "index" segment only; names such as "reindex" are kept.
    .replace(/(^|\/)index$/, '');
  return `https://docs.sprites.dev/${path}${path ? '/' : ''}`;
}
141+
142+
/**
 * Endpoint handler that returns the whole documentation set as one
 * plain-text document.
 *
 * Every file listed in `docOrder` is read from `docsDir`, split into
 * frontmatter and body, cleaned with `cleanMdxContent`, and appended in order
 * under its section heading. A file that cannot be read or processed is
 * logged with `console.warn` and skipped; it does not abort the export.
 *
 * @returns A `text/plain; charset=utf-8` response containing the export.
 */
export const GET: APIRoute = async () => {
  const parts: string[] = [];

  // Header
  parts.push(`# Sprites Documentation (Full Content)

> This file contains the complete documentation for Sprites, a product by Fly.io that provides persistent, hardware-isolated execution environments for arbitrary code.

Generated: ${new Date().toISOString().split('T')[0]}
Source: https://docs.sprites.dev/
Summary: https://docs.sprites.dev/llms.txt

---
`);

  // The files are independent of each other, so load and render them
  // concurrently. Promise.all keeps the results in docOrder order.
  const documents = await Promise.all(
    docOrder.map(async (docPath) => {
      try {
        const content = await fs.readFile(join(docsDir, docPath), 'utf-8');
        const { meta, body } = extractFrontmatter(content);
        const cleanedContent = cleanMdxContent(body);
        const url = slugToUrl(docPath);

        const rendered = `## ${meta.title}

URL: ${url}
${meta.description ? `\n${meta.description}\n` : ''}
${cleanedContent}

---
`;
        return { docPath, rendered };
      } catch (error: unknown) {
        console.warn(`Warning: Could not read ${docPath}:`, error);
        return { docPath, rendered: undefined };
      }
    }),
  );

  let currentSection = '';
  // Heading waiting to be written. It is held until a document in that
  // section actually loads, so a section whose first file is missing still
  // gets its heading, and a section with no readable files gets none.
  let pendingSection: string | undefined;

  for (const { docPath, rendered } of documents) {
    const heading = sections[docPath];
    if (heading && heading !== currentSection) {
      pendingSection = heading;
    }

    if (rendered === undefined) {
      continue;
    }

    if (pendingSection !== undefined) {
      currentSection = pendingSection;
      parts.push(`\n${currentSection}\n`);
      pendingSection = undefined;
    }

    parts.push(rendered);
  }

  const fullContent = parts.join('\n');

  return new Response(fullContent, {
    headers: {
      'Content-Type': 'text/plain; charset=utf-8',
    },
  });
};

0 commit comments

Comments
 (0)