Skip to content

Commit 725668f

Browse files
committed
Hide disabled pages from LLMs
Previously, disabled pages (marked as draft) could still be accessed at `/{slug}.md`. This also updates llms.txt to dynamically pull from the docs collection. This should avoid any drift between the human-focused and LLM-focused pages in the future.
1 parent 31b6d6e commit 725668f

File tree

4 files changed

+143
-239
lines changed

4 files changed

+143
-239
lines changed

src/lib/utils.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,67 @@ import { twMerge } from 'tailwind-merge';
44
export function cn(...inputs: ClassValue[]) {
55
return twMerge(clsx(inputs));
66
}
7+
8+
/**
 * Converts raw MDX page source into plain Markdown for the LLM-facing
 * endpoints.
 *
 * Steps, in order:
 * 1. Fenced and inline code is stashed behind placeholders so that none of the
 *    later rewrites can alter code samples (e.g. stripping `Promise<Response>`
 *    as if `<Response>` were a JSX tag, or deleting an `import` line that is
 *    part of an example).
 * 2. The first run of MDX `import` lines is removed.
 * 3. `<Tabs>` blocks become one `**label:**` section per `<TabItem>`.
 * 4. `<CardGrid>` blocks become a Markdown list of links built from their
 *    `<LinkCard>` children.
 * 5. `<Callout>` wrappers are unwrapped and every other capitalised JSX tag is
 *    dropped.
 * 6. Root-relative Markdown links are prefixed with `https://docs.sprites.dev`.
 * 7. Runs of four or more newlines are collapsed to three.
 * 8. The stashed code is restored and the result is trimmed.
 *
 * @param content - Raw MDX body of a docs page.
 * @returns The cleaned Markdown text.
 */
export function cleanMdxContent(content: string): string {
  // Stash code spans first. Alternative 1 is a fenced block (``` or ~~~, closed
  // by a fence of the same kind on its own line); alternative 2 is single-line
  // inline code. An unterminated fence simply is not matched and is left as-is.
  const stashedCode: string[] = [];
  const codePattern =
    /^[ \t]*(`{3,}|~{3,})[^\n]*\n(?:[\s\S]*?\n)?[ \t]*\1[`~]*[ \t]*$|`[^`\n]+`/gm;
  content = content.replace(codePattern, (code: string) => {
    stashedCode.push(code);
    // NUL-delimited index: cannot be produced by, or matched by, any of the
    // markup patterns below.
    return `\u0000${stashedCode.length - 1}\u0000`;
  });

  // Remove MDX import statements (first contiguous run of import lines)
  content = content.replace(
    /^(\s*import\s+.*?(?:from\s+['"].*?['"])?;?\s*\n)+/m,
    '',
  );

  // Process Tabs components - extract TabItem contents.
  // The opening tag may carry attributes (e.g. syncKey).
  const tabItemPattern =
    /<TabItem[^>]*label="([^"]*)"[^>]*>([\s\S]*?)(?=<TabItem|<\/Tabs>)/g;
  content = content.replace(
    /<Tabs(?:\s[^>]*)?>[\s\S]*?<\/Tabs>/g,
    (tabs: string) => {
      const sections: string[] = [];
      for (const [, label, rawBody] of tabs.matchAll(tabItemPattern)) {
        // The lazy capture stops at the next <TabItem or </Tabs>, so it still
        // ends with this item's own closing tag.
        const body = rawBody.replace(/<\/TabItem>\s*$/, '').trim();
        if (body) {
          sections.push(`**${label}:**\n${body}`);
        }
      }
      return sections.join('\n\n');
    },
  );

  // Process CardGrid/LinkCard components - convert to markdown links.
  // Attributes are read individually so their order in the tag does not matter.
  content = content.replace(
    /<CardGrid[^>]*>[\s\S]*?<\/CardGrid>/g,
    (grid: string) => {
      const links: string[] = [];
      for (const [card] of grid.matchAll(/<LinkCard\s+[^>]*\/>/g)) {
        const href = readJsxAttribute(card, 'href');
        const title = readJsxAttribute(card, 'title');
        if (href === undefined || title === undefined) {
          continue; // not enough information to build a link
        }
        const description = readJsxAttribute(card, 'description');
        const fullUrl = href.startsWith('/')
          ? `https://docs.sprites.dev${href}`
          : href;
        links.push(
          description !== undefined
            ? `- [${title}](${fullUrl}) - ${description}`
            : `- [${title}](${fullUrl})`,
        );
      }
      return links.join('\n');
    },
  );

  // Remove self-closing JSX/MDX components
  content = content.replace(/<[A-Z][a-zA-Z]*\s+[^>]*\/>/g, '');

  // Handle Callout components - keep content
  content = content.replace(
    /<Callout[^>]*>([\s\S]*?)<\/Callout>/g,
    (_match: string, inner: string) => inner.trim(),
  );

  // Remove remaining JSX component tags (opening, then closing)
  content = content.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
  content = content.replace(/<\/[A-Z][a-zA-Z]*>/g, '');

  // Convert relative links to fully qualified URLs
  content = content.replace(
    /\[([^\]]+)\]\(\/([^)]*)\)/g,
    (_match: string, text: string, path: string) =>
      `[${text}](https://docs.sprites.dev/${path})`,
  );

  // Clean up excessive blank lines
  content = content.replace(/\n{4,}/g, '\n\n\n');

  // Put the stashed code back. A replacer function is used so `$` sequences
  // inside code are inserted literally.
  content = content.replace(
    /\u0000(\d+)\u0000/g,
    (placeholder: string, index: string) =>
      stashedCode[Number(index)] ?? placeholder,
  );

  return content.trim();
}

/** Returns the double-quoted value of attribute `name` in a JSX tag, if present. */
function readJsxAttribute(tag: string, name: string): string | undefined {
  // Require leading whitespace so `href` does not match e.g. `data-href`.
  return new RegExp(`\\s${name}="([^"]*)"`).exec(tag)?.[1];
}

src/pages/[...slug].md.ts

Lines changed: 5 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,20 @@
11
import { getCollection } from 'astro:content';
22
import type { APIRoute, GetStaticPaths } from 'astro';
33

4+
import { cleanMdxContent } from '@/lib/utils';
5+
46
export const prerender = true;
57

68
export const getStaticPaths: GetStaticPaths = async () => {
7-
const docs = await getCollection('docs');
9+
const docs = await getCollection('docs', ({ data }) => {
10+
return data.draft !== true;
11+
});
812
return docs.map((doc) => ({
913
params: { slug: doc.id },
1014
props: { doc },
1115
}));
1216
};
1317

14-
function cleanMdxContent(content: string): string {
15-
// Remove MDX import statements at the start of the file
16-
content = content.replace(
17-
/^(\s*import\s+.*?(?:from\s+['"].*?['"])?;?\s*\n)+/m,
18-
'',
19-
);
20-
21-
// Process Tabs components - extract TabItem contents
22-
content = content.replace(/<Tabs>[\s\S]*?<\/Tabs>/g, (match) => {
23-
const results: string[] = [];
24-
const tabItemRegex =
25-
/<TabItem[^>]*label="([^"]*)"[^>]*>([\s\S]*?)(?=<TabItem|<\/Tabs>)/g;
26-
27-
for (const [, label, tabContent] of match.matchAll(tabItemRegex)) {
28-
const cleanContent = tabContent.replace(/<\/TabItem>\s*$/, '').trim();
29-
if (cleanContent) {
30-
results.push(`**${label}:**\n${cleanContent}`);
31-
}
32-
}
33-
34-
return results.length > 0 ? results.join('\n\n') : '';
35-
});
36-
37-
// Process CardGrid/LinkCard components - convert to markdown links
38-
content = content.replace(/<CardGrid[^>]*>[\s\S]*?<\/CardGrid>/g, (match) => {
39-
const links: string[] = [];
40-
const linkCardRegex =
41-
/<LinkCard\s+[^>]*href="([^"]*)"[^>]*title="([^"]*)"[^>]*description="([^"]*)"[^>]*\/>/g;
42-
43-
for (const [, href, title, description] of match.matchAll(linkCardRegex)) {
44-
const fullUrl = href.startsWith('/')
45-
? `https://docs.sprites.dev${href}`
46-
: href;
47-
links.push(`- [${title}](${fullUrl}) - ${description}`);
48-
}
49-
50-
return links.length > 0 ? links.join('\n') : '';
51-
});
52-
53-
// Remove self-closing JSX/MDX components
54-
content = content.replace(/<[A-Z][a-zA-Z]*\s+[^>]*\/>/g, '');
55-
56-
// Handle Callout components - keep content
57-
content = content.replace(
58-
/<Callout[^>]*>([\s\S]*?)<\/Callout>/g,
59-
(_, inner) => inner.trim(),
60-
);
61-
62-
// Remove remaining JSX component tags
63-
content = content.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
64-
content = content.replace(/<\/[A-Z][a-zA-Z]*>/g, '');
65-
66-
// Convert relative links to fully qualified URLs
67-
content = content.replace(
68-
/\[([^\]]+)\]\(\/([^)]*)\)/g,
69-
(_, text, path) => `[${text}](https://docs.sprites.dev/${path})`,
70-
);
71-
72-
// Clean up excessive blank lines
73-
content = content.replace(/\n{4,}/g, '\n\n\n');
74-
75-
return content.trim();
76-
}
77-
7818
export const GET: APIRoute = async ({ props }) => {
7919
const { doc } = props as {
8020
doc: {

src/pages/llms-full.txt.ts

Lines changed: 49 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -1,148 +1,57 @@
1-
import { promises as fs } from 'node:fs';
2-
import { join } from 'node:path';
1+
import type { CollectionEntry } from 'astro:content';
2+
import { getCollection } from 'astro:content';
33
import type { APIRoute } from 'astro';
4+
import { sidebarConfig } from '@/lib/sidebar';
5+
import { cleanMdxContent } from '@/lib/utils';
46

57
export const prerender = true;
68

7-
// Use process.cwd() which is the project root during Astro build
8-
const docsDir = join(process.cwd(), 'src/content/docs');
9-
10-
// Document order matching the sidebar structure
11-
const docOrder = [
12-
// Getting Started
13-
'index.mdx',
14-
'quickstart.mdx',
15-
'working-with-sprites.mdx',
16-
// Concepts
17-
'concepts/lifecycle.mdx',
18-
'concepts/services.mdx',
19-
'concepts/networking.mdx',
20-
'concepts/checkpoints.mdx',
21-
// CLI
22-
'cli/installation.mdx',
23-
'cli/authentication.mdx',
24-
'cli/commands.mdx',
25-
// SDKs
26-
'sdks/javascript.mdx',
27-
'sdks/go.mdx',
28-
// API (generated)
29-
'api/index.mdx',
30-
'api/exec.mdx',
31-
'api/checkpoints.mdx',
32-
'api/services.mdx',
33-
'api/proxy.mdx',
34-
'api/policy.mdx',
35-
'api/types.mdx',
36-
// Reference
37-
'reference/base-images.mdx',
38-
'reference/configuration.mdx',
39-
'reference/billing.mdx',
40-
];
41-
42-
// Section headers for organization
43-
const sections: Record<string, string> = {
44-
'index.mdx': '# Getting Started',
45-
'concepts/lifecycle.mdx': '# Concepts',
46-
'cli/installation.mdx': '# CLI',
47-
'sdks/javascript.mdx': '# SDKs',
48-
'api/index.mdx': '# API',
49-
'reference/base-images.mdx': '# Reference',
9+
export type DocsGroup = {
10+
label: string;
11+
items: {
12+
slug: string;
13+
body: string;
14+
title: string;
15+
description?: string;
16+
}[];
5017
};
5118

52-
interface DocMeta {
53-
title: string;
54-
description?: string;
55-
}
56-
57-
function extractFrontmatter(content: string): { meta: DocMeta; body: string } {
58-
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
19+
export async function getGroupedDocs(): Promise<DocsGroup[]> {
20+
const collection = await getCollection('docs', ({ data }) => {
21+
return data.draft !== true;
22+
});
5923

60-
if (!frontmatterMatch) {
61-
return { meta: { title: 'Untitled' }, body: content };
24+
const atlas = new Map<string, CollectionEntry<'docs'>>();
25+
for (const doc of collection) {
26+
atlas.set(doc.id, doc);
6227
}
6328

64-
const [, frontmatterStr, body] = frontmatterMatch;
65-
const meta: DocMeta = { title: 'Untitled' };
66-
67-
// Parse YAML-like frontmatter (simple key: value parsing)
68-
for (const line of frontmatterStr.split('\n')) {
69-
const titleMatch = line.match(/^title:\s*(.+)$/);
70-
if (titleMatch) {
71-
meta.title = titleMatch[1].replace(/^["']|["']$/g, '');
72-
}
73-
const descMatch = line.match(/^description:\s*(.+)$/);
74-
if (descMatch) {
75-
meta.description = descMatch[1].replace(/^["']|["']$/g, '');
76-
}
77-
}
78-
79-
return { meta, body };
80-
}
81-
82-
function cleanMdxContent(content: string): string {
83-
// Remove MDX import statements at the start of the file (before any content)
84-
// This preserves imports inside code blocks
85-
content = content.replace(
86-
/^(\s*import\s+.*?(?:from\s+['"].*?['"])?;?\s*\n)+/m,
87-
'',
88-
);
89-
90-
// Process Tabs components - extract TabItem contents
91-
// Need to handle nested content carefully (code blocks with special chars)
92-
content = content.replace(/<Tabs>[\s\S]*?<\/Tabs>/g, (match) => {
93-
const results: string[] = [];
94-
95-
// Split by TabItem boundaries and extract content
96-
const tabItemRegex =
97-
/<TabItem[^>]*label="([^"]*)"[^>]*>([\s\S]*?)(?=<TabItem|<\/Tabs>)/g;
98-
99-
for (const [, label, tabContent] of match.matchAll(tabItemRegex)) {
100-
// Clean up the content - remove closing </TabItem> if present
101-
const cleanContent = tabContent.replace(/<\/TabItem>\s*$/, '').trim();
102-
if (cleanContent) {
103-
results.push(`**${label}:**\n${cleanContent}`);
29+
const groups = [];
30+
for (const { label, items: sidebarItems } of sidebarConfig) {
31+
const items = [];
32+
for (const sidebarItem of sidebarItems) {
33+
if (
34+
typeof sidebarItem === 'object' &&
35+
sidebarItem != null &&
36+
'slug' in sidebarItem
37+
) {
38+
const doc = atlas.get(sidebarItem.slug);
39+
if (doc != null && doc.body != null) {
40+
items.push({
41+
slug: doc.id,
42+
body: doc.body,
43+
title: doc.data.title,
44+
description: doc.data.description,
45+
});
46+
} else {
47+
console.warn(`Warning: Could not find ${sidebarItem.label}:`);
48+
}
10449
}
10550
}
51+
groups.push({ label, items });
52+
}
10653

107-
return results.length > 0 ? results.join('\n\n') : '';
108-
});
109-
110-
// Remove self-closing JSX/MDX components (like <Callout ... />)
111-
content = content.replace(/<[A-Z][a-zA-Z]*\s+[^>]*\/>/g, '');
112-
113-
// Remove JSX components with content (non-greedy, for simple components)
114-
// Handle Callout, Snippet, and other simple wrapper components
115-
content = content.replace(
116-
/<Callout[^>]*>([\s\S]*?)<\/Callout>/g,
117-
(_, inner) => {
118-
// Keep the content, just remove the wrapper
119-
return inner.trim();
120-
},
121-
);
122-
123-
// Remove remaining JSX component tags (opening and closing)
124-
content = content.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
125-
content = content.replace(/<\/[A-Z][a-zA-Z]*>/g, '');
126-
127-
// Convert relative links to fully qualified URLs
128-
// Matches markdown links like [text](/path) or [text](/path/)
129-
content = content.replace(
130-
/\[([^\]]+)\]\(\/([^)]*)\)/g,
131-
(_, text, path) => `[${text}](https://docs.sprites.dev/${path})`,
132-
);
133-
134-
// Clean up excessive blank lines
135-
content = content.replace(/\n{4,}/g, '\n\n\n');
136-
137-
// Trim leading/trailing whitespace
138-
content = content.trim();
139-
140-
return content;
141-
}
142-
143-
function slugToUrl(slug: string): string {
144-
const path = slug.replace(/\.mdx$/, '').replace(/^index$/, '');
145-
return `https://docs.sprites.dev/${path}${path ? '/' : ''}`;
54+
return groups;
14655
}
14756

14857
export const GET: APIRoute = async () => {
@@ -160,34 +69,21 @@ Summary: https://docs.sprites.dev/llms.txt
16069
---
16170
`);
16271

163-
let currentSection = '';
164-
165-
for (const docPath of docOrder) {
166-
const fullPath = join(docsDir, docPath);
167-
168-
try {
169-
const content = await fs.readFile(fullPath, 'utf-8');
170-
const { meta, body } = extractFrontmatter(content);
72+
const groups = await getGroupedDocs();
73+
for (const { label, items } of groups) {
74+
parts.push(`\n# ${label}\n`);
75+
for (const { slug, title, description, body } of items) {
17176
const cleanedContent = cleanMdxContent(body);
17277

173-
// Add section header if we're entering a new section
174-
if (sections[docPath] && sections[docPath] !== currentSection) {
175-
currentSection = sections[docPath];
176-
parts.push(`\n${currentSection}\n`);
177-
}
178-
17978
// Add document with title and URL
180-
const url = slugToUrl(docPath);
181-
parts.push(`## ${meta.title}
79+
parts.push(`## ${title}
18280
183-
URL: ${url}
184-
${meta.description ? `\n${meta.description}\n` : ''}
81+
URL: https://docs.sprites.dev/${slug}.md
82+
${description ? `\n${description}\n` : ''}
18583
${cleanedContent}
18684
18785
---
18886
`);
189-
} catch (error) {
190-
console.warn(`Warning: Could not read ${docPath}:`, error);
19187
}
19288
}
19389

0 commit comments

Comments
 (0)