Skip to content

Commit 5c10513

Browse files
committed
Hide disabled pages from LLMs
Previously, disabled pages (marked as draft) could still be accessed at `/{slug}.md`. This commit also updates llms.txt to pull its content dynamically from the docs collection, which should prevent drift between the human-focused and LLM-focused pages in the future.
1 parent 31b6d6e commit 5c10513

File tree

4 files changed

+146
-239
lines changed

4 files changed

+146
-239
lines changed

src/lib/utils.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,67 @@ import { twMerge } from 'tailwind-merge';
44
/**
 * Builds a single class-name value from any number of class inputs.
 *
 * The inputs are first combined by `clsx`, and that result is then handed to
 * `twMerge`, whose return value is returned unchanged.
 *
 * @param inputs Class values forwarded as one array to `clsx`.
 * @returns Whatever `twMerge` produces for the combined class string.
 */
export function cn(...inputs: ClassValue[]) {
  const combined = clsx(inputs);
  return twMerge(combined);
}
7+
8+
/**
 * Converts MDX source into plain Markdown by removing or flattening the
 * MDX/JSX-specific syntax.
 *
 * Steps, applied in this order:
 *  1. Drop the run of `import` lines at the very start of the input.
 *  2. Flatten each `<Tabs>` block into one bold-labelled section per
 *     `<TabItem label="…">`.
 *  3. Replace each `<CardGrid>` block with a Markdown bullet list built from
 *     its `<LinkCard … />` children.
 *  4. Remove self-closing capitalised components, unwrap `<Callout>` (keeping
 *     its inner text), then strip any remaining capitalised opening/closing
 *     tags.
 *  5. Rewrite root-relative Markdown links (`[text](/path)`) to absolute
 *     `https://docs.sprites.dev/path` URLs.
 *  6. Collapse runs of four or more newlines to three and trim the result.
 *
 * NOTE(review): steps 2-5 run over the whole string, fenced code blocks
 * included, so a code sample containing e.g. `Promise<Response>` loses the
 * `<Response>` part. Left unchanged here; confirm whether that matters.
 *
 * @param content Raw MDX text.
 * @returns The cleaned Markdown text, trimmed of leading/trailing whitespace.
 */
export function cleanMdxContent(content: string): string {
  const siteOrigin = 'https://docs.sprites.dev';

  // Remove MDX import statements at the start of the input only. The regex
  // deliberately has NO `m` flag: with it, `^` matches at every line start, so
  // the first `import ...` line anywhere (for example inside a fenced code
  // sample) was deleted whenever the page had no leading imports.
  content = content.replace(
    /^(\s*import\s+.*?(?:from\s+['"].*?['"])?;?\s*\n)+/,
    '',
  );

  // Process Tabs components - extract TabItem contents. The opening tag may
  // carry attributes (e.g. `<Tabs syncKey="x">`); a bare `<Tabs>` still matches.
  content = content.replace(
    /<Tabs(?:\s[^>]*)?>[\s\S]*?<\/Tabs>/g,
    (match) => {
      const results: string[] = [];
      // Each item runs from its opening tag up to the next <TabItem or </Tabs>.
      const tabItemRegex =
        /<TabItem[^>]*label="([^"]*)"[^>]*>([\s\S]*?)(?=<TabItem|<\/Tabs>)/g;

      for (const [, label, tabContent] of match.matchAll(tabItemRegex)) {
        // The captured text still ends with this item's own closing tag.
        const cleanContent = tabContent.replace(/<\/TabItem>\s*$/, '').trim();
        if (cleanContent) {
          results.push(`**${label}:**\n${cleanContent}`);
        }
      }

      return results.length > 0 ? results.join('\n\n') : '';
    },
  );

  // Reads one double-quoted attribute out of a tag's attribute text. The
  // leading (^|\s) keeps `href` from matching inside e.g. `data-href`.
  const readAttribute = (
    attributes: string,
    pattern: RegExp,
  ): string | undefined => attributes.match(pattern)?.[1];

  // Process CardGrid/LinkCard components - convert to markdown links.
  // Attributes are read independently, so they may appear in any order, and
  // `description` is optional. Cards lacking `href` or `title` are skipped.
  content = content.replace(/<CardGrid[^>]*>[\s\S]*?<\/CardGrid>/g, (match) => {
    const links: string[] = [];

    for (const [, attributes] of match.matchAll(/<LinkCard\s+([^>]*?)\/>/g)) {
      const href = readAttribute(attributes, /(?:^|\s)href="([^"]*)"/);
      const title = readAttribute(attributes, /(?:^|\s)title="([^"]*)"/);
      const description = readAttribute(
        attributes,
        /(?:^|\s)description="([^"]*)"/,
      );
      if (href === undefined || title === undefined) {
        continue;
      }

      const fullUrl = href.startsWith('/') ? `${siteOrigin}${href}` : href;
      const suffix = description !== undefined ? ` - ${description}` : '';
      links.push(`- [${title}](${fullUrl})${suffix}`);
    }

    return links.length > 0 ? links.join('\n') : '';
  });

  // Remove self-closing JSX/MDX components (tag name followed by attributes).
  content = content.replace(/<[A-Z][a-zA-Z]*\s+[^>]*\/>/g, '');

  // Handle Callout components - keep the inner content, drop the wrapper.
  content = content.replace(
    /<Callout[^>]*>([\s\S]*?)<\/Callout>/g,
    (_match: string, inner: string) => inner.trim(),
  );

  // Remove remaining JSX component tags (capitalised names only, so ordinary
  // lowercase HTML tags are left in place).
  content = content.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
  content = content.replace(/<\/[A-Z][a-zA-Z]*>/g, '');

  // Convert root-relative markdown links to fully qualified URLs.
  content = content.replace(
    /\[([^\]]+)\]\(\/([^)]*)\)/g,
    (_match: string, text: string, path: string) =>
      `[${text}](${siteOrigin}/${path})`,
  );

  // Clean up excessive blank lines (4+ newlines become 3).
  content = content.replace(/\n{4,}/g, '\n\n\n');

  return content.trim();
}

src/pages/[...slug].md.ts

Lines changed: 5 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,20 @@
11
import { getCollection } from 'astro:content';
22
import type { APIRoute, GetStaticPaths } from 'astro';
33

4+
import { cleanMdxContent } from '@/lib/utils';
5+
46
export const prerender = true;
57

68
export const getStaticPaths: GetStaticPaths = async () => {
7-
const docs = await getCollection('docs');
9+
const docs = await getCollection('docs', ({ data }) => {
10+
return data.draft !== true;
11+
});
812
return docs.map((doc) => ({
913
params: { slug: doc.id },
1014
props: { doc },
1115
}));
1216
};
1317

14-
function cleanMdxContent(content: string): string {
15-
// Remove MDX import statements at the start of the file
16-
content = content.replace(
17-
/^(\s*import\s+.*?(?:from\s+['"].*?['"])?;?\s*\n)+/m,
18-
'',
19-
);
20-
21-
// Process Tabs components - extract TabItem contents
22-
content = content.replace(/<Tabs>[\s\S]*?<\/Tabs>/g, (match) => {
23-
const results: string[] = [];
24-
const tabItemRegex =
25-
/<TabItem[^>]*label="([^"]*)"[^>]*>([\s\S]*?)(?=<TabItem|<\/Tabs>)/g;
26-
27-
for (const [, label, tabContent] of match.matchAll(tabItemRegex)) {
28-
const cleanContent = tabContent.replace(/<\/TabItem>\s*$/, '').trim();
29-
if (cleanContent) {
30-
results.push(`**${label}:**\n${cleanContent}`);
31-
}
32-
}
33-
34-
return results.length > 0 ? results.join('\n\n') : '';
35-
});
36-
37-
// Process CardGrid/LinkCard components - convert to markdown links
38-
content = content.replace(/<CardGrid[^>]*>[\s\S]*?<\/CardGrid>/g, (match) => {
39-
const links: string[] = [];
40-
const linkCardRegex =
41-
/<LinkCard\s+[^>]*href="([^"]*)"[^>]*title="([^"]*)"[^>]*description="([^"]*)"[^>]*\/>/g;
42-
43-
for (const [, href, title, description] of match.matchAll(linkCardRegex)) {
44-
const fullUrl = href.startsWith('/')
45-
? `https://docs.sprites.dev${href}`
46-
: href;
47-
links.push(`- [${title}](${fullUrl}) - ${description}`);
48-
}
49-
50-
return links.length > 0 ? links.join('\n') : '';
51-
});
52-
53-
// Remove self-closing JSX/MDX components
54-
content = content.replace(/<[A-Z][a-zA-Z]*\s+[^>]*\/>/g, '');
55-
56-
// Handle Callout components - keep content
57-
content = content.replace(
58-
/<Callout[^>]*>([\s\S]*?)<\/Callout>/g,
59-
(_, inner) => inner.trim(),
60-
);
61-
62-
// Remove remaining JSX component tags
63-
content = content.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
64-
content = content.replace(/<\/[A-Z][a-zA-Z]*>/g, '');
65-
66-
// Convert relative links to fully qualified URLs
67-
content = content.replace(
68-
/\[([^\]]+)\]\(\/([^)]*)\)/g,
69-
(_, text, path) => `[${text}](https://docs.sprites.dev/${path})`,
70-
);
71-
72-
// Clean up excessive blank lines
73-
content = content.replace(/\n{4,}/g, '\n\n\n');
74-
75-
return content.trim();
76-
}
77-
7818
export const GET: APIRoute = async ({ props }) => {
7919
const { doc } = props as {
8020
doc: {

src/pages/llms-full.txt.ts

Lines changed: 52 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -1,148 +1,60 @@
1-
import { promises as fs } from 'node:fs';
2-
import { join } from 'node:path';
1+
import type { CollectionEntry } from 'astro:content';
2+
import { getCollection } from 'astro:content';
33
import type { APIRoute } from 'astro';
4+
import { sidebarConfig } from '@/lib/sidebar';
5+
import { cleanMdxContent } from '@/lib/utils';
6+
7+
import type { APIRoute } from 'astro';
8+
import type { CollectionEntry } from 'astro:content';
49

510
export const prerender = true;
611

7-
// Use process.cwd() which is the project root during Astro build
8-
const docsDir = join(process.cwd(), 'src/content/docs');
9-
10-
// Document order matching the sidebar structure
11-
const docOrder = [
12-
// Getting Started
13-
'index.mdx',
14-
'quickstart.mdx',
15-
'working-with-sprites.mdx',
16-
// Concepts
17-
'concepts/lifecycle.mdx',
18-
'concepts/services.mdx',
19-
'concepts/networking.mdx',
20-
'concepts/checkpoints.mdx',
21-
// CLI
22-
'cli/installation.mdx',
23-
'cli/authentication.mdx',
24-
'cli/commands.mdx',
25-
// SDKs
26-
'sdks/javascript.mdx',
27-
'sdks/go.mdx',
28-
// API (generated)
29-
'api/index.mdx',
30-
'api/exec.mdx',
31-
'api/checkpoints.mdx',
32-
'api/services.mdx',
33-
'api/proxy.mdx',
34-
'api/policy.mdx',
35-
'api/types.mdx',
36-
// Reference
37-
'reference/base-images.mdx',
38-
'reference/configuration.mdx',
39-
'reference/billing.mdx',
40-
];
41-
42-
// Section headers for organization
43-
const sections: Record<string, string> = {
44-
'index.mdx': '# Getting Started',
45-
'concepts/lifecycle.mdx': '# Concepts',
46-
'cli/installation.mdx': '# CLI',
47-
'sdks/javascript.mdx': '# SDKs',
48-
'api/index.mdx': '# API',
49-
'reference/base-images.mdx': '# Reference',
12+
export type DocsGroup = {
13+
label: string;
14+
items: {
15+
slug: string;
16+
body: string;
17+
title: string;
18+
description?: string;
19+
}[];
5020
};
5121

52-
interface DocMeta {
53-
title: string;
54-
description?: string;
55-
}
56-
57-
function extractFrontmatter(content: string): { meta: DocMeta; body: string } {
58-
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
59-
60-
if (!frontmatterMatch) {
61-
return { meta: { title: 'Untitled' }, body: content };
62-
}
63-
64-
const [, frontmatterStr, body] = frontmatterMatch;
65-
const meta: DocMeta = { title: 'Untitled' };
22+
export async function getGroupedDocs(): Promise<DocsGroup[]> {
23+
const collection = await getCollection('docs', ({ data }) => {
24+
return data.draft !== true;
25+
});
6626

67-
// Parse YAML-like frontmatter (simple key: value parsing)
68-
for (const line of frontmatterStr.split('\n')) {
69-
const titleMatch = line.match(/^title:\s*(.+)$/);
70-
if (titleMatch) {
71-
meta.title = titleMatch[1].replace(/^["']|["']$/g, '');
72-
}
73-
const descMatch = line.match(/^description:\s*(.+)$/);
74-
if (descMatch) {
75-
meta.description = descMatch[1].replace(/^["']|["']$/g, '');
76-
}
27+
const atlas = new Map<string, CollectionEntry<'docs'>>();
28+
for (const doc of collection) {
29+
atlas.set(doc.id, doc);
7730
}
7831

79-
return { meta, body };
80-
}
81-
82-
function cleanMdxContent(content: string): string {
83-
// Remove MDX import statements at the start of the file (before any content)
84-
// This preserves imports inside code blocks
85-
content = content.replace(
86-
/^(\s*import\s+.*?(?:from\s+['"].*?['"])?;?\s*\n)+/m,
87-
'',
88-
);
89-
90-
// Process Tabs components - extract TabItem contents
91-
// Need to handle nested content carefully (code blocks with special chars)
92-
content = content.replace(/<Tabs>[\s\S]*?<\/Tabs>/g, (match) => {
93-
const results: string[] = [];
94-
95-
// Split by TabItem boundaries and extract content
96-
const tabItemRegex =
97-
/<TabItem[^>]*label="([^"]*)"[^>]*>([\s\S]*?)(?=<TabItem|<\/Tabs>)/g;
98-
99-
for (const [, label, tabContent] of match.matchAll(tabItemRegex)) {
100-
// Clean up the content - remove closing </TabItem> if present
101-
const cleanContent = tabContent.replace(/<\/TabItem>\s*$/, '').trim();
102-
if (cleanContent) {
103-
results.push(`**${label}:**\n${cleanContent}`);
32+
const groups = [];
33+
for (const { label, items: sidebarItems } of sidebarConfig) {
34+
const items = [];
35+
for (const sidebarItem of sidebarItems) {
36+
if (
37+
typeof sidebarItem === 'object' &&
38+
sidebarItem != null &&
39+
'slug' in sidebarItem
40+
) {
41+
const doc = atlas.get(sidebarItem.slug);
42+
if (doc != null && doc.body != null) {
43+
items.push({
44+
slug: doc.id,
45+
body: doc.body,
46+
title: doc.data.title,
47+
description: doc.data.description,
48+
});
49+
} else {
50+
console.warn(`Warning: Could not find ${sidebarItem.label}:`);
51+
}
10452
}
10553
}
54+
groups.push({ label, items });
55+
}
10656

107-
return results.length > 0 ? results.join('\n\n') : '';
108-
});
109-
110-
// Remove self-closing JSX/MDX components (like <Callout ... />)
111-
content = content.replace(/<[A-Z][a-zA-Z]*\s+[^>]*\/>/g, '');
112-
113-
// Remove JSX components with content (non-greedy, for simple components)
114-
// Handle Callout, Snippet, and other simple wrapper components
115-
content = content.replace(
116-
/<Callout[^>]*>([\s\S]*?)<\/Callout>/g,
117-
(_, inner) => {
118-
// Keep the content, just remove the wrapper
119-
return inner.trim();
120-
},
121-
);
122-
123-
// Remove remaining JSX component tags (opening and closing)
124-
content = content.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
125-
content = content.replace(/<\/[A-Z][a-zA-Z]*>/g, '');
126-
127-
// Convert relative links to fully qualified URLs
128-
// Matches markdown links like [text](/path) or [text](/path/)
129-
content = content.replace(
130-
/\[([^\]]+)\]\(\/([^)]*)\)/g,
131-
(_, text, path) => `[${text}](https://docs.sprites.dev/${path})`,
132-
);
133-
134-
// Clean up excessive blank lines
135-
content = content.replace(/\n{4,}/g, '\n\n\n');
136-
137-
// Trim leading/trailing whitespace
138-
content = content.trim();
139-
140-
return content;
141-
}
142-
143-
function slugToUrl(slug: string): string {
144-
const path = slug.replace(/\.mdx$/, '').replace(/^index$/, '');
145-
return `https://docs.sprites.dev/${path}${path ? '/' : ''}`;
57+
return groups;
14658
}
14759

14860
export const GET: APIRoute = async () => {
@@ -160,34 +72,21 @@ Summary: https://docs.sprites.dev/llms.txt
16072
---
16173
`);
16274

163-
let currentSection = '';
164-
165-
for (const docPath of docOrder) {
166-
const fullPath = join(docsDir, docPath);
167-
168-
try {
169-
const content = await fs.readFile(fullPath, 'utf-8');
170-
const { meta, body } = extractFrontmatter(content);
75+
const groups = await getGroupedDocs();
76+
for (const { label, items } of groups) {
77+
parts.push(`\n# ${label}\n`);
78+
for (const { slug, title, description, body } of items) {
17179
const cleanedContent = cleanMdxContent(body);
17280

173-
// Add section header if we're entering a new section
174-
if (sections[docPath] && sections[docPath] !== currentSection) {
175-
currentSection = sections[docPath];
176-
parts.push(`\n${currentSection}\n`);
177-
}
178-
17981
// Add document with title and URL
180-
const url = slugToUrl(docPath);
181-
parts.push(`## ${meta.title}
82+
parts.push(`## ${title}
18283
183-
URL: ${url}
184-
${meta.description ? `\n${meta.description}\n` : ''}
84+
URL: https://docs.sprites.dev/${slug}.md
85+
${description ? `\n${description}\n` : ''}
18586
${cleanedContent}
18687
18788
---
18889
`);
189-
} catch (error) {
190-
console.warn(`Warning: Could not read ${docPath}:`, error);
19190
}
19291
}
19392

0 commit comments

Comments
 (0)