Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions src/app/api/markdown/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import fs from 'fs'
import path from 'path'
import { NextRequest } from 'next/server'
import { processRawMdx } from '@/server/mdxToMarkdown'
import { FULL_DOMAIN } from '@/utils/constants'

const CONTENT_DIR = path.join(process.cwd(), 'src/content')

/**
 * Map a URL path onto the MDX file that backs it, if one exists on disk.
 * Mirrors the resolution logic in [...markdownPath]/page.tsx
 *
 * Lookup order:
 * - empty path (root/home page): `index.mdx`
 * - otherwise: `{urlPath}.mdx`, then `{urlPath}/index.mdx`
 *
 * @param urlPath - Content path relative to the content directory, without extension
 * @returns The first candidate file that exists, or `null` when none does
 */
function resolveMdxFile(urlPath: string): string | null {
  // Candidates are listed in priority order; the first one found on disk wins.
  const candidates = urlPath
    ? [path.join(CONTENT_DIR, `${urlPath}.mdx`), path.join(CONTENT_DIR, urlPath, 'index.mdx')]
    : [path.join(CONTENT_DIR, 'index.mdx')]

  return candidates.find((candidate) => fs.existsSync(candidate)) ?? null
}

/**
 * Serve a documentation page as plain markdown.
 *
 * The content path is read from the `x-markdown-path` request header (set by
 * the middleware) or, for direct access, from the `path` query parameter.
 *
 * Responses:
 * - 200 `text/markdown`: optional `# title` / `> description` header, the
 *   converted page body, and a `Source:` footer
 * - 400: the path attempts to leave the content directory
 * - 404 `text/markdown`: no MDX file matches the path
 * - 500 `text/markdown`: the file could not be resolved, read or converted
 */
export async function GET(request: NextRequest) {
  // Path is passed via header from middleware, or via query param for direct access
  const urlPath =
    request.headers.get('x-markdown-path') ?? request.nextUrl.searchParams.get('path') ?? ''

  // Prevent path traversal: collapse the path, drop leading "../" runs, and
  // reject anything that still contains ".."
  const normalizedPath = path.normalize(urlPath).replace(/^(\.\.[/\\])+/, '')
  if (normalizedPath.includes('..')) {
    return new Response('Invalid path', { status: 400 })
  }

  const mdxFilePath = resolveMdxFile(normalizedPath)

  if (!mdxFilePath) {
    return new Response('# Page not found\n\nThe requested documentation page does not exist.\n', {
      status: 404,
      headers: { 'Content-Type': 'text/markdown; charset=utf-8' },
    })
  }

  try {
    // Defense in depth: verify the resolved path stays within the content directory.
    // realpathSync runs inside the try so that a file removed after the existence
    // check produces the 500 response below instead of an unhandled exception.
    const realMdxPath = fs.realpathSync(mdxFilePath)
    const realContentDir = fs.realpathSync(CONTENT_DIR)

    // Compare via path.relative instead of a raw string prefix: a prefix test
    // would also accept sibling directories such as "<content dir>-private".
    const relativeToContentDir = path.relative(realContentDir, realMdxPath)
    const isInsideContentDir =
      relativeToContentDir !== '' &&
      relativeToContentDir !== '..' &&
      !relativeToContentDir.startsWith(`..${path.sep}`) &&
      !path.isAbsolute(relativeToContentDir)
    if (!isInsideContentDir) {
      return new Response('Invalid path', { status: 400 })
    }

    // Read the path that was just verified
    const rawContent = fs.readFileSync(realMdxPath, 'utf-8')
    const { markdown, title, description } = processRawMdx(rawContent)

    const parts: string[] = []

    if (title) {
      parts.push(`# ${title}`)
    }
    if (description) {
      parts.push(`> ${description}`)
    }
    if (parts.length > 0) {
      // Blank line between the title/description header and the body
      parts.push('')
    }
    parts.push(markdown)

    // Source URL footer
    const sourceUrl = urlPath ? `/${urlPath}` : '/'
    parts.push('')
    parts.push('---')
    parts.push(`Source: ${FULL_DOMAIN}${sourceUrl}`)

    const responseBody = parts.join('\n')

    return new Response(responseBody, {
      status: 200,
      headers: {
        'Content-Type': 'text/markdown; charset=utf-8',
        'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
        // Page URLs are rewritten to this route depending on the Accept and
        // User-Agent request headers, so shared caches must key on them;
        // otherwise a cached markdown body could be served to a browser.
        Vary: 'Accept, User-Agent',
        'X-Content-Type-Options': 'nosniff',
      },
    })
  } catch (error) {
    console.error('Error processing MDX file:', mdxFilePath, error)
    return new Response('# Error\n\nAn error occurred processing this page.\n', {
      status: 500,
      headers: { 'Content-Type': 'text/markdown; charset=utf-8' },
    })
  }
}
80 changes: 80 additions & 0 deletions src/middleware.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { NextRequest, NextResponse } from 'next/server'

/**
 * User-agent substrings that identify AI crawlers/agents.
 * Each entry is used verbatim as one alternative of AI_BOT_REGEX.
 */
const AI_BOT_PATTERNS = [
  'GPTBot',
  'ClaudeBot',
  'Claude-Web',
  'Anthropic',
  'ChatGPT-User',
  'PerplexityBot',
  'Bytespider',
  'Google-Extended',
]

/** Case-insensitive matcher that hits when any AI_BOT_PATTERNS entry occurs in a string. */
const AI_BOT_REGEX = new RegExp(`${AI_BOT_PATTERNS.join('|')}`, 'i')

/**
 * Decide whether a request should be answered with markdown.
 *
 * True when any of the following holds:
 * 1. the Accept header contains `text/markdown`
 * 2. the URL path ends with `.md`
 * 3. the User-Agent matches a known AI bot pattern
 */
function isMarkdownRequest(request: NextRequest): boolean {
  const accept = request.headers.get('accept') ?? ''
  const agent = request.headers.get('user-agent') ?? ''

  return (
    accept.includes('text/markdown') ||
    request.nextUrl.pathname.endsWith('.md') ||
    AI_BOT_REGEX.test(agent)
  )
}

/**
 * Route markdown requests to the markdown API route.
 *
 * Requests that are not markdown requests pass through untouched. For the
 * rest, the page path (without a trailing `.md` and without the leading
 * slash) is forwarded in the `x-markdown-path` request header and the request
 * is rewritten to `/api/markdown` with its query string cleared.
 */
export function middleware(request: NextRequest) {
  if (!isMarkdownRequest(request)) {
    return NextResponse.next()
  }

  // Derive the content path: drop a ".md" suffix first, then the leading slash
  const { pathname } = request.nextUrl
  const withoutSuffix = pathname.endsWith('.md') ? pathname.slice(0, -3) : pathname
  const contentPath = withoutSuffix.startsWith('/') ? withoutSuffix.slice(1) : withoutSuffix

  // Rewrite to the markdown API route.
  // nextUrl.clone() is used to preserve basePath (e.g. /docs).
  const rewriteTarget = request.nextUrl.clone()
  rewriteTarget.pathname = '/api/markdown'
  rewriteTarget.search = ''

  // The content path travels in a header since rewrite query params
  // are not visible to the API route in Next.js
  const forwardedHeaders = new Headers(request.headers)
  forwardedHeaders.set('x-markdown-path', contentPath)

  return NextResponse.rewrite(rewriteTarget, {
    request: { headers: forwardedHeaders },
  })
}

/**
 * Middleware route matcher: selects which request paths run through the
 * middleware exported from this file.
 */
export const config = {
  matcher: [
    /*
     * Match all paths except:
     * - /api/ (API routes)
     * - /_next/ (Next.js internals)
     * - /assets/ (static assets)
     * - common static file extensions (but NOT .md)
     *
     * The negative lookahead is evaluated right after the leading slash and
     * tests prefixes, not whole segments: any path whose first segment merely
     * starts with "api", "_next" or "assets" (e.g. /api-reference) is excluded too.
     * NOTE(review): confirm no content page relies on such a prefix.
     */
    '/((?!api|_next|assets|.*\\.(?:ico|png|jpg|jpeg|gif|svg|css|js|woff|woff2|ttf|eot)$).*)',
  ],
}
177 changes: 177 additions & 0 deletions src/server/mdxToMarkdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import fm from 'front-matter'

/** Result of converting a raw MDX document into markdown. */
interface ProcessedMdx {
  /** Page title taken from the frontmatter; empty string when absent. */
  title: string
  /** Page description taken from the frontmatter; empty string when absent. */
  description: string
  /** Document body with MDX-only syntax removed. */
  markdown: string
}

/**
 * Process a raw MDX file string into clean markdown suitable for AI agents.
 *
 * 1. Parse and extract YAML frontmatter (title, description)
 * 2. Outside fenced code blocks, strip single-line import/export statements,
 *    JSX expression spacers like {' '} and single-line self-closing JSX tags
 * 3. Strip multi-line JSX blocks
 * 4. Clean up excessive whitespace
 *
 * Content inside fenced code blocks (``` ... ```) is left untouched by step 2,
 * so code samples keep their own import/export lines.
 *
 * @param rawContent - Full MDX source, including any frontmatter
 * @returns Title and description from the frontmatter (empty string when
 *   missing) and the cleaned markdown body
 */
export function processRawMdx(rawContent: string): ProcessedMdx {
  const { attributes, body } = fm<Record<string, any>>(rawContent)

  const title = attributes.title ?? ''
  // meta.description takes precedence over a top-level description
  const description = attributes.meta?.description ?? attributes.description ?? ''

  // MDX-only constructs removed from prose lines. Each pattern covers a single
  // line; the remaining lines of a multi-line statement are not matched here.
  const mdxOnlyLinePatterns = [
    // import lines
    /^import\s+.*$/gm,
    // JSX expression spacers: {' '} on their own line
    /^\{'\s*'\}\s*$/gm,
    // export lines (e.g. export const meta = ...)
    /^export\s+.*$/gm,
    // self-closing JSX tags on a single line, e.g. <CodeDemo path="/Nodes/Image" />
    /^[ \t]*<[A-Z][a-zA-Z.]*\b[^>]*\/>\s*$/gm,
  ]

  // Apply the patterns line by line while tracking ``` fences, so that lines
  // belonging to a code sample are never rewritten.
  let inCodeFence = false
  let processed = body
    .split('\n')
    .map((line) => {
      if (line.trim().startsWith('```')) {
        inCodeFence = !inCodeFence
        return line
      }
      if (inCodeFence) {
        return line
      }
      return mdxOnlyLinePatterns.reduce((text, pattern) => text.replace(pattern, ''), line)
    })
    .join('\n')

  // Strip multi-line JSX blocks
  processed = stripJsxBlocks(processed)

  // Clean up excessive blank lines (3+ newlines -> 2)
  processed = processed.replace(/\n{3,}/g, '\n\n')

  // Trim leading/trailing whitespace
  processed = processed.trim()

  return { title, description, markdown: processed }
}

/**
 * Remove JSX blocks from markdown content, line by line.
 *
 * A line that begins with a JSX tag (< or </ followed by an uppercase
 * component name, or div/span) starts a stripped region. Nesting depth is
 * tracked across lines and every line is dropped until the depth returns
 * to zero.
 *
 * Opening tags whose attributes span several lines are supported:
 * <Section
 *   title="..."
 * >
 *
 * Fenced code blocks (``` ... ```) are never parsed for JSX: outside a JSX
 * region they are kept verbatim, inside one they are dropped with it.
 *
 * @param content - Markdown/MDX text
 * @returns The content with JSX lines removed
 */
function stripJsxBlocks(content: string): string {
  // Line starts with an opening or closing JSX tag
  const tagStart = /^[ \t]*<\/?([A-Z][a-zA-Z.]*|div|span)\b/
  // Line starts with an opening JSX tag
  const openingTagStart = /^[ \t]*<([A-Z][a-zA-Z.]*|div|span)\b/
  // Line ends with "/>"
  const endsSelfClosed = /\/>[ \t]*$/
  // Line ends with ">"
  const endsWithBracket = />[ \t]*$/
  // Line contains ">" anywhere
  const hasBracket = />/

  const kept: string[] = []
  let depth = 0
  let insideFence = false
  // True while an opening tag's attributes continue on following lines
  let pendingOpeningTag = false

  for (const rawLine of content.split('\n')) {
    const text = rawLine.trim()
    const outsideJsx = depth === 0 && !pendingOpeningTag

    // Fence markers and fenced lines bypass JSX parsing entirely
    const isFenceMarker = text.startsWith('```')
    if (isFenceMarker) {
      insideFence = !insideFence
    }
    if (isFenceMarker || insideFence) {
      if (outsideJsx) {
        kept.push(rawLine)
      }
      continue
    }

    // Continuation of a multi-line opening tag: always dropped
    if (pendingOpeningTag) {
      if (endsSelfClosed.test(text)) {
        // "/>" finishes a self-closing tag: depth is unchanged
        pendingOpeningTag = false
      } else if (endsWithBracket.test(text)) {
        // ">" finishes the opening tag: the block is now one level deeper
        pendingOpeningTag = false
        depth += 1
      }
      continue
    }

    // Plain markdown outside any JSX block is kept
    if (depth === 0 && !tagStart.test(text)) {
      kept.push(rawLine)
      continue
    }

    // Everything below is JSX and is dropped; only the bookkeeping differs
    const startsOpeningTag = openingTagStart.test(text)

    if (startsOpeningTag && !hasBracket.test(text)) {
      // Opening tag with no ">" on this line: attributes continue below
      pendingOpeningTag = true
      continue
    }

    if (startsOpeningTag && endsSelfClosed.test(text)) {
      // Self-closing tag on a single line: no depth change
      continue
    }

    // Apply the net effect of the complete tags on this line, never below zero
    depth = Math.max(0, depth + countNetJsxDepth(text))
  }

  return kept.join('\n')
}

/**
 * Compute how much a single line changes the JSX nesting depth.
 *
 * Only uppercase component tags and div/span are counted:
 * each opening tag adds 1, each closing tag subtracts 1, and
 * self-closing tags contribute nothing.
 *
 * @param line - One line of MDX source
 * @returns Number of opening tags minus number of closing tags on the line
 */
function countNetJsxDepth(line: string): number {
  // <Component ...> or <div ...>; the lookbehind rejects a "/>" ending
  const opened = line.match(/<([A-Z][a-zA-Z.]*|div|span)\b[^>]*(?<!\/)>/g) ?? []
  // </Component> or </div>
  const closed = line.match(/<\/([A-Z][a-zA-Z.]*|div|span)\b[^>]*>/g) ?? []

  return opened.length - closed.length
}