ueberdosis · mkriegeskorte · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/src/app/api/markdown/route.ts b/src/app/api/markdown/route.ts
@@ -0,0 +1,105 @@
+import fs from 'fs'
+import path from 'path'
+import { NextRequest } from 'next/server'
+import { processRawMdx } from '@/server/mdxToMarkdown'
+import { FULL_DOMAIN } from '@/utils/constants'
+
+// Absolute path of the MDX content root, resolved against the process working directory.
+const CONTENT_DIR = path.join(process.cwd(), 'src', 'content')
+
+/**
+ * Map a URL path onto the MDX file that backs it.
+ * Intended to mirror the resolution logic in [...markdownPath]/page.tsx
+ *
+ * An empty path resolves to the root `index.mdx`. Any other path is tried as
+ * `{urlPath}.mdx` first and as `{urlPath}/index.mdx` second.
+ *
+ * @param urlPath - Content-relative URL path; empty for the home page.
+ * @returns Path of the first candidate that exists on disk, or `null` if none does.
+ */
+function resolveMdxFile(urlPath: string): string | null {
+  const candidates = urlPath
+    ? [path.join(CONTENT_DIR, `${urlPath}.mdx`), path.join(CONTENT_DIR, urlPath, 'index.mdx')]
+    : [path.join(CONTENT_DIR, 'index.mdx')]
+
+  return candidates.find((candidate) => fs.existsSync(candidate)) ?? null
+}
+
+/**
+ * Serve one documentation page as markdown.
+ *
+ * The content path is read from the `x-markdown-path` header (set by the middleware
+ * rewrite) or, for direct access, from the `path` query parameter.
+ *
+ * Responses:
+ * - 200 `text/markdown`: optional `# title` and `> description`, the page body and a
+ *   `Source:` footer
+ * - 400 when the path is rejected by the traversal checks
+ * - 404 when no MDX file matches the path
+ * - 500 when resolving, reading or converting the file throws
+ */
+export async function GET(request: NextRequest) {
+  // Path is passed via header from middleware, or via query param for direct access
+  const urlPath =
+    request.headers.get('x-markdown-path') ?? request.nextUrl.searchParams.get('path') ?? ''
+
+  // Prevent path traversal: drop leading "../" segments, then reject any remaining ".."
+  const normalizedPath = path.normalize(urlPath).replace(/^(\.\.[/\\])+/, '')
+  if (normalizedPath.includes('..')) {
+    return new Response('Invalid path', { status: 400 })
+  }
+
+  const mdxFilePath = resolveMdxFile(normalizedPath)
+
+  if (!mdxFilePath) {
+    return new Response('# Page not found\n\nThe requested documentation page does not exist.\n', {
+      status: 404,
+      headers: { 'Content-Type': 'text/markdown; charset=utf-8' },
+    })
+  }
+
+  try {
+    // Defense in depth: verify the resolved path stays within the content directory.
+    // Compare via path.relative instead of a string prefix, because a prefix test
+    // would also accept a sibling directory such as "<content>-private".
+    // realpathSync lives inside the try so a failure yields the 500 response below.
+    const realMdxPath = fs.realpathSync(mdxFilePath)
+    const realContentDir = fs.realpathSync(CONTENT_DIR)
+    const relativeToContent = path.relative(realContentDir, realMdxPath)
+    const escapesContentDir =
+      relativeToContent === '..' ||
+      relativeToContent.startsWith(`..${path.sep}`) ||
+      path.isAbsolute(relativeToContent)
+    if (escapesContentDir) {
+      return new Response('Invalid path', { status: 400 })
+    }
+
+    const rawContent = fs.readFileSync(realMdxPath, 'utf-8')
+    const { markdown, title, description } = processRawMdx(rawContent)
+
+    const parts: string[] = []
+
+    if (title) {
+      parts.push(`# ${title}`)
+    }
+    if (description) {
+      parts.push(`> ${description}`)
+    }
+    if (parts.length > 0) {
+      parts.push('')
+    }
+    parts.push(markdown)
+
+    // Source URL footer. Built from the normalized path rather than the raw input so
+    // it names the page that was actually served. path.normalize('') yields '.',
+    // which stands for the root page.
+    const sourcePath =
+      normalizedPath === '.' ? '' : normalizedPath.split(path.sep).join('/').replace(/^\/+/, '')
+    parts.push('', '---', `Source: ${FULL_DOMAIN}/${sourcePath}`)
+
+    const responseBody = parts.join('\n')
+
+    return new Response(responseBody, {
+      status: 200,
+      headers: {
+        'Content-Type': 'text/markdown; charset=utf-8',
+        'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
+        // The middleware picks this representation based on Accept and User-Agent, so
+        // shared caches must not reuse it for requests that differ in those headers.
+        Vary: 'Accept, User-Agent',
+        'X-Content-Type-Options': 'nosniff',
+      },
+    })
+  } catch (error) {
+    console.error('Error processing MDX file:', mdxFilePath, error)
+    return new Response('# Error\n\nAn error occurred processing this page.\n', {
+      status: 500,
+      headers: { 'Content-Type': 'text/markdown; charset=utf-8' },
+    })
+  }
+}
diff --git a/src/middleware.ts b/src/middleware.ts
@@ -0,0 +1,80 @@
+import { NextRequest, NextResponse } from 'next/server'
+
+// User-agent substrings that identify AI crawlers and assistants.
+const AI_BOT_PATTERNS = [
+  'GPTBot',
+  'ClaudeBot',
+  'Claude-Web',
+  'Anthropic',
+  'ChatGPT-User',
+  'PerplexityBot',
+  'Bytespider',
+  'Google-Extended',
+] as const
+
+// Case-insensitive alternation over all of the patterns above.
+const AI_BOT_REGEX = new RegExp(AI_BOT_PATTERNS.join('|'), 'i')
+
+/**
+ * Decide whether a request should be answered with markdown instead of HTML.
+ *
+ * Returns true when any of these holds:
+ * 1. the `Accept` header mentions `text/markdown` (compared case-insensitively,
+ *    because media type names are case-insensitive)
+ * 2. the URL path ends with `.md`
+ * 3. the `User-Agent` header matches one of the known AI bot patterns
+ *
+ * @param request - Incoming request as seen by the middleware.
+ * @returns Whether the request qualifies for the markdown representation.
+ */
+function isMarkdownRequest(request: NextRequest): boolean {
+  // 1. Accept header contains text/markdown
+  const acceptHeader = (request.headers.get('accept') ?? '').toLowerCase()
+  if (acceptHeader.includes('text/markdown')) {
+    return true
+  }
+
+  // 2. URL ends with .md
+  if (request.nextUrl.pathname.endsWith('.md')) {
+    return true
+  }
+
+  // 3. Known AI bot user agent
+  const userAgent = request.headers.get('user-agent') ?? ''
+  return AI_BOT_REGEX.test(userAgent)
+}
+
+/**
+ * Rewrite markdown-eligible requests to the markdown API route.
+ *
+ * Requests that do not qualify (see isMarkdownRequest) pass through unchanged.
+ * For the rest, the content path is the request pathname without a trailing `.md`
+ * and without its leading slash; it is forwarded in the `x-markdown-path` header.
+ */
+export function middleware(request: NextRequest) {
+  if (!isMarkdownRequest(request)) {
+    return NextResponse.next()
+  }
+
+  // Derive the content path: drop a ".md" suffix first, then one leading slash.
+  const { pathname } = request.nextUrl
+  const withoutSuffix = pathname.endsWith('.md') ? pathname.slice(0, -3) : pathname
+  const contentPath = withoutSuffix.startsWith('/') ? withoutSuffix.slice(1) : withoutSuffix
+
+  // Clone nextUrl so that basePath (e.g. /docs) is preserved on the rewrite target.
+  const rewriteTarget = request.nextUrl.clone()
+  rewriteTarget.pathname = '/api/markdown'
+  rewriteTarget.search = ''
+
+  // The content path travels in a header because rewrite query params
+  // are not visible to the API route in Next.js.
+  const forwardedHeaders = new Headers(request.headers)
+  forwardedHeaders.set('x-markdown-path', contentPath)
+
+  return NextResponse.rewrite(rewriteTarget, {
+    request: { headers: forwardedHeaders },
+  })
+}
+
+// Middleware configuration: `matcher` limits which request paths are matched.
+export const config = {
+  matcher: [
+    /*
+     * Match all paths except:
+     * - /api/ (API routes)
+     * - /_next/ (Next.js internals)
+     * - /assets/ (static assets)
+     * - common static file extensions (but NOT .md)
+     *
+     * NOTE(review): the lookahead entries `api`, `_next` and `assets` are bare
+     * prefixes with no trailing slash, so a path such as /api-reference or
+     * /assets-guide is excluded as well. Confirm that this is intended.
+     */
+    '/((?!api|_next|assets|.*\\.(?:ico|png|jpg|jpeg|gif|svg|css|js|woff|woff2|ttf|eot)$).*)',
+  ],
+}
diff --git a/src/server/mdxToMarkdown.ts b/src/server/mdxToMarkdown.ts
@@ -0,0 +1,177 @@
+import fm from 'front-matter'
+
+/** Result of converting one MDX document with processRawMdx. */
+interface ProcessedMdx {
+  /** Frontmatter `title`, or '' when the frontmatter has none. */
+  title: string
+  /** Frontmatter `meta.description`, else `description`, else ''. */
+  description: string
+  /** Document body after MDX-only syntax was stripped and whitespace cleaned up. */
+  markdown: string
+}
+
+/**
+ * Process a raw MDX file string into clean markdown suitable for AI agents.
+ *
+ * 1. Parse YAML frontmatter: `title`, and `meta.description` falling back to
+ *    `description` (each defaults to '' when absent)
+ * 2. Outside fenced code blocks only: strip import/export lines, `{' '}` spacer
+ *    lines and self-closing JSX tags
+ * 3. Strip multi-line JSX blocks via stripJsxBlocks
+ * 4. Collapse runs of 3+ newlines into a single blank line and trim the result
+ *
+ * Content inside ``` fences is passed through step 2 untouched, so an `import` or
+ * `export` line in a code sample is not removed.
+ *
+ * @param rawContent - Full MDX source, optionally starting with YAML frontmatter.
+ * @returns The extracted title and description plus the cleaned markdown body.
+ */
+export function processRawMdx(rawContent: string): ProcessedMdx {
+  const { attributes, body } = fm<{
+    title?: string
+    description?: string
+    meta?: { description?: string }
+  }>(rawContent)
+
+  const title = attributes.title ?? ''
+  const description = attributes.meta?.description ?? attributes.description ?? ''
+
+  // Line-oriented stripping first (fence-aware), then multi-line JSX blocks
+  let processed = stripJsxBlocks(stripMdxSyntaxOutsideFences(body))
+
+  // Clean up excessive blank lines (3+ newlines -> 2)
+  processed = processed.replace(/\n{3,}/g, '\n\n')
+
+  // Trim leading/trailing whitespace
+  processed = processed.trim()
+
+  return { title, description, markdown: processed }
+}
+
+/**
+ * Run the line-oriented MDX strippers over every stretch of `body` that lies outside
+ * a ``` fence. Fence delimiter lines and fenced content are copied through untouched.
+ */
+function stripMdxSyntaxOutsideFences(body: string): string {
+  const output: string[] = []
+  let pending: string[] = []
+  let inFence = false
+
+  // Clean the buffered non-fenced lines as one segment and append the result.
+  const flushPending = (): void => {
+    if (pending.length === 0) {
+      return
+    }
+    const cleaned = pending
+      .join('\n')
+      // Strip import lines
+      .replace(/^import\s+.*$/gm, '')
+      // Strip JSX expression spacers: {' '} on their own line
+      .replace(/^\{'\s*'\}\s*$/gm, '')
+      // Strip export lines (e.g. export const meta = ...)
+      .replace(/^export\s+.*$/gm, '')
+      // Strip self-closing JSX tags, e.g. <CodeDemo path="/Nodes/Image" />
+      .replace(/^[ \t]*<[A-Z][a-zA-Z.]*\b[^>]*\/>\s*$/gm, '')
+    output.push(cleaned)
+    pending = []
+  }
+
+  for (const line of body.split('\n')) {
+    // Same fence detection as stripJsxBlocks: a trimmed line starting with ```
+    const isFenceDelimiter = line.trim().startsWith('```')
+    if (isFenceDelimiter || inFence) {
+      flushPending()
+      output.push(line)
+      if (isFenceDelimiter) {
+        inFence = !inFence
+      }
+    } else {
+      pending.push(line)
+    }
+  }
+  flushPending()
+
+  return output.join('\n')
+}
+
+/**
+ * Remove JSX blocks from markdown content, line by line.
+ *
+ * A block starts on a line that begins with a JSX tag (`<` followed by an
+ * uppercase letter, or `<div` / `<span`). From there the nesting depth is
+ * tracked and every line is dropped until the depth is back at zero.
+ *
+ * Opening tags whose attributes span several lines are supported:
+ *   <Section
+ *     title="..."
+ *   >
+ *
+ * Lines outside of any JSX block are kept as they are, including fenced code
+ * blocks (``` ... ```), whose content is never inspected for tags. A fenced
+ * code block that sits inside a JSX block is dropped together with that block.
+ *
+ * @param content - Markdown/MDX text.
+ * @returns The text with all lines belonging to JSX blocks removed.
+ */
+function stripJsxBlocks(content: string): string {
+  const kept: string[] = []
+
+  let depth = 0
+  let insideFence = false
+  // True while an opening tag is still waiting for its closing ">",
+  // e.g. after "<Section" and before the line that ends the tag.
+  let insideOpeningTag = false
+
+  for (const rawLine of content.split('\n')) {
+    const text = rawLine.trim()
+    const outsideJsx = depth === 0 && !insideOpeningTag
+
+    // Fence delimiters and fenced content are never scanned for tags; they are
+    // kept only when they are not part of a JSX block.
+    const isFenceDelimiter = text.startsWith('```')
+    if (isFenceDelimiter || insideFence) {
+      if (isFenceDelimiter) {
+        insideFence = !insideFence
+      }
+      if (outsideJsx) {
+        kept.push(rawLine)
+      }
+      continue
+    }
+
+    // Inside a multi-line opening tag: drop the line and look for the closing ">".
+    if (insideOpeningTag) {
+      if (/>[ \t]*$/.test(text)) {
+        insideOpeningTag = false
+        // "/>" ends a self-closing tag (net zero); a plain ">" opens a block.
+        if (!/\/>[ \t]*$/.test(text)) {
+          depth += 1
+        }
+      }
+      continue
+    }
+
+    // Plain markdown line outside of any JSX block: keep it.
+    const startsWithTag = /^[ \t]*<\/?([A-Z][a-zA-Z.]*|div|span)\b/.test(text)
+    if (depth === 0 && !startsWithTag) {
+      kept.push(rawLine)
+      continue
+    }
+
+    // Everything below is JSX and gets dropped; only the depth bookkeeping differs.
+    const startsWithOpeningTag = /^[ \t]*<([A-Z][a-zA-Z.]*|div|span)\b/.test(text)
+    if (startsWithOpeningTag) {
+      if (!/>/.test(text)) {
+        // No ">" on this line: the opening tag continues on the following lines.
+        insideOpeningTag = true
+        continue
+      }
+      if (/\/>[ \t]*$/.test(text)) {
+        // Self-closing tag on a single line: net zero depth change.
+        continue
+      }
+    }
+
+    // Apply the depth change of all complete tags on this line, never below zero.
+    depth = Math.max(0, depth + countNetJsxDepth(text))
+  }
+
+  return kept.join('\n')
+}
+
+/**
+ * Compute how much a single line changes the JSX nesting depth.
+ *
+ * Only capitalized component tags and `div` / `span` are considered. Every opening
+ * tag adds 1, every closing tag subtracts 1, and self-closing tags count as 0.
+ *
+ * @param line - One line of MDX source.
+ * @returns Number of opening tags minus number of closing tags on the line.
+ */
+function countNetJsxDepth(line: string): number {
+  // Opening tags whose ">" is not preceded by "/" (so not self-closing, not closing)
+  const opened = line.match(/<([A-Z][a-zA-Z.]*|div|span)\b[^>]*(?<!\/)>/g)?.length ?? 0
+
+  // Closing tags: </Component> or </div>
+  const closed = line.match(/<\/([A-Z][a-zA-Z.]*|div|span)\b[^>]*>/g)?.length ?? 0
+
+  return opened - closed
+}