Skip to content

Commit 40e7a22

Browse files
authored
feat(negotiate): RFC 7231 content negotiation with 406 support (#83)
1 parent 848cef9 commit 40e7a22

4 files changed

Lines changed: 266 additions & 44 deletions

File tree

packages/js/src/negotiate.ts

Lines changed: 104 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
export type ContentNegotiationResult = 'markdown' | 'html' | 'not-acceptable'
2+
13
interface AcceptEntry {
24
type: string
35
q: number
@@ -25,7 +27,6 @@ export function parseAcceptHeader(accept: string): AcceptEntry[] {
2527
}
2628
else {
2729
type = part.slice(0, semicolonIdx).trim()
28-
// Extract q value without regex for performance
2930
const paramStr = part.slice(semicolonIdx + 1)
3031
const qIdx = paramStr.indexOf('q=')
3132
if (qIdx !== -1) {
@@ -43,36 +44,49 @@ export function parseAcceptHeader(accept: string): AcceptEntry[] {
4344
}
4445

4546
/**
46-
* Determine if a client prefers markdown over HTML using proper content negotiation.
47-
*
48-
* Uses Accept header quality weights and position ordering:
49-
* - If text/markdown or text/plain has higher quality than text/html -> markdown
50-
* - If same quality, earlier position in Accept header wins
51-
* - Bare wildcard does NOT trigger markdown (prevents breaking OG crawlers)
52-
* - sec-fetch-dest: document always returns false (browser navigation)
47+
* Perform RFC 7231 content negotiation for HTML vs Markdown.
5348
*
54-
* @param acceptHeader - The HTTP Accept header value
55-
* @param secFetchDest - The Sec-Fetch-Dest header value
49+
* Resolution rules:
50+
* - `Sec-Fetch-Dest: document` always returns `'html'` (browser navigation).
51+
* - Missing or empty Accept header returns `'html'` (server picks default).
52+
* - q=0 entries are treated as explicit rejections and ignored for matching
53+
* (but still count towards "something was listed").
54+
* - `text/markdown` and `text/plain` are the markdown-capable types.
55+
* - `text/html` and `application/xhtml+xml` are the html-capable types.
56+
* - `*\/*` and `text/*` are wildcards; they prevent a 406 but never, on their
57+
* own, tip negotiation towards markdown (preserves OG crawler behavior).
58+
* - If nothing in the Accept header can be served (no explicit match, no
59+
* wildcard), returns `'not-acceptable'` so the caller can send 406.
60+
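* - Otherwise, compares the best markdown entry vs the best html entry (each falling back to the best wildcard when its type was neither listed nor rejected)
61+
* by q, then by position (earlier wins; an exact tie resolves to html).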
5662
*/
57-
export function shouldServeMarkdown(acceptHeader?: string, secFetchDest?: string): boolean {
58-
if (secFetchDest === 'document') {
59-
return false
60-
}
63+
export function negotiateContent(acceptHeader?: string, secFetchDest?: string): ContentNegotiationResult {
64+
if (secFetchDest === 'document')
65+
return 'html'
6166

6267
const accept = acceptHeader || ''
6368
if (!accept)
64-
return false
69+
return 'html'
6570

66-
const parts = accept.split(',')
6771
let bestMdQ = -1
6872
let bestMdPos = -1
69-
let htmlQ = -1
70-
let htmlPos = -1
73+
let bestHtmlQ = -1
74+
let bestHtmlPos = -1
75+
let bestWildcardQ = -1
76+
let bestWildcardPos = -1
77+
let sawAnyEntry = false
78+
let sawAcceptable = false
79+
// Track explicit q=0 rejections so wildcard fallback can't resurrect them.
80+
let rejectedMd = false
81+
let rejectedHtml = false
7182

83+
const parts = accept.split(',')
7284
for (let i = 0; i < parts.length; i++) {
7385
const part = parts[i]!.trim()
7486
if (!part)
7587
continue
88+
sawAnyEntry = true
89+
7690
const semicolonIdx = part.indexOf(';')
7791
let type: string
7892
let q = 1
@@ -82,7 +96,15 @@ export function shouldServeMarkdown(acceptHeader?: string, secFetchDest?: string
8296
else {
8397
type = part.slice(0, semicolonIdx).trim()
8498
const paramStr = part.slice(semicolonIdx + 1)
85-
const qIdx = paramStr.indexOf('q=')
99+
// Find q= case-insensitively without allocating.
100+
let qIdx = -1
101+
for (let j = 0; j < paramStr.length - 1; j++) {
102+
const c = paramStr.charCodeAt(j)
103+
if ((c === 113 || c === 81) && paramStr.charCodeAt(j + 1) === 61 /* = */) {
104+
qIdx = j
105+
break
106+
}
107+
}
86108
if (qIdx !== -1) {
87109
const qStart = qIdx + 2
88110
let qEnd = qStart
@@ -93,26 +115,76 @@ export function shouldServeMarkdown(acceptHeader?: string, secFetchDest?: string
93115
}
94116
}
95117

96-
if (type === 'text/markdown' || type === 'text/plain') {
97-
if (q > bestMdQ || (q === bestMdQ && (bestMdPos === -1 || i < bestMdPos))) {
118+
// Normalize type for case-insensitive comparison (media types per RFC 7231).
119+
const normalized = type.toLowerCase()
120+
121+
if (normalized === 'text/markdown' || normalized === 'text/plain') {
122+
if (q === 0) {
123+
rejectedMd = true
124+
continue
125+
}
126+
sawAcceptable = true
127+
if (q > bestMdQ || (q === bestMdQ && bestMdPos === -1)) {
98128
bestMdQ = q
99129
bestMdPos = i
100130
}
101131
}
102-
else if (type === 'text/html') {
103-
htmlQ = q
104-
htmlPos = i
132+
else if (normalized === 'text/html' || normalized === 'application/xhtml+xml') {
133+
if (q === 0) {
134+
rejectedHtml = true
135+
continue
136+
}
137+
sawAcceptable = true
138+
if (q > bestHtmlQ || (q === bestHtmlQ && bestHtmlPos === -1)) {
139+
bestHtmlQ = q
140+
bestHtmlPos = i
141+
}
142+
}
143+
else if (normalized === '*/*' || normalized === 'text/*') {
144+
if (q === 0)
145+
continue
146+
sawAcceptable = true
147+
if (q > bestWildcardQ || (q === bestWildcardQ && bestWildcardPos === -1)) {
148+
bestWildcardQ = q
149+
bestWildcardPos = i
150+
}
105151
}
106152
}
107153

154+
if (sawAnyEntry && !sawAcceptable)
155+
return 'not-acceptable'
156+
157+
// Apply wildcard fallback only when the concrete type wasn't explicitly rejected.
158+
if (bestMdPos === -1 && !rejectedMd && bestWildcardPos !== -1) {
159+
bestMdQ = bestWildcardQ
160+
bestMdPos = bestWildcardPos
161+
}
162+
if (bestHtmlPos === -1 && !rejectedHtml && bestWildcardPos !== -1) {
163+
bestHtmlQ = bestWildcardQ
164+
bestHtmlPos = bestWildcardPos
165+
}
166+
167+
// Both concrete types were explicitly rejected (q=0) and only a wildcard
168+
// remained. The wildcard satisfied `sawAcceptable`, but we literally cannot
169+
// serve anything the client didn't veto, so 406 is the honest answer.
170+
if (bestMdPos === -1 && bestHtmlPos === -1)
171+
return 'not-acceptable'
108172
if (bestMdPos === -1)
109-
return false
110-
if (htmlPos === -1)
111-
return true
112-
if (bestMdQ > htmlQ)
113-
return true
114-
if (bestMdQ === htmlQ && bestMdPos < htmlPos)
115-
return true
173+
return 'html'
174+
if (bestHtmlPos === -1)
175+
return 'markdown'
176+
if (bestMdQ > bestHtmlQ)
177+
return 'markdown'
178+
if (bestMdQ === bestHtmlQ && bestMdPos < bestHtmlPos)
179+
return 'markdown'
180+
return 'html'
181+
}
116182

117-
return false
183+
/**
184+
* Determine if a client prefers markdown over HTML. Convenience wrapper over
185+
* {@link negotiateContent}; treats `'not-acceptable'` the same as `'html'`
186+
* (callers that want 406 semantics should use `negotiateContent` directly).
187+
*/
188+
export function shouldServeMarkdown(acceptHeader?: string, secFetchDest?: string): boolean {
189+
return negotiateContent(acceptHeader, secFetchDest) === 'markdown'
118190
}

packages/js/test/unit/negotiate.test.ts

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, expect, it } from 'vitest'
2-
import { parseAcceptHeader, shouldServeMarkdown } from '../../src/negotiate'
2+
import { negotiateContent, parseAcceptHeader, shouldServeMarkdown } from '../../src/negotiate'
33

44
describe('parseAcceptHeader', () => {
55
it('returns empty array for empty string', () => {
@@ -135,3 +135,80 @@ describe('shouldServeMarkdown', () => {
135135
expect(shouldServeMarkdown('text/markdown', undefined)).toBe(true)
136136
})
137137
})
138+
139+
describe('negotiateContent', () => {
140+
it('returns html for sec-fetch-dest: document regardless of Accept', () => {
141+
expect(negotiateContent('text/markdown', 'document')).toBe('html')
142+
expect(negotiateContent('*/*', 'document')).toBe('html')
143+
})
144+
145+
it('returns html for missing Accept header', () => {
146+
expect(negotiateContent()).toBe('html')
147+
expect(negotiateContent('')).toBe('html')
148+
})
149+
150+
it('returns html for standard browser Accept header', () => {
151+
expect(negotiateContent('text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')).toBe('html')
152+
})
153+
154+
it('returns html for bare wildcard', () => {
155+
expect(negotiateContent('*/*')).toBe('html')
156+
})
157+
158+
it('returns markdown for text/markdown only', () => {
159+
expect(negotiateContent('text/markdown')).toBe('markdown')
160+
})
161+
162+
it('returns markdown when text/markdown beats text/html by quality', () => {
163+
expect(negotiateContent('text/markdown, text/html;q=0.9, */*;q=0.1')).toBe('markdown')
164+
})
165+
166+
it('returns html when text/html beats text/markdown by quality', () => {
167+
expect(negotiateContent('text/markdown;q=0.5, text/html;q=1.0')).toBe('html')
168+
})
169+
170+
it('returns markdown when text/plain has higher q than text/html', () => {
171+
expect(negotiateContent('text/html;q=0.5, text/plain;q=0.9')).toBe('markdown')
172+
})
173+
174+
it('returns not-acceptable when Accept lists only unsupported types', () => {
175+
expect(negotiateContent('application/x-content-negotiation-probe')).toBe('not-acceptable')
176+
expect(negotiateContent('application/json')).toBe('not-acceptable')
177+
expect(negotiateContent('application/pdf, application/json')).toBe('not-acceptable')
178+
})
179+
180+
it('returns html when wildcard is present alongside unsupported types', () => {
181+
expect(negotiateContent('application/json, */*')).toBe('html')
182+
})
183+
184+
it('treats q=0 as rejection', () => {
185+
expect(negotiateContent('text/html;q=0, text/markdown')).toBe('markdown')
186+
expect(negotiateContent('text/markdown;q=0, text/html')).toBe('html')
187+
expect(negotiateContent('text/html;q=0, application/json;q=0')).toBe('not-acceptable')
188+
})
189+
190+
it('does not let wildcard resurrect explicitly rejected types', () => {
191+
// text/html was rejected, so */* can only satisfy markdown.
192+
expect(negotiateContent('text/html;q=0, */*;q=1, text/markdown;q=0.5')).toBe('markdown')
193+
// text/markdown was rejected, so */* can only satisfy html.
194+
expect(negotiateContent('text/markdown;q=0, */*;q=1, text/html;q=0.5')).toBe('html')
195+
// Both explicitly rejected; wildcard cannot satisfy anything we can serve.
196+
expect(negotiateContent('text/markdown;q=0, text/html;q=0, */*;q=1')).toBe('not-acceptable')
197+
})
198+
199+
it('normalizes media types case-insensitively', () => {
200+
expect(negotiateContent('Text/Markdown')).toBe('markdown')
201+
expect(negotiateContent('TEXT/HTML')).toBe('html')
202+
expect(negotiateContent('Text/Markdown;Q=1')).toBe('markdown')
203+
expect(negotiateContent('Application/XHTML+XML')).toBe('html')
204+
})
205+
206+
it('accepts application/xhtml+xml as html-capable', () => {
207+
expect(negotiateContent('application/xhtml+xml')).toBe('html')
208+
})
209+
210+
it('respects text/* wildcard as html fallback', () => {
211+
expect(negotiateContent('text/*')).toBe('html')
212+
expect(negotiateContent('text/markdown, text/*')).toBe('markdown')
213+
})
214+
})

packages/nuxt/src/runtime/server/middleware/mdream.ts

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@ import type { H3Event } from 'h3'
22
import type { MdreamOptions } from 'mdream'
33
import type { MdreamMarkdownContext, MdreamNegotiateContext, ModuleRuntimeConfig } from '../../types.js'
44
import { withSiteUrl } from '#site-config/server/composables/utils'
5-
import { shouldServeMarkdown as _shouldServeMarkdown } from '@mdream/js/negotiate'
5+
import { negotiateContent } from '@mdream/js/negotiate'
66
import { consola } from 'consola'
7-
import { createError, defineEventHandler, getHeader, setHeader } from 'h3'
7+
import { appendHeader, createError, defineEventHandler, getHeader, setHeader } from 'h3'
88
import { htmlToMarkdown } from 'mdream'
99
import { useNitroApp, useRuntimeConfig } from 'nitropack/runtime'
1010

1111
const logger = consola.withTag('nuxt-mdream')
1212

13-
function shouldServeMarkdown(event: H3Event): boolean {
14-
return _shouldServeMarkdown(
13+
function negotiate(event: H3Event) {
14+
return negotiateContent(
1515
getHeader(event, 'accept'),
1616
getHeader(event, 'sec-fetch-dest'),
1717
)
@@ -76,16 +76,26 @@ export default defineEventHandler(async (event) => {
7676

7777
// Check if we should serve markdown based on Accept header or .md extension
7878
const hasMarkdownExtension = path.endsWith('.md')
79-
let clientPrefersMarkdown = shouldServeMarkdown(event)
79+
const negotiation = negotiate(event)
80+
81+
// Advertise that the response varies by these request headers so caches
82+
// don't collapse markdown and html responses together.
83+
appendHeader(event, 'Vary', 'Accept, Sec-Fetch-Dest')
84+
85+
let clientPrefersMarkdown = negotiation === 'markdown'
8086

8187
// Allow users to override the negotiate decision via hook
8288
const nitroApp = useNitroApp()
8389
const negotiateContext: MdreamNegotiateContext = { event, shouldServe: clientPrefersMarkdown }
8490
await nitroApp.hooks.callHook('mdream:negotiate', negotiateContext)
8591
clientPrefersMarkdown = negotiateContext.shouldServe
8692

87-
// Early exit: skip if not requesting .md and client doesn't prefer markdown
88-
if (!hasMarkdownExtension && !clientPrefersMarkdown) {
93+
// Early exit: skip if not requesting .md and client doesn't prefer markdown.
94+
// We defer the 406 decision until after fetching downstream, because this
95+
// middleware runs for every extensionless route and we can't 406 a JSON-only
96+
// endpoint like /health just because Accept didn't list text/*.
97+
const wantsNotAcceptable = !hasMarkdownExtension && !clientPrefersMarkdown && negotiation === 'not-acceptable'
98+
if (!hasMarkdownExtension && !clientPrefersMarkdown && !wantsNotAcceptable) {
8999
return
90100
}
91101

@@ -127,9 +137,20 @@ export default defineEventHandler(async (event) => {
127137
message: `Expected text/html but got ${contentType} for ${path}`,
128138
})
129139
}
140+
// Not an HTML route, fall through so the non-HTML response is served
130141
return
131142
}
132143

144+
// We now know the route serves HTML. If the client's Accept header listed
145+
// nothing we can serve, this is a genuine 406.
146+
if (wantsNotAcceptable) {
147+
return createError({
148+
statusCode: 406,
149+
statusMessage: 'Not Acceptable',
150+
message: 'This resource can be served as text/html or text/markdown.',
151+
})
152+
}
153+
133154
html = response._data as string
134155
}
135156
catch (e) {

0 commit comments

Comments
 (0)