Skip to content

Commit 8038ef2

Browse files
committed
fix(sec): embed-block の ReDoS 脆弱性を修正
- 正規表現ベースの allowlist を DOM パーサーベースに変更
- EmbedAllowlistEntry 型を導入(host + pathPrefix)
- 複数 iframe を禁止(1個のみ許可)
- JSDOM の生成を1回に統合
- パス境界チェックを追加(/maps/embedX を拒否)
1 parent 5ea375a commit 8038ef2

File tree

3 files changed

+463
-134
lines changed

3 files changed

+463
-134
lines changed

packages/render/src/elements/embed-block.ts

Lines changed: 127 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -32,41 +32,39 @@ const BOOLEAN_ATTRIBUTES = [
3232
];
3333

3434
/**
35-
* Default allowlist patterns for embed content (ported from Wikidot's default.php)
36-
* Only content matching these patterns will be rendered.
37-
*
38-
* Security: The 'anyiframe' pattern is kept for Wikidot compatibility, but
39-
* hasDangerousIframeAttributes() blocks dangerous attributes like srcdoc and
40-
* non-https src URLs. hasDangerousScripts() blocks all script tags.
35+
* Allowlist entry for embed content validation
36+
* Each entry specifies a host pattern and optional path prefix
4137
*/
42-
export const DEFAULT_EMBED_ALLOWLIST: RegExp[] = [
43-
// Any iframe with standard attributes (Wikidot's 'anyiframe' pattern)
44-
// Note: Dangerous attributes are blocked separately by hasDangerousIframeAttributes()
45-
/^<iframe(\s+[a-z0-9_]+\s*=\s*"[^"]*")+>\s*<\/iframe>$/is,
46-
47-
// YouTube embed
48-
/^<iframe[^>]*\s+src="https?:\/\/(www\.)?youtube\.com\/embed\/[a-zA-Z0-9_-]+"[^>]*>\s*<\/iframe>$/is,
49-
/^<iframe[^>]*\s+src="https?:\/\/(www\.)?youtube-nocookie\.com\/embed\/[a-zA-Z0-9_-]+"[^>]*>\s*<\/iframe>$/is,
50-
51-
// Vimeo embed
52-
/^<iframe[^>]*\s+src="https?:\/\/player\.vimeo\.com\/video\/[0-9]+"[^>]*>\s*<\/iframe>$/is,
38+
export interface EmbedAllowlistEntry {
39+
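/** Host pattern. Supports wildcard prefix '*.' (e.g., '*.youtube.com' matches 'youtube.com' itself and any subdomain such as 'www.youtube.com'); without the wildcard the host must match exactly. Matching is case-insensitive. */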
40+
host: string;
41+
/** Optional path prefix that must match (e.g., '/embed/') */
42+
pathPrefix?: string;
43+
}
5344

45+
/**
46+
* Default allowlist for embed content (ported from Wikidot's default.php)
47+
* Only iframes with src matching these host+path patterns will be rendered.
48+
*
49+
* Note: Pass `embedAllowlist: null` in the render options to allow any HTTPS iframe (Wikidot's 'anyiframe' behavior).
50+
* The HTTPS-only rule is still enforced in that mode, and DOMPurify still removes dangerous attributes.
51+
*/
52+
export const DEFAULT_EMBED_ALLOWLIST: EmbedAllowlistEntry[] | null = [
53+
// YouTube
54+
{ host: "*.youtube.com", pathPrefix: "/embed/" },
55+
{ host: "*.youtube-nocookie.com", pathPrefix: "/embed/" },
56+
// Vimeo
57+
{ host: "player.vimeo.com", pathPrefix: "/video/" },
5458
// Google Maps
55-
/^<iframe[^>]*\s+src="https?:\/\/www\.google\.com\/maps\/embed[^"]*"[^>]*>\s*<\/iframe>$/is,
56-
59+
{ host: "*.google.com", pathPrefix: "/maps/embed" },
5760
// Google Calendar
58-
/^<iframe[^>]*\s+src="https?:\/\/calendar\.google\.com\/calendar\/embed[^"]*"[^>]*>\s*<\/iframe>$/is,
59-
61+
{ host: "calendar.google.com", pathPrefix: "/calendar/embed" },
6062
// Spotify
61-
/^<iframe[^>]*\s+src="https?:\/\/open\.spotify\.com\/embed\/[^"]*"[^>]*>\s*<\/iframe>$/is,
62-
63+
{ host: "open.spotify.com", pathPrefix: "/embed/" },
6364
// SoundCloud
64-
/^<iframe[^>]*\s+src="https?:\/\/w\.soundcloud\.com\/player\/[^"]*"[^>]*>\s*<\/iframe>$/is,
65-
66-
// Note: Twitter/X embed pattern removed due to XSS risks with blockquote content injection
67-
65+
{ host: "w.soundcloud.com", pathPrefix: "/player/" },
6866
// CodePen
69-
/^<iframe[^>]*\s+src="https?:\/\/codepen\.io\/[^"]*"[^>]*>\s*<\/iframe>$/is,
67+
{ host: "codepen.io" },
7068
];
7169

7270
// Initialize DOMPurify with jsdom
@@ -97,39 +95,111 @@ const DOMPURIFY_CONFIG: Config = {
9795
};
9896

9997
/**
100-
* Sanitize embed content using DOMPurify
101-
* Returns null if content is completely removed or src is missing (dangerous content)
98+
* Check if a hostname matches an allowlist entry
99+
* Supports wildcard prefix with '*.' (e.g., '*.youtube.com' matches 'www.youtube.com')
102100
*/
103-
function sanitizeEmbed(content: string): string | null {
101+
function matchesHostPattern(hostname: string, pattern: string): boolean {
102+
const lowerHostname = hostname.toLowerCase();
103+
const lowerPattern = pattern.toLowerCase();
104+
105+
if (lowerPattern.startsWith("*.")) {
106+
// Wildcard match: *.example.com matches example.com and sub.example.com
107+
// But not evil-example.com (must be exact or have dot boundary)
108+
const base = lowerPattern.slice(2); // Remove '*.'
109+
return lowerHostname === base || lowerHostname.endsWith("." + base);
110+
}
111+
// Exact match
112+
return lowerHostname === lowerPattern;
113+
}
114+
115+
/**
116+
* Check if URL matches an allowlist entry (host and optional path prefix)
117+
* Path prefix must match at a boundary (followed by /, ?, #, or end of path)
118+
*/
119+
function matchesAllowlistEntry(url: URL, entry: EmbedAllowlistEntry): boolean {
120+
if (!matchesHostPattern(url.hostname, entry.host)) {
121+
return false;
122+
}
123+
if (entry.pathPrefix) {
124+
const pathLower = url.pathname.toLowerCase();
125+
const prefixLower = entry.pathPrefix.toLowerCase();
126+
if (!pathLower.startsWith(prefixLower)) {
127+
return false;
128+
}
129+
// If prefix ends with /, boundary check is already satisfied
130+
// Otherwise ensure prefix matches at a boundary (not partial, e.g., /embed vs /embedX)
131+
if (!prefixLower.endsWith("/")) {
132+
const remainder = pathLower.slice(prefixLower.length);
133+
if (remainder && !/^[/?#]/.test(remainder)) {
134+
return false;
135+
}
136+
}
137+
}
138+
return true;
139+
}
140+
141+
/**
142+
* Validate and sanitize embed content
143+
* Returns sanitized HTML string or null if content is invalid/dangerous
144+
*
145+
* Validation rules:
146+
* - Content must contain exactly one iframe element
147+
* - iframe must have a valid HTTPS src URL
148+
* - src URL must match the allowlist (host + path prefix)
149+
* - DOMPurify removes dangerous attributes
150+
*/
151+
function validateAndSanitizeEmbed(
152+
content: string,
153+
allowlist: EmbedAllowlistEntry[] | null,
154+
): string | null {
155+
// First, sanitize with DOMPurify to remove dangerous content
104156
const sanitized = purify.sanitize(content.trim(), {
105157
...DOMPURIFY_CONFIG,
106158
RETURN_TRUSTED_TYPE: false,
107159
}) as string;
108-
// If DOMPurify removed everything, the content was dangerous
160+
109161
if (!sanitized.trim()) {
110162
return null;
111163
}
112-
// If iframe exists but has no valid src (empty or removed), reject it
113-
if (/<iframe[^>]*>/i.test(sanitized)) {
114-
const srcMatch = sanitized.match(/\s+src\s*=\s*["']([^"']*)["']/i);
115-
if (!srcMatch || !srcMatch[1]) {
116-
return null;
117-
}
164+
165+
// Parse sanitized content once (avoid multiple JSDOM instances)
166+
const dom = new JSDOM(sanitized);
167+
const iframes = dom.window.document.querySelectorAll("iframe");
168+
169+
// Must have exactly one iframe
170+
if (iframes.length !== 1) {
171+
return null;
118172
}
119-
return sanitized;
120-
}
121173

122-
/**
123-
* Validate embed content against allowlist (pattern-based pre-check)
124-
*/
125-
function matchesAllowlist(content: string, allowlist: RegExp[]): boolean {
126-
const trimmed = content.trim();
127-
for (const pattern of allowlist) {
128-
if (pattern.test(trimmed)) {
129-
return true;
174+
const iframe = iframes[0]!;
175+
const src = iframe.getAttribute("src")?.trim();
176+
if (!src) {
177+
return null;
178+
}
179+
180+
// Parse URL
181+
let url: URL;
182+
try {
183+
url = new URL(src);
184+
} catch {
185+
return null;
186+
}
187+
188+
// Only allow HTTPS
189+
if (url.protocol !== "https:") {
190+
return null;
191+
}
192+
193+
// If allowlist is null, allow any HTTPS iframe (Wikidot's 'anyiframe' behavior)
194+
if (allowlist !== null) {
195+
// Check if URL matches any allowlist entry
196+
const matched = allowlist.some((entry) => matchesAllowlistEntry(url, entry));
197+
if (!matched) {
198+
return null;
130199
}
131200
}
132-
return false;
201+
202+
return sanitized;
133203
}
134204

135205
/**
@@ -153,23 +223,17 @@ function normalizeBooleanAttributes(html: string): string {
153223
/**
154224
* Render embed-block element (Wikidot style [[embed]]..[[/embed]])
155225
*
156-
* Content is validated in two stages:
157-
* 1. Pattern-based allowlist check (for Wikidot compatibility)
158-
* 2. DOMPurify sanitization (for XSS protection)
159-
*
160-
* Both stages must pass for content to be rendered.
226+
* Content is validated in a single pass:
227+
* 1. DOMPurify sanitization (removes dangerous attributes)
228+
* 2. Single iframe requirement check
229+
* 3. HTTPS-only and allowlist (host + path) validation
161230
*/
162231
export function renderEmbedBlock(ctx: RenderContext, data: EmbedBlockData): void {
163-
const allowlist = ctx.options.embedAllowlist ?? DEFAULT_EMBED_ALLOWLIST;
164-
165-
// Stage 1: Pattern-based allowlist check
166-
if (!matchesAllowlist(data.contents, allowlist)) {
167-
ctx.push('<div class="error-block">Sorry, no match for the embedded content.</div>');
168-
return;
169-
}
232+
// Use explicit undefined check to allow null (anyiframe mode)
233+
const allowlist =
234+
ctx.options.embedAllowlist !== undefined ? ctx.options.embedAllowlist : DEFAULT_EMBED_ALLOWLIST;
170235

171-
// Stage 2: DOMPurify sanitization (defense in depth)
172-
const sanitized = sanitizeEmbed(data.contents);
236+
const sanitized = validateAndSanitizeEmbed(data.contents, allowlist);
173237
if (sanitized === null) {
174238
ctx.push('<div class="error-block">Sorry, no match for the embedded content.</div>');
175239
return;

packages/render/src/types.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { Element } from "@wdprlib/ast";
2+
import type { EmbedAllowlistEntry } from "./elements/embed-block";
23

34
/**
45
* Page context for resolving links, images, etc.
@@ -73,10 +74,11 @@ export interface RenderOptions {
7374
*/
7475
htmlBlockSandbox?: string | null;
7576
/**
76-
* Allowlist patterns for [[embed]] content.
77-
* Only content matching at least one pattern will be rendered.
78-
* If not provided, uses default allowlist (YouTube, Vimeo, etc.).
79-
* Set to empty array to block all embeds.
77+
* Allowlist for [[embed]] content with host and optional path validation.
78+
* - undefined: Uses default allowlist (YouTube, Vimeo, etc. with path restrictions)
79+
* - EmbedAllowlistEntry[]: Custom allowlist with host patterns and optional path prefixes
80+
* - []: Block all embeds
81+
* - null: Allow any HTTPS iframe (Wikidot's 'anyiframe' behavior)
8082
*/
81-
embedAllowlist?: RegExp[];
83+
embedAllowlist?: EmbedAllowlistEntry[] | null;
8284
}

0 commit comments

Comments
 (0)