Skip to content

Commit dc4b585

Browse files
committed
feat: [[embed]]ブロックを実装
Wikidot形式の[[embed]]..[[/embed]]ブロックをサポート。 - embed-block要素をASTに追加(paragraph-safe) - パーサーにembed-blockルールを追加 - レンダラーで許可リストによるXSS対策を実装 - RenderOptions.embedAllowlistでカスタム許可リストを設定可能 - DEFAULT_EMBED_ALLOWLISTをエクスポート
1 parent 9142f0d commit dc4b585

File tree

10 files changed

+304
-0
lines changed

10 files changed

+304
-0
lines changed

packages/ast/src/element.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,15 @@ export interface HtmlData {
425425
contents: string;
426426
}
427427

428+
/**
429+
* Embed block data (Wikidot style [[embed]]..[[/embed]])
430+
* Contains raw HTML that is validated against an allowlist at render time.
431+
* Unlike html element, embed-block is paragraph-safe.
432+
*/
433+
export interface EmbedBlockData {
434+
contents: string;
435+
}
436+
428437
export interface IframeData {
429438
url: string;
430439
attributes: AttributeMap;
@@ -505,6 +514,7 @@ export type ElementDataMap = {
505514
"math-inline": MathInlineData;
506515
"equation-reference": string;
507516
embed: Embed;
517+
"embed-block": EmbedBlockData;
508518
html: HtmlData;
509519
iframe: IframeData;
510520
include: IncludeData;
@@ -784,6 +794,8 @@ export function isParagraphSafe(element: Element): boolean {
784794
return true;
785795
case "embed":
786796
return false;
797+
case "embed-block":
798+
return true;
787799
case "html":
788800
case "iframe":
789801
return false;
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import type { Element } from "@wdprlib/ast";
2+
import type { BlockRule, ParseContext, RuleResult } from "../types";
3+
import { currentToken } from "../types";
4+
import { parseBlockName } from "./utils";
5+
6+
/**
 * Embed block rule: [[embed]]..[[/embed]], [[embedvideo]]..[[/embedvideo]],
 * [[embedaudio]]..[[/embedaudio]].
 *
 * Wikidot only outputs HTML that matches its allowlist. This parser performs no
 * validation: the token text between the tags is kept verbatim (trimmed) in an
 * `embed-block` element, and validation is expected to happen at render time or
 * on the server.
 */
export const embedBlockRule: BlockRule = {
  name: "embed-block",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  /**
   * Try to parse an embed block starting at `ctx.pos`.
   *
   * @param ctx - Parse context; only `ctx.tokens` and `ctx.pos` are read.
   * @returns `{ success: false }` when the current token does not open an
   *   `embed` / `embedvideo` / `embedaudio` block; otherwise a paragraph
   *   container wrapping one `embed-block` element, plus the number of tokens
   *   consumed.
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // Block names are matched case-insensitively, for both opening and closing tags.
    const isEmbedName = (name: string): boolean => {
      const lowered = name.toLowerCase();
      return lowered === "embed" || lowered === "embedvideo" || lowered === "embedaudio";
    };

    // Index of the first non-WHITESPACE token at or after `from`.
    const skipWhitespace = (from: number): number => {
      let index = from;
      while (ctx.tokens[index]?.type === "WHITESPACE") {
        index++;
      }
      return index;
    };

    // `pos` is the only cursor; the consumed count is derived from it at the end.
    let pos = ctx.pos + 1;

    // Block name directly after "[[".
    const nameResult = parseBlockName(ctx, pos);
    if (!nameResult || !isEmbedName(nameResult.name)) {
      return { success: false };
    }
    pos = skipWhitespace(pos + nameResult.consumed);

    // Expect "]]" — the embed tags take no arguments.
    if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    pos++;

    // Collect raw token text until a closing [[/embed*]] tag. Any of the three
    // closing names ends the block, regardless of which one opened it.
    let contents = "";
    let closeNameLength: number | null = null;
    while (pos < ctx.tokens.length) {
      const token = ctx.tokens[pos];
      if (!token) break;

      if (token.type === "BLOCK_END_OPEN") {
        const closeNameResult = parseBlockName(ctx, pos + 1);
        if (closeNameResult && isEmbedName(closeNameResult.name)) {
          closeNameLength = closeNameResult.consumed;
          break;
        }
      }

      contents += token.value;
      pos++;
    }

    // NOTE(review): when no closing tag exists, everything up to the end of the
    // token stream has been collected and the rule still succeeds. Confirm this
    // leniency is intended rather than falling back to plain text.
    if (closeNameLength !== null) {
      // Consume "[[/" and the block name.
      pos += 1 + closeNameLength;

      // Allow whitespace before "]]", mirroring the opening tag. The whitespace
      // is only consumed when "]]" actually follows it.
      const closeAt = skipWhitespace(pos);
      if (ctx.tokens[closeAt]?.type === "BLOCK_CLOSE") {
        pos = closeAt + 1;
      }

      // Swallow one trailing newline so it does not leak into following content.
      if (ctx.tokens[pos]?.type === "NEWLINE") {
        pos++;
      }
    }

    // Like Wikidot, wrap the embed-block in a paragraph.
    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "paragraph",
            attributes: {},
            elements: [
              {
                element: "embed-block",
                data: {
                  contents: contents.trim(),
                },
              },
            ],
          },
        },
      ],
      consumed: pos - ctx.pos,
    };
  },
};

packages/parser/src/parser/rules/block/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import { tabviewRule } from "./tabview";
2222
import { includeRule } from "./include";
2323
import { mathBlockRule } from "./math";
2424
import { htmlBlockRule } from "./html";
25+
import { embedBlockRule } from "./embed-block";
2526
import { iframeRule } from "./iframe";
2627
import { iftagsRule } from "./iftags";
2728
import { tocRule } from "./toc";
@@ -49,6 +50,7 @@ export { tabviewRule } from "./tabview";
4950
export { includeRule } from "./include";
5051
export { mathBlockRule } from "./math";
5152
export { htmlBlockRule } from "./html";
53+
export { embedBlockRule } from "./embed-block";
5254
export { iframeRule } from "./iframe";
5355
export { iftagsRule } from "./iftags";
5456
export { tocRule } from "./toc";
@@ -80,6 +82,7 @@ export const blockRules: BlockRule[] = [
8082
includeRule,
8183
mathBlockRule,
8284
htmlBlockRule,
85+
embedBlockRule,
8386
iframeRule,
8487
iftagsRule,
8588
divRule,
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import type { EmbedBlockData } from "@wdprlib/ast";
2+
import type { RenderContext } from "../context";
3+
4+
/**
 * Boolean attributes that are normalized to the attr="attr" form
 * (Wikidot normalizes these attributes in its output).
 *
 * Declared read-only so the shared module-level list cannot be mutated by
 * accident; names are lowercase.
 */
const BOOLEAN_ATTRIBUTES: readonly string[] = [
  "allowfullscreen",
  "async",
  "autofocus",
  "autoplay",
  "checked",
  "controls",
  "default",
  "defer",
  "disabled",
  "formnovalidate",
  "hidden",
  "ismap",
  "loop",
  "multiple",
  "muted",
  "novalidate",
  "open",
  "readonly",
  "required",
  "reversed",
  "selected",
];
31+
32+
/**
 * Default allowlist patterns for embed content (described by the original
 * author as ported from Wikidot's default.php).
 *
 * Each pattern is anchored with ^...$ and is meant to be tested against the
 * trimmed embed content; content is accepted when at least one pattern matches.
 *
 * NOTE(review): the first ("any iframe") pattern accepts an iframe with any
 * name="value" attributes and any URL, so the vendor-specific iframe patterns
 * below only add support for bare boolean attributes such as `allowfullscreen`.
 * Deployments that need domain pinning should pass their own `embedAllowlist`.
 */
export const DEFAULT_EMBED_ALLOWLIST: RegExp[] = [
  // Any iframe with standard attributes (Wikidot's 'anyiframe' pattern)
  /^<iframe(\s+[a-z0-9_]+\s*=\s*"[^"]*")+>\s*<\/iframe>$/is,

  // YouTube embed
  /^<iframe[^>]*\s+src="https?:\/\/(www\.)?youtube\.com\/embed\/[a-zA-Z0-9_-]+"[^>]*>\s*<\/iframe>$/is,
  /^<iframe[^>]*\s+src="https?:\/\/(www\.)?youtube-nocookie\.com\/embed\/[a-zA-Z0-9_-]+"[^>]*>\s*<\/iframe>$/is,

  // Vimeo embed
  /^<iframe[^>]*\s+src="https?:\/\/player\.vimeo\.com\/video\/[0-9]+"[^>]*>\s*<\/iframe>$/is,

  // Google Maps
  /^<iframe[^>]*\s+src="https?:\/\/www\.google\.com\/maps\/embed[^"]*"[^>]*>\s*<\/iframe>$/is,

  // Google Calendar
  /^<iframe[^>]*\s+src="https?:\/\/calendar\.google\.com\/calendar\/embed[^"]*"[^>]*>\s*<\/iframe>$/is,

  // Spotify
  /^<iframe[^>]*\s+src="https?:\/\/open\.spotify\.com\/embed\/[^"]*"[^>]*>\s*<\/iframe>$/is,

  // SoundCloud
  /^<iframe[^>]*\s+src="https?:\/\/w\.soundcloud\.com\/player\/[^"]*"[^>]*>\s*<\/iframe>$/is,

  // Twitter/X embed.
  // - The blockquote body may contain only text and <p>, <a>, <br> tags, so an
  //   inline <script> (or any other element) cannot be smuggled inside it.
  // - The trailing <script> may carry only async / defer / charset and a `src`
  //   pointing at widgets.js. Browsers honour the FIRST duplicate attribute, so
  //   every `src` present must be the Twitter one.
  /^<blockquote[^>]*class="twitter-tweet"[^>]*>(?:[^<]|<\/?(?:p|a|br)(?:\s[^>]*)?\/?>)*<\/blockquote>\s*<script(?=[^>]*\ssrc=)(?:\s+(?:async(?:="[^"]*")?|defer(?:="[^"]*")?|charset="[^"]*"|src="https?:\/\/platform\.twitter\.com\/widgets\.js"))+\s*>\s*<\/script>$/is,

  // CodePen
  /^<iframe[^>]*\s+src="https?:\/\/codepen\.io\/[^"]*"[^>]*>\s*<\/iframe>$/is,
];
65+
66+
/**
 * Check whether the content declares an inline JS event handler attribute
 * (onclick, onerror, onload, ...).
 *
 * The check is deliberately conservative and position-independent. In HTML an
 * attribute name can follow whitespace, a `/` (`<iframe/onload=...>`) or the
 * closing quote of the previous value (`title="x"onload=...`), and a `>` inside
 * a quoted value does not end the tag. Any `on<letters>=` preceded by one of
 * those separators is therefore treated as a handler, even if it happens to sit
 * in text or inside an attribute value (a false positive only rejects the embed).
 *
 * @param content - Raw embed HTML.
 * @returns `true` when something that a browser could parse as an event
 *   handler attribute is present.
 */
function hasJsEventHandlers(content: string): boolean {
  return /(?:^|[\s"'\/])on[a-z]+\s*=/i.test(content);
}

/**
 * Check for attributes that execute script in the embedding page without being
 * event handlers: `srcdoc` and attribute values starting with a `javascript:` /
 * `vbscript:` URL.
 *
 * Character references are decoded and whitespace/control characters removed
 * before looking for the scheme, because browsers decode references in
 * attribute values and ignore those characters when resolving the URL.
 *
 * @param content - Raw embed HTML.
 * @returns `true` when such an attribute is (or may be) present.
 */
function hasScriptExecutingAttribute(content: string): boolean {
  if (/(?:^|[\s"'\/])srcdoc\s*=/i.test(content)) {
    return true;
  }

  const collapsed = content
    // Numeric character references, with or without the trailing semicolon.
    .replace(/&#x([0-9a-f]+);?|&#([0-9]+);?/gi, (whole: string, hex?: string, dec?: string) => {
      const code = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? "", 10);
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    })
    // The named references that matter for spelling a scheme.
    .replace(/&colon;/gi, ":")
    .replace(/&(?:tab|newline);/gi, "")
    // Spaces and C0 control characters.
    .replace(/[\u0000-\u0020]+/g, "");

  // Only a scheme at the very start of an attribute value is relevant.
  return /=["']?(?:java|vb)script:/i.test(collapsed);
}

/**
 * Validate embed content against an allowlist.
 *
 * @param content - Raw embed HTML; surrounding whitespace is ignored.
 * @param allowlist - Patterns tested against the trimmed content.
 * @returns `true` only when the content carries no event handler, `srcdoc` or
 *   script URL, and at least one allowlist pattern matches. An empty allowlist
 *   rejects everything.
 */
function isAllowedEmbed(content: string, allowlist: RegExp[]): boolean {
  const trimmed = content.trim();

  // Deny-checks apply regardless of what the allowlist would accept.
  if (hasJsEventHandlers(trimmed) || hasScriptExecutingAttribute(trimmed)) {
    return false;
  }

  return allowlist.some((pattern) => {
    // Caller-supplied patterns may carry the `g` or `y` flag, which makes
    // `test()` resume from `lastIndex`; always start from the beginning.
    pattern.lastIndex = 0;
    return pattern.test(trimmed);
  });
}
94+
95+
/**
 * Normalize boolean attributes to Wikidot format (attr -> attr="attr").
 *
 * Only valueless attributes inside a start tag are rewritten. Attribute values
 * (`title="my loop video"`), text between tags and attributes that already
 * have a value (including `attr = "v"` written with spaces) are left untouched.
 * The emitted attribute name is lowercased.
 *
 * @param html - HTML fragment to normalize.
 * @param attributes - Attribute names to treat as boolean (compared
 *   case-insensitively). Defaults to the module's `BOOLEAN_ATTRIBUTES`.
 * @returns The fragment with matching valueless attributes expanded.
 */
function normalizeBooleanAttributes(
  html: string,
  attributes: readonly string[] = BOOLEAN_ATTRIBUTES,
): string {
  const booleanNames = new Set(attributes.map((name) => name.toLowerCase()));

  // "<" followed by a tag name; closing tags and comments never match.
  const tagStart = /<[a-z][^\s\/>]*/gi;
  // One attribute at the current position: separator, name, optional "= value"
  // (double-quoted, single-quoted or unquoted).
  const attribute = /([\s\/]*)([^\s\/>=]+)(\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]*))?/y;
  // What must follow the last attribute for the tag to be well-formed.
  const tagEnd = /[\s\/]*>/y;

  let output = "";
  let cursor = 0;

  while (tagStart.exec(html) !== null) {
    // Copy everything up to and including the tag name unchanged.
    output += html.slice(cursor, tagStart.lastIndex);
    cursor = tagStart.lastIndex;

    const attributesStart = cursor;
    let rebuilt = "";
    attribute.lastIndex = cursor;
    let match: RegExpExecArray | null;
    while ((match = attribute.exec(html)) !== null) {
      const whole = match[0];
      const separator = match[1] ?? "";
      const lowered = (match[2] ?? "").toLowerCase();
      const hasValue = match[3] !== undefined;

      rebuilt +=
        !hasValue && booleanNames.has(lowered) ? `${separator}${lowered}="${lowered}"` : whole;
      cursor = attribute.lastIndex;
    }

    // Only commit the rewrite when the tag is actually closed; an unterminated
    // "<word ..." is not a tag and is copied through verbatim.
    tagEnd.lastIndex = cursor;
    output += tagEnd.test(html) ? rebuilt : html.slice(attributesStart, cursor);

    // Resume the tag search after the attributes so that "<" characters inside
    // quoted values are never mistaken for tags.
    tagStart.lastIndex = cursor;
  }

  return output + html.slice(cursor);
}
108+
109+
/**
 * Render embed-block element (Wikidot style [[embed]]..[[/embed]]).
 *
 * The content is validated against `ctx.options.embedAllowlist`, falling back
 * to `DEFAULT_EMBED_ALLOWLIST` when the option is null or undefined (an empty
 * array is kept as-is). Matching content is pushed with its boolean attributes
 * normalized; anything else is replaced by an error block.
 *
 * @param ctx - Render context; output is written with `ctx.push`.
 * @param data - Embed block data holding the raw HTML.
 */
export function renderEmbedBlock(ctx: RenderContext, data: EmbedBlockData): void {
  const allowlist = ctx.options.embedAllowlist ?? DEFAULT_EMBED_ALLOWLIST;

  // Trim once so that the string that is validated is exactly the string that
  // is emitted.
  const contents = data.contents.trim();

  if (!isAllowedEmbed(contents, allowlist)) {
    ctx.push('<div class="error-block">Sorry, no match for the embedded content.</div>');
    return;
  }

  ctx.push(normalizeBooleanAttributes(contents));
}

packages/render/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
export { renderToHtml } from "./render";
22
export type { RenderOptions, RenderResolvers, PageContext, ResolvedUser } from "./types";
3+
export { DEFAULT_EMBED_ALLOWLIST } from "./elements/embed-block";

packages/render/src/render.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { renderFootnoteRef, renderFootnoteBlock } from "./elements/footnote";
1515
import { renderMath, renderMathInline, renderEquationRef } from "./elements/math";
1616
import { renderModule } from "./elements/module/index";
1717
import { renderEmbed } from "./elements/embed";
18+
import { renderEmbedBlock } from "./elements/embed-block";
1819
import { renderUser } from "./elements/user";
1920
import { renderBibliographyCite, renderBibliographyBlock } from "./elements/bibliography";
2021
import { renderTableOfContents } from "./elements/toc";
@@ -134,6 +135,9 @@ export function renderElement(ctx: RenderContext, element: Element): void {
134135
case "embed":
135136
renderEmbed(ctx, element.data);
136137
break;
138+
case "embed-block":
139+
renderEmbedBlock(ctx, element.data);
140+
break;
137141
case "user":
138142
renderUser(ctx, element.data);
139143
break;

packages/render/src/types.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,11 @@ export interface RenderOptions {
6868
* - Allow scripts: htmlBlockSandbox: "allow-scripts allow-same-origin"
6969
*/
7070
htmlBlockSandbox?: string | null;
71+
/**
72+
* Allowlist patterns for [[embed]] content.
73+
* Only content matching at least one pattern will be rendered.
74+
* If not provided, uses default allowlist (YouTube, Vimeo, etc.).
75+
* Set to empty array to block all embeds.
76+
*/
77+
embedAllowlist?: RegExp[];
7178
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"elements": [
3+
{
4+
"element": "container",
5+
"data": {
6+
"type": "paragraph",
7+
"attributes": {},
8+
"elements": [
9+
{
10+
"element": "embed-block",
11+
"data": {
12+
"contents": "<iframe width=\"1208\" height=\"680\" src=\"https://www.youtube.com/embed/dQw4w9WgXcQ\" title=\"Rick Astley - Never Gonna Give You Up (Official Video) (4K Remaster)\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen></iframe>"
13+
}
14+
}
15+
]
16+
}
17+
},
18+
{
19+
"element": "footnote-block",
20+
"data": {
21+
"title": null,
22+
"hide": false
23+
}
24+
}
25+
]
26+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[[embed]]
2+
<iframe width="1208" height="680" src="https://www.youtube.com/embed/dQw4w9WgXcQ" title="Rick Astley - Never Gonna Give You Up (Official Video) (4K Remaster)" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
3+
[[/embed]]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p><iframe width="1208" height="680" src="https://www.youtube.com/embed/dQw4w9WgXcQ" title="Rick Astley - Never Gonna Give You Up (Official Video) (4K Remaster)" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen="allowfullscreen"></iframe></p>

0 commit comments

Comments
 (0)