Skip to content

Commit beb1d34

Browse files
committed
refactor: parseBlocksUntilにexcludedBlockNamesオプションを追加
#33対応: 段落分割の根本的な解決として、parseBlocksUntilレベルでネスト不可ブロック名を除外する仕組みを導入。

- ParseContextにexcludedBlockNamesフィールドを追加
- parseBlocksUntilがrule dispatchとcontextの両方で除外を適用
- parseInlineUntilがBLOCK_OPEN/BLOCK_END_OPENの段落境界判定で除外ブロック名をスキップ
- collapsible.tsからmergeSplitParagraphs後処理を削除
1 parent 2d554a6 commit beb1d34

File tree

4 files changed

+60
-79
lines changed

4 files changed

+60
-79
lines changed

packages/parser/src/parser/rules/block/collapsible.ts

Lines changed: 11 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -137,69 +137,8 @@ function consumeCloseTag(ctx: ParseContext, pos: number): number {
137137
return closeConsumed;
138138
}
139139

140-
/**
141-
* Merges consecutive paragraph containers that were split by unrecognised
142-
* block tokens back into the preceding paragraph.
143-
*
144-
* When a `[[collapsible]]` token appears inside a collapsible body (the rule
145-
* is filtered out to prevent nesting), the paragraph parser treats the
146-
* `BLOCK_OPEN` or `BLOCK_END_OPEN` as a paragraph boundary, splitting content
147-
* that Wikidot keeps in a single paragraph. This function detects those
148-
* artificial splits — paragraphs whose first text element is `"[["` or
149-
* `"[[/"` — and merges them back, inserting a line-break between runs.
150-
*
151-
* Paragraphs separated by blank lines (double newline) do NOT start with
152-
* block-open text and are therefore left as separate paragraphs.
153-
*/
154-
function mergeSplitParagraphs(elements: Element[]): Element[] {
155-
const result: Element[] = [];
156-
157-
for (const elem of elements) {
158-
if (
159-
elem.element !== "container" ||
160-
!elem.data ||
161-
typeof elem.data !== "object" ||
162-
!("type" in elem.data) ||
163-
elem.data.type !== "paragraph" ||
164-
!("elements" in elem.data) ||
165-
!Array.isArray(elem.data.elements)
166-
) {
167-
result.push(elem);
168-
continue;
169-
}
170-
171-
// Check if this paragraph starts with "[[" or "[[/" (unrecognised block token)
172-
const firstElem = elem.data.elements[0];
173-
const startsWithBlockOpen =
174-
firstElem?.element === "text" &&
175-
typeof firstElem.data === "string" &&
176-
(firstElem.data === "[[" || firstElem.data === "[[/");
177-
178-
if (!startsWithBlockOpen) {
179-
result.push(elem);
180-
continue;
181-
}
182-
183-
// Try to merge into the previous paragraph
184-
const prev = result[result.length - 1];
185-
if (
186-
prev?.element === "container" &&
187-
prev.data &&
188-
typeof prev.data === "object" &&
189-
"type" in prev.data &&
190-
prev.data.type === "paragraph" &&
191-
"elements" in prev.data &&
192-
Array.isArray(prev.data.elements)
193-
) {
194-
prev.data.elements.push({ element: "line-break" });
195-
prev.data.elements.push(...elem.data.elements);
196-
} else {
197-
result.push(elem);
198-
}
199-
}
200-
201-
return result;
202-
}
140+
/** Block names excluded from rule dispatch and paragraph-boundary detection. */
141+
const EXCLUDED_BLOCKS = new Set(["collapsible"]);
203142

204143
/**
205144
* Block rule for `[[collapsible ...]]...[[/collapsible]]`.
@@ -208,7 +147,7 @@ function mergeSplitParagraphs(elements: Element[]): Element[] {
208147
* 1. Match BLOCK_OPEN + name "collapsible".
209148
* 2. Parse multiline attributes (show, hide, folded, hideLocation, etc.).
210149
* 3. If a NEWLINE follows the opening tag, parse body as block content
211-
* with the collapsible rule itself removed (to prevent nesting).
150+
* with the collapsible rule itself excluded (to prevent nesting).
212151
* Otherwise, parse inline content until close tag or end of line
213152
* (inline form).
214153
* 4. Consume the `[[/collapsible]]` closing tag.
@@ -299,24 +238,22 @@ export const collapsibleRule: BlockRule = {
299238
}
300239
} else {
301240
// Block form: parse content recursively until [[/collapsible]]
302-
// Collapsible cannot be nested in Wikidot - nested [[collapsible]] becomes plain text
303-
const bodyCtx: ParseContext = {
304-
...ctx,
305-
pos,
306-
blockRules: ctx.blockRules.filter((r) => r.name !== "collapsible"),
307-
};
241+
// Collapsible cannot be nested in Wikidot - nested [[collapsible]] becomes plain text.
242+
// excludedBlockNames removes the collapsible rule from dispatch AND prevents
243+
// [[collapsible]] / [[/collapsible]] tokens from triggering paragraph splits.
244+
const bodyCtx: ParseContext = { ...ctx, pos };
308245

309246
const closeCondition = (checkCtx: ParseContext): boolean => {
310247
return isCollapsibleClose(checkCtx, checkCtx.pos);
311248
};
312249

313-
const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
250+
const bodyResult = parseBlocksUntil(bodyCtx, closeCondition, {
251+
excludedBlockNames: EXCLUDED_BLOCKS,
252+
});
314253
consumed += bodyResult.consumed;
315254
pos += bodyResult.consumed;
316255

317-
// Merge paragraphs that were artificially split by unrecognised
318-
// [[collapsible]] tokens (nested collapsible is treated as plain text)
319-
bodyElements = mergeSplitParagraphs(bodyResult.elements);
256+
bodyElements = bodyResult.elements;
320257
}
321258

322259
// Check for missing close tag

packages/parser/src/parser/rules/block/utils.ts

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,29 @@ export function canApplyBlockRule(rule: BlockRule, token: Token): boolean {
8282
*
8383
* @param ctx - Parse context positioned at the start of the body.
8484
* @param closeCondition - Predicate that signals the end of the block body.
85+
* @param options - Optional settings.
86+
* @param options.excludedBlockNames - Block names that should be excluded
87+
* from both rule dispatch and paragraph-boundary detection. The named
88+
* rules are filtered out of `blockRules`, and the set is propagated to
89+
* the inline parser via `ParseContext.excludedBlockNames` so that
90+
* `BLOCK_OPEN` / `BLOCK_END_OPEN` tokens for these names do not trigger
91+
* paragraph breaks.
8592
* @returns Parsed elements and total tokens consumed.
8693
*/
8794
export function parseBlocksUntil(
8895
ctx: ParseContext,
8996
closeCondition: (ctx: ParseContext) => boolean,
97+
options?: { excludedBlockNames?: ReadonlySet<string> },
9098
): BlockParseResult {
9199
const elements: Element[] = [];
92100
let consumed = 0;
93101
let pos = ctx.pos;
94102

95-
const { blockRules, blockFallbackRule } = ctx;
103+
const excluded = options?.excludedBlockNames;
104+
const blockRules = excluded
105+
? ctx.blockRules.filter((r) => !excluded.has(r.name))
106+
: ctx.blockRules;
107+
const { blockFallbackRule } = ctx;
96108

97109
while (pos < ctx.tokens.length) {
98110
const token = ctx.tokens[pos];
@@ -122,8 +134,14 @@ export function parseBlocksUntil(
122134

123135
// Try each block rule
124136
let matched = false;
125-
// Pass close condition to context so paragraph parser can respect it
126-
const blockCtx: ParseContext = { ...ctx, pos, blockCloseCondition: closeCondition };
137+
// Pass close condition and excluded names to context
138+
const blockCtx: ParseContext = {
139+
...ctx,
140+
pos,
141+
blockRules,
142+
blockCloseCondition: closeCondition,
143+
excludedBlockNames: excluded,
144+
};
127145

128146
for (const rule of blockRules) {
129147
if (canApplyBlockRule(rule, token)) {

packages/parser/src/parser/rules/inline/utils.ts

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@ import type { TokenType, Token } from "../../../lexer";
22
import type { Element } from "@wdprlib/ast";
33
import type { ParseContext, InlineRule } from "../types";
44
import { BLOCK_START_TOKENS } from "../../constants";
5+
import { parseBlockName } from "../utils";
6+
7+
/**
8+
* Checks whether the block token at `tokenPos` (BLOCK_OPEN or BLOCK_END_OPEN)
9+
* names a block in the excluded set.
10+
*/
11+
function isExcludedBlockToken(ctx: ParseContext, tokenPos: number): boolean {
12+
if (!ctx.excludedBlockNames?.size) return false;
13+
const token = ctx.tokens[tokenPos];
14+
if (token?.type !== "BLOCK_OPEN" && token?.type !== "BLOCK_END_OPEN") return false;
15+
const nameResult = parseBlockName(ctx, tokenPos + 1);
16+
return nameResult !== null && ctx.excludedBlockNames.has(nameResult.name);
17+
}
518

619
/**
720
* Result of parsing inline content
@@ -146,16 +159,24 @@ export function parseInlineUntil(ctx: ParseContext, endType: TokenType): InlineP
146159
}
147160
}
148161

162+
// Check if this block token names an excluded block (e.g. nested collapsible)
163+
const isExcludedBlock =
164+
(nextMeaningfulToken?.type === "BLOCK_OPEN" ||
165+
nextMeaningfulToken?.type === "BLOCK_END_OPEN") &&
166+
isExcludedBlockToken(ctx, pos + lookAhead);
167+
149168
// Stop at double NEWLINE, EOF, or block start token (at line start)
150-
// But don't stop at [[/span]], [[# name]], [[>/[[<, or invalid headings
169+
// But don't stop at [[/span]], [[# name]], [[>/[[<, invalid headings,
170+
// or excluded block names
151171
const isBlockStart =
152172
nextMeaningfulToken &&
153173
BLOCK_START_TOKENS.includes(nextMeaningfulToken.type) &&
154174
nextMeaningfulToken.lineStart &&
155175
!isOrphanCloseSpan &&
156176
!isAnchorName &&
157177
!isInvalidBlockOpen &&
158-
!isInvalidHeading;
178+
!isInvalidHeading &&
179+
!isExcludedBlock;
159180
if (
160181
!nextMeaningfulToken ||
161182
nextMeaningfulToken.type === "NEWLINE" ||

packages/parser/src/parser/rules/types.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ export interface ParseContext {
2626
inlineRules: InlineRule[];
2727
// Close condition for current block (passed to paragraph parser)
2828
blockCloseCondition?: (ctx: ParseContext) => boolean;
29+
// Block names excluded from paragraph-boundary detection.
30+
// When a BLOCK_OPEN/BLOCK_END_OPEN for an excluded name appears at
31+
// line start, the inline parser does NOT treat it as a paragraph break.
32+
// Used by collapsible to prevent nested [[collapsible]] from splitting paragraphs.
33+
excludedBlockNames?: ReadonlySet<string>;
2934
// Diagnostics collected during parsing
3035
diagnostics: Diagnostic[];
3136
// Budget for div nesting: tracks how many more nested divs can open.

0 commit comments

Comments
 (0)