refactor: parseBlocksUntilにexcludedBlockNamesオプションを追加

r74tech · r74tech · commit beb1d3423709 · 2026-03-16T00:56:02.000+09:00
#33対応: 段落分割の根本的な解決として、parseBlocksUntilレベルで
ネスト不可ブロック名を除外する仕組みを導入。
- ParseContextにexcludedBlockNamesフィールドを追加
- parseBlocksUntilがrule dispatch（blockRulesのフィルタ）とParseContextへの伝播（excludedBlockNames）の両方で除外を適用
- parseInlineUntilがBLOCK_OPEN/BLOCK_END_OPENの段落境界判定で
  除外ブロック名をスキップ
- collapsible.tsからmergeSplitParagraphs後処理を削除
diff --git a/packages/parser/src/parser/rules/block/collapsible.ts b/packages/parser/src/parser/rules/block/collapsible.ts
@@ -137,69 +137,8 @@ function consumeCloseTag(ctx: ParseContext, pos: number): number {
   return closeConsumed;
 }
 
-/**
- * Merges consecutive paragraph containers that were split by unrecognised
- * block tokens back into the preceding paragraph.
- *
- * When a `[[collapsible]]` token appears inside a collapsible body (the rule
- * is filtered out to prevent nesting), the paragraph parser treats the
- * `BLOCK_OPEN` or `BLOCK_END_OPEN` as a paragraph boundary, splitting content
- * that Wikidot keeps in a single paragraph. This function detects those
- * artificial splits — paragraphs whose first text element is `"[["` or
- * `"[[/"` — and merges them back, inserting a line-break between runs.
- *
- * Paragraphs separated by blank lines (double newline) do NOT start with
- * block-open text and are therefore left as separate paragraphs.
- */
-function mergeSplitParagraphs(elements: Element[]): Element[] {
-  const result: Element[] = [];
-
-  for (const elem of elements) {
-    if (
-      elem.element !== "container" ||
-      !elem.data ||
-      typeof elem.data !== "object" ||
-      !("type" in elem.data) ||
-      elem.data.type !== "paragraph" ||
-      !("elements" in elem.data) ||
-      !Array.isArray(elem.data.elements)
-    ) {
-      result.push(elem);
-      continue;
-    }
-
-    // Check if this paragraph starts with "[[" or "[[/" (unrecognised block token)
-    const firstElem = elem.data.elements[0];
-    const startsWithBlockOpen =
-      firstElem?.element === "text" &&
-      typeof firstElem.data === "string" &&
-      (firstElem.data === "[[" || firstElem.data === "[[/");
-
-    if (!startsWithBlockOpen) {
-      result.push(elem);
-      continue;
-    }
-
-    // Try to merge into the previous paragraph
-    const prev = result[result.length - 1];
-    if (
-      prev?.element === "container" &&
-      prev.data &&
-      typeof prev.data === "object" &&
-      "type" in prev.data &&
-      prev.data.type === "paragraph" &&
-      "elements" in prev.data &&
-      Array.isArray(prev.data.elements)
-    ) {
-      prev.data.elements.push({ element: "line-break" });
-      prev.data.elements.push(...elem.data.elements);
-    } else {
-      result.push(elem);
-    }
-  }
-
-  return result;
-}
+/** Block names excluded from rule dispatch and paragraph-boundary detection. */
+const EXCLUDED_BLOCKS = new Set(["collapsible"]);
 
 /**
  * Block rule for `[[collapsible ...]]...[[/collapsible]]`.
@@ -208,7 +147,7 @@ function mergeSplitParagraphs(elements: Element[]): Element[] {
  * 1. Match BLOCK_OPEN + name "collapsible".
  * 2. Parse multiline attributes (show, hide, folded, hideLocation, etc.).
  * 3. If a NEWLINE follows the opening tag, parse body as block content
- *    with the collapsible rule itself removed (to prevent nesting).
+ *    with the collapsible rule itself excluded (to prevent nesting).
  *    Otherwise, parse inline content until close tag or end of line
  *    (inline form).
  * 4. Consume the `[[/collapsible]]` closing tag.
@@ -299,24 +238,22 @@ export const collapsibleRule: BlockRule = {
       }
     } else {
       // Block form: parse content recursively until [[/collapsible]]
-      // Collapsible cannot be nested in Wikidot - nested [[collapsible]] becomes plain text
-      const bodyCtx: ParseContext = {
-        ...ctx,
-        pos,
-        blockRules: ctx.blockRules.filter((r) => r.name !== "collapsible"),
-      };
+      // Collapsible cannot be nested in Wikidot - nested [[collapsible]] becomes plain text.
+      // excludedBlockNames removes the collapsible rule from dispatch AND prevents
+      // [[collapsible]] / [[/collapsible]] tokens from triggering paragraph splits.
+      const bodyCtx: ParseContext = { ...ctx, pos };
 
       const closeCondition = (checkCtx: ParseContext): boolean => {
         return isCollapsibleClose(checkCtx, checkCtx.pos);
       };
 
-      const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
+      const bodyResult = parseBlocksUntil(bodyCtx, closeCondition, {
+        excludedBlockNames: EXCLUDED_BLOCKS,
+      });
       consumed += bodyResult.consumed;
       pos += bodyResult.consumed;
 
-      // Merge paragraphs that were artificially split by unrecognised
-      // [[collapsible]] tokens (nested collapsible is treated as plain text)
-      bodyElements = mergeSplitParagraphs(bodyResult.elements);
+      bodyElements = bodyResult.elements;
     }
 
     // Check for missing close tag
diff --git a/packages/parser/src/parser/rules/block/utils.ts b/packages/parser/src/parser/rules/block/utils.ts
@@ -82,17 +82,29 @@ export function canApplyBlockRule(rule: BlockRule, token: Token): boolean {
  *
  * @param ctx            - Parse context positioned at the start of the body.
  * @param closeCondition - Predicate that signals the end of the block body.
+ * @param options        - Optional settings.
+ * @param options.excludedBlockNames - Block names that should be excluded
+ *   from both rule dispatch and paragraph-boundary detection. The named
+ *   rules are filtered out of `blockRules`, and the set is propagated to
+ *   the inline parser via `ParseContext.excludedBlockNames` so that
+ *   `BLOCK_OPEN` / `BLOCK_END_OPEN` tokens for these names do not trigger
+ *   paragraph breaks.
  * @returns Parsed elements and total tokens consumed.
  */
 export function parseBlocksUntil(
   ctx: ParseContext,
   closeCondition: (ctx: ParseContext) => boolean,
+  options?: { excludedBlockNames?: ReadonlySet<string> },
 ): BlockParseResult {
   const elements: Element[] = [];
   let consumed = 0;
   let pos = ctx.pos;
 
-  const { blockRules, blockFallbackRule } = ctx;
+  const excluded = options?.excludedBlockNames;
+  const blockRules = excluded
+    ? ctx.blockRules.filter((r) => !excluded.has(r.name))
+    : ctx.blockRules;
+  const { blockFallbackRule } = ctx;
 
   while (pos < ctx.tokens.length) {
     const token = ctx.tokens[pos];
@@ -122,8 +134,14 @@ export function parseBlocksUntil(
 
     // Try each block rule
     let matched = false;
-    // Pass close condition to context so paragraph parser can respect it
-    const blockCtx: ParseContext = { ...ctx, pos, blockCloseCondition: closeCondition };
+    // Pass close condition and excluded names via context so the paragraph/inline parsers can respect them
+    const blockCtx: ParseContext = {
+      ...ctx,
+      pos,
+      blockRules,
+      blockCloseCondition: closeCondition,
+      excludedBlockNames: excluded,
+    };
 
     for (const rule of blockRules) {
       if (canApplyBlockRule(rule, token)) {
diff --git a/packages/parser/src/parser/rules/inline/utils.ts b/packages/parser/src/parser/rules/inline/utils.ts
@@ -2,6 +2,19 @@ import type { TokenType, Token } from "../../../lexer";
 import type { Element } from "@wdprlib/ast";
 import type { ParseContext, InlineRule } from "../types";
 import { BLOCK_START_TOKENS } from "../../constants";
+import { parseBlockName } from "../utils";
+
+/**
+ * Checks whether the block token at `tokenPos` (BLOCK_OPEN or BLOCK_END_OPEN)
+ * names a block in the excluded set.
+ */
+function isExcludedBlockToken(ctx: ParseContext, tokenPos: number): boolean {
+  if (!ctx.excludedBlockNames?.size) return false;
+  const token = ctx.tokens[tokenPos];
+  if (token?.type !== "BLOCK_OPEN" && token?.type !== "BLOCK_END_OPEN") return false;
+  const nameResult = parseBlockName(ctx, tokenPos + 1);
+  return nameResult !== null && ctx.excludedBlockNames.has(nameResult.name);
+}
 
 /**
  * Result of parsing inline content
@@ -146,16 +159,24 @@ export function parseInlineUntil(ctx: ParseContext, endType: TokenType): InlineP
         }
       }
 
+      // Check if this block token names an excluded block (e.g. nested collapsible)
+      const isExcludedBlock =
+        (nextMeaningfulToken?.type === "BLOCK_OPEN" ||
+          nextMeaningfulToken?.type === "BLOCK_END_OPEN") &&
+        isExcludedBlockToken(ctx, pos + lookAhead);
+
       // Stop at double NEWLINE, EOF, or block start token (at line start)
-      // But don't stop at [[/span]], [[# name]], [[>/[[<, or invalid headings
+      // But don't stop at [[/span]], [[# name]], [[>/[[<, invalid headings,
+      // or excluded block names
       const isBlockStart =
         nextMeaningfulToken &&
         BLOCK_START_TOKENS.includes(nextMeaningfulToken.type) &&
         nextMeaningfulToken.lineStart &&
         !isOrphanCloseSpan &&
         !isAnchorName &&
         !isInvalidBlockOpen &&
-        !isInvalidHeading;
+        !isInvalidHeading &&
+        !isExcludedBlock;
       if (
         !nextMeaningfulToken ||
         nextMeaningfulToken.type === "NEWLINE" ||
diff --git a/packages/parser/src/parser/rules/types.ts b/packages/parser/src/parser/rules/types.ts
@@ -26,6 +26,11 @@ export interface ParseContext {
   inlineRules: InlineRule[];
   // Close condition for current block (passed to paragraph parser)
   blockCloseCondition?: (ctx: ParseContext) => boolean;
+  // Block names excluded from paragraph-boundary detection.
+  // When a BLOCK_OPEN/BLOCK_END_OPEN for an excluded name appears at
+  // line start, the inline parser does NOT treat it as a paragraph break.
+  // Used by collapsible to prevent nested [[collapsible]] from splitting paragraphs.
+  excludedBlockNames?: ReadonlySet<string>;
   // Diagnostics collected during parsing
   diagnostics: Diagnostic[];
   // Budget for div nesting: tracks how many more nested divs can open.