Skip to content

Commit 3aec16b

Browse files
committed
fix: miscテスト修正(HR4+制限・backslash改行・末尾空白除去・bibliography除外)
1 parent e39f048 commit 3aec16b

File tree

11 files changed

+71
-37
lines changed

11 files changed

+71
-37
lines changed

packages/parser/src/lexer/lexer.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,8 @@ export class Lexer {
310310
return;
311311
}
312312

313-
// Horizontal rule --- or more (check before --)
314-
if (isLineStart && this.match("---")) {
313+
// Horizontal rule ---- or more (4+ hyphens, check before --)
314+
if (isLineStart && this.match("----")) {
315315
let dashes = "";
316316
while (this.current() === "-") {
317317
dashes += this.advance();
@@ -582,6 +582,13 @@ export class Lexer {
582582
return;
583583
}
584584

585+
// Backslash line break marker (U+E000, inserted by preproc)
586+
if (char.charCodeAt(0) === 0xe000) {
587+
this.advance();
588+
this.addToken("BACKSLASH_BREAK", char);
589+
return;
590+
}
591+
585592
// Identifier: alphanumeric sequence
586593
if (this.isAlphanumeric(char)) {
587594
let ident = "";

packages/parser/src/lexer/tokens.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ export type TokenType =
7373

7474
// Other
7575
| "UNDERSCORE" // _ (single underscore, for line break)
76+
| "BACKSLASH_BREAK" // U+E000 (preproc marker for \ at end of line)
7677

7778
// Comment
7879
| "COMMENT_OPEN" // [!--

packages/parser/src/parser/preprocess/whitespace.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,9 @@ export function substitute(text: string): string {
5959
// Strip lines with only whitespace
6060
result = result.replace(WHITESPACE_ONLY_LINE, "");
6161

62-
// Join concatenated lines (ending with '\')
63-
result = result.replace(CONCAT_LINES, "");
62+
// Backslash at end of line → line break marker (U+E000)
63+
// Wikidot treats \ at end of line as <br />
64+
result = result.replace(CONCAT_LINES, String.fromCharCode(0xe000));
6465

6566
// Tabs to spaces
6667
result = result.replace(TABS, " ");

packages/parser/src/parser/rules/block/paragraph.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ export const paragraphRule: BlockRule = {
9191
elements.pop();
9292
}
9393

94+
// Remove trailing whitespace-only text nodes
95+
while (
96+
elements.length > 0 &&
97+
elements[elements.length - 1]?.element === "text" &&
98+
typeof elements[elements.length - 1]?.data === "string" &&
99+
(elements[elements.length - 1]?.data as string).trim() === ""
100+
) {
101+
elements.pop();
102+
}
103+
94104
// Remove leading line-breaks
95105
while (elements.length > 0 && elements[0]?.element === "line-break") {
96106
elements.shift();

packages/parser/src/parser/rules/inline/index.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import { linkSingleRule } from "./link-single";
1111
import { linkAnchorRule } from "./link-anchor";
1212
import { linkStarRule } from "./link-star";
1313
import { colorRule } from "./color";
14-
import { newlineLineBreakRule, underscoreLineBreakRule } from "./line-break";
14+
import { backslashLineBreakRule, newlineLineBreakRule, underscoreLineBreakRule } from "./line-break";
1515
import { commentRule } from "./comment";
1616
import { rawRule } from "./raw";
1717
import { spanRule, closeSpanRule } from "./span";
@@ -39,7 +39,7 @@ export { linkSingleRule } from "./link-single";
3939
export { linkAnchorRule } from "./link-anchor";
4040
export { linkStarRule } from "./link-star";
4141
export { colorRule } from "./color";
42-
export { newlineLineBreakRule, underscoreLineBreakRule } from "./line-break";
42+
export { backslashLineBreakRule, newlineLineBreakRule, underscoreLineBreakRule } from "./line-break";
4343
export { commentRule } from "./comment";
4444
export { rawRule } from "./raw";
4545
export { spanRule, closeSpanRule } from "./span";
@@ -71,6 +71,7 @@ export const inlineRules: InlineRule[] = [
7171
linkAnchorRule,
7272
linkStarRule,
7373
colorRule,
74+
backslashLineBreakRule,
7475
underscoreLineBreakRule,
7576
newlineLineBreakRule, // Must come after underscore rule
7677
commentRule,

packages/parser/src/parser/rules/inline/line-break.ts

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
* - " _\n" pattern → line-break
77
* - "^_\n" pattern → line-break (underscore at start of line)
88
*
9-
* Note: Backslash line break (\ at end of line) is handled by preproc,
10-
* which removes \\\n and joins lines.
9+
* Note: Backslash line break (\ at end of line) is preprocessed to U+E000
10+
* by preproc, then handled by backslashLineBreakRule.
1111
*/
1212
import type { Element } from "@wdpr/ast";
1313
import type { InlineRule, ParseContext, RuleResult } from "../types";
@@ -103,6 +103,29 @@ export const newlineLineBreakRule: InlineRule = {
103103
},
104104
};
105105

106+
/**
 * Backslash line break: a `\` at the end of a line.
 *
 * The preprocessor replaces that sequence with the U+E000 marker character,
 * which the lexer emits as a single BACKSLASH_BREAK token. This rule turns
 * that token into a `line-break` element.
 *
 * Returns `{ success: false }` when the token at `ctx.pos` is missing or is
 * not BACKSLASH_BREAK; otherwise returns one `line-break` element and
 * consumes exactly one token.
 */
export const backslashLineBreakRule: InlineRule = {
  name: "backslashLineBreak",
  startTokens: ["BACKSLASH_BREAK"],

  parse(ctx: ParseContext): RuleResult<Element> {
    // Optional chaining also covers the case where ctx.pos is past the end.
    if (ctx.tokens[ctx.pos]?.type !== "BACKSLASH_BREAK") {
      return { success: false };
    }

    // Typed literal instead of `any`: the element stays checked against
    // Element while still carrying the extra internal flag.
    // NOTE(review): `_preservedTrailingBreak` presumably tells a later
    // trailing line-break cleanup pass to keep this break — confirm against
    // whichever code reads the flag.
    const lineBreak: Element & { _preservedTrailingBreak?: boolean } = {
      element: "line-break",
      _preservedTrailingBreak: true,
    };

    return {
      success: true,
      elements: [lineBreak],
      consumed: 1,
    };
  },
};
128+
106129
/**
107130
* Underscore line break: _ at end of line
108131
* Syntax: " _\n" (space + underscore + newline)

tests/fixtures/misc/bibliography/expected.json

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -214,25 +214,19 @@
214214
}
215215
},
216216
{
217-
"element": "container",
218-
"data": {
219-
"type": "paragraph",
220-
"attributes": {},
221-
"elements": [
222-
{
223-
"element": "text",
224-
"data": "[["
225-
},
226-
{
227-
"element": "text",
228-
"data": "bibliography"
229-
},
230-
{
231-
"element": "text",
232-
"data": "]]"
233-
}
234-
]
235-
}
217+
"element": "text",
218+
"data": "[["
219+
},
220+
{
221+
"element": "text",
222+
"data": "bibliography"
223+
},
224+
{
225+
"element": "text",
226+
"data": "]]"
227+
},
228+
{
229+
"element": "line-break"
236230
},
237231
{
238232
"element": "definition-list",

tests/fixtures/misc/comment/expected.json

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,6 @@
2020
{
2121
"element": "text",
2222
"data": "Empty"
23-
},
24-
{
25-
"element": "text",
26-
"data": " "
27-
},
28-
{
29-
"element": "text",
30-
"data": " "
3123
}
3224
]
3325
}

tests/fixtures/misc/hr/expected.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,11 @@
127127
},
128128
{
129129
"element": "text",
130-
"data": "—-"
130+
"data": ""
131+
},
132+
{
133+
"element": "text",
134+
"data": "-"
131135
}
132136
]
133137
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
`~!@#$%^&amp;*()-=+[]{}|/<br />
1+
<p>`~!@#$%^&amp;*()-=+[]{}|/<br /></p>

0 commit comments

Comments
 (0)