@@ -77,15 +77,31 @@ export const newlineLineBreakRule: InlineRule = {
7777 }
7878 }
7979
80+ // Check if there's a BACKSLASH_BREAK ahead (skip whitespace)
81+ // Pattern: NEWLINE + WHITESPACE? + BACKSLASH_BREAK
82+ // In this case, the BACKSLASH_BREAK rule will handle the line-break
83+ let hasBackslashBreak = false ;
84+ {
85+ let ahead = 1 ;
86+ while ( ctx . tokens [ ctx . pos + ahead ] ?. type === "WHITESPACE" ) {
87+ ahead ++ ;
88+ }
89+ if ( ctx . tokens [ ctx . pos + ahead ] ?. type === "BACKSLASH_BREAK" ) {
90+ hasBackslashBreak = true ;
91+ }
92+ }
93+
8094 // Skip line-break if:
8195 // - End of input
8296 // - Another NEWLINE (paragraph break will handle this)
8397 // - Valid block start token
98+ // - BACKSLASH_BREAK ahead (that rule will create the line-break)
8499 if (
85100 ! nextMeaningfulToken ||
86101 nextMeaningfulToken . type === "EOF" ||
87102 nextMeaningfulToken . type === "NEWLINE" ||
88- isValidBlock
103+ isValidBlock ||
104+ hasBackslashBreak
89105 ) {
90106 // Don't generate line-break, return empty array
91107 return {
@@ -105,24 +121,71 @@ export const newlineLineBreakRule: InlineRule = {
105121
/**
 * Backslash line break: `\` at end of line (preprocessed to U+E000).
 *
 * In Wikidot, " \" at end of line creates a line break, and the space that
 * preceded the backslash is preserved *after* the line break.
 *
 * Because preprocessing rewrites "\\\n" to U+E000, the token sequence this
 * rule sees is:
 *   NEWLINE + WHITESPACE + BACKSLASH_BREAK + content
 *
 * The rule therefore starts on either token type:
 * - WHITESPACE immediately followed by BACKSLASH_BREAK: consumes both tokens
 *   and emits `line-break` followed by a single-space `text` element. The
 *   space is omitted when the break is followed by WHITESPACE + UNDERSCORE +
 *   (NEWLINE | EOF), so the underscore line-break rule can handle the rest.
 * - Standalone BACKSLASH_BREAK: consumes one token and emits `line-break`.
 *
 * Any other WHITESPACE token is rejected (`success: false`) so that other
 * rules can handle it.
 */
export const backslashLineBreakRule: InlineRule = {
  name: "backslashLineBreak",
  // Both literals must match the lexer's token type names exactly.
  startTokens: ["WHITESPACE", "BACKSLASH_BREAK"],

  /**
   * @param ctx - Parse context; only `ctx.tokens` and `ctx.pos` are read.
   * @returns On success, the generated elements and the number of tokens
   *   consumed (1 or 2); otherwise `{ success: false }`.
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const currentTok = ctx.tokens[ctx.pos];
    if (!currentTok) {
      return { success: false };
    }

    // Standalone BACKSLASH_BREAK (no preceding whitespace token).
    if (currentTok.type === "BACKSLASH_BREAK") {
      return {
        success: true,
        elements: [{ element: "line-break" }],
        consumed: 1,
      };
    }

    // From here on only WHITESPACE + BACKSLASH_BREAK is accepted.
    if (
      currentTok.type !== "WHITESPACE" ||
      ctx.tokens[ctx.pos + 1]?.type !== "BACKSLASH_BREAK"
    ) {
      return { success: false };
    }

    // Look past the break for the " _\n" pattern (underscore line-break).
    const afterBreak = ctx.tokens[ctx.pos + 2];
    const afterSpace = ctx.tokens[ctx.pos + 3];
    const afterUnderscore = ctx.tokens[ctx.pos + 4];
    const isFollowedByUnderscoreBreak =
      afterBreak?.type === "WHITESPACE" &&
      afterSpace?.type === "UNDERSCORE" &&
      (afterUnderscore?.type === "NEWLINE" || afterUnderscore?.type === "EOF");

    // When an underscore break follows, drop the preserved space and let the
    // underscore rule handle the remaining tokens.
    const elements: Element[] = isFollowedByUnderscoreBreak
      ? [{ element: "line-break" }]
      : [{ element: "line-break" }, { element: "text", data: " " }];

    return {
      success: true,
      elements,
      consumed: 2,
    };
  },
};
128191
0 commit comments