Skip to content

Commit 3ce05b7

Browse files
authored
Fix small text not being escapable and triggering in code blocks. (#782)
<!-- Please read https://github.com/SableClient/Sable/blob/dev/CONTRIBUTING.md before submitting your pull request -->

### Description

<!-- Please include a summary of the change. Please also include relevant motivation and context. List any dependencies that are required for this change. -->

Title.

#### Type of change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update

### Checklist:

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my own code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings

### AI disclosure:

- [x] Partially AI assisted (clarify which code was AI assisted and briefly explain what it does).
- [ ] Fully AI generated (explain what all the generated code does in moderate detail).

<!-- Write any explanation required here, but do not generate the explanation using AI!! You must prove you understand what the code in this PR does. -->

Tests were AI generated
2 parents 9d264ee + 91434ad commit 3ce05b7

7 files changed

Lines changed: 97 additions & 21 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
default: patch
3+
---
4+
5+
Fix small text being parsed in code blocks and not being escapable.

src/app/plugins/markdown/bidirectional.test.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ describe('bidirectional round-trip', () => {
107107
expect(result).toContain('||hidden message||');
108108
});
109109

110+
it('round-trips literal line-start -# (escaped) in a paragraph', () => {
111+
const markdown = '\\-# not small text';
112+
const html = markdownToHtml(markdown);
113+
expect(html).not.toContain('<sub');
114+
const injected = injectDataMd(html);
115+
const result = htmlToMarkdown(injected);
116+
expect(result).toContain('\\-#');
117+
expect(result).toContain('not small text');
118+
});
119+
110120
it('round-trips inline math', () => {
111121
const markdown = '$E = mc^2$';
112122
const html = markdownToHtml(markdown);

src/app/plugins/markdown/extensions/matrix-math.ts

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,22 @@ import type { TokenizerExtension, RendererExtension } from 'marked';
33
/** Private-use char so math extensions do not match `$` / `$$` inside code spans. Not U+E000–U+E002 (emoticon placeholders). {@link shieldDollarRunsForMarked} uses U+E021–U+E022. */
44
export const MATH_CODE_DOLLAR_MASK = '\uE020';
55

6+
/**
7+
* Replaces the `-` of line-start `-# …` inside markdown code so the Matrix subscript block
8+
* extension does not match before marked's `fences` rule (custom block extensions run first).
9+
* {@link unmaskSubscriptCodeLinePlaceholders} restores output HTML.
10+
*/
11+
export const SUBSCRIPT_CODE_LINE_MASK = '\uE023';
12+
13+
function maskSubscriptLineStartsInCodeInner(inner: string): string {
14+
return inner.replace(/(^|\n)-#( +)/g, `$1${SUBSCRIPT_CODE_LINE_MASK}#$2`);
15+
}
16+
17+
/** Applies {@link MATH_CODE_DOLLAR_MASK} and subscript masking inside a fence or inline-code region. */
18+
function maskMathAndSubscriptInCodeInner(inner: string): string {
19+
return maskSubscriptLineStartsInCodeInner(inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK));
20+
}
21+
622
function findSameLineFenceClose(md: string, from: number, tick: string, minLen: number): number {
723
let j = from;
824
while (j < md.length && md[j] !== '\n') {
@@ -62,11 +78,11 @@ function tryConsumeFence(md: string, i: number): { text: string; end: number } |
6278
const close = findMultilineFenceEnd(md, contentStart, tick, openLen);
6379
if (!close) {
6480
const inner = md.slice(contentStart, md.length);
65-
const masked = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
81+
const masked = maskMathAndSubscriptInCodeInner(inner);
6682
return { text: md.slice(i, contentStart) + masked, end: md.length };
6783
}
6884
const inner = md.slice(contentStart, close.contentEnd);
69-
const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
85+
const maskedInner = maskMathAndSubscriptInCodeInner(inner);
7086
return {
7187
text: md.slice(i, contentStart) + maskedInner + md.slice(close.contentEnd, close.blockEnd),
7288
end: close.blockEnd,
@@ -80,7 +96,7 @@ function tryConsumeFence(md: string, i: number): { text: string; end: number } |
8096
while (closeIdx + closeRun < md.length && md[closeIdx + closeRun] === tick) closeRun++;
8197

8298
const inner = md.slice(afterOpen, closeIdx);
83-
const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
99+
const maskedInner = maskMathAndSubscriptInCodeInner(inner);
84100
return {
85101
text: md.slice(i, afterOpen) + maskedInner + md.slice(closeIdx, closeIdx + closeRun),
86102
end: closeIdx + closeRun,
@@ -99,7 +115,7 @@ function tryConsumeInlineCode(md: string, i: number): { text: string; end: numbe
99115
while (j + cr < md.length && md[j + cr] === '`') cr++;
100116
if (cr === run) {
101117
const inner = md.slice(contentStart, j);
102-
const maskedInner = inner.replace(/\$/g, MATH_CODE_DOLLAR_MASK);
118+
const maskedInner = maskMathAndSubscriptInCodeInner(inner);
103119
return {
104120
text: md.slice(i, contentStart) + maskedInner + md.slice(j, j + run),
105121
end: j + run,
@@ -155,6 +171,10 @@ export function unmaskMathCodeDollarPlaceholders(html: string): string {
155171
return html.replaceAll(MATH_CODE_DOLLAR_MASK, '$');
156172
}
157173

174+
export function unmaskSubscriptCodeLinePlaceholders(html: string): string {
175+
return html.replaceAll(`${SUBSCRIPT_CODE_LINE_MASK}#`, '-#');
176+
}
177+
158178
const MARKED_MATH_BLOCK_SHIELD = '\uE021';
159179
const MARKED_MATH_BLOCK_SHIELD_END = '\uE022';
160180

src/app/plugins/markdown/extensions/matrix-subscript.ts

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,24 @@ import type { TokenizerExtension, RendererExtension, Tokens } from 'marked';
44
export const matrixSubscriptExtension = {
55
name: 'subscript',
66
level: 'block',
7-
start(src: string) {
8-
return src.indexOf('-#');
9-
},
107
tokenizer(
11-
this: { lexer: { inlineTokens: (t: string, tokens: Tokens.Generic[]) => void } },
8+
this: {
9+
lexer: { inlineTokens: (t: string, tokens: Tokens.Generic[]) => void };
10+
},
1211
src: string
1312
) {
14-
const match = /^-# +(.+)/.exec(src);
15-
if (match) {
16-
const token = {
17-
type: 'subscript',
18-
raw: match[0],
19-
text: match[1],
20-
tokens: [] as Tokens.Generic[],
21-
};
22-
this.lexer.inlineTokens(token.text!, token.tokens);
23-
return token;
13+
const match = /^-# +([^\n]+)/.exec(src);
14+
if (!match) {
15+
return undefined;
2416
}
25-
return undefined;
17+
const token = {
18+
type: 'subscript',
19+
raw: match[0],
20+
text: match[1],
21+
tokens: [] as Tokens.Generic[],
22+
};
23+
this.lexer.inlineTokens(token.text!, token.tokens);
24+
return token;
2625
},
2726
renderer(
2827
this: { parser: { parseInline: (tokens: Tokens.Generic[]) => string } },

src/app/plugins/markdown/markdownToHtml.test.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { describe, expect, it } from 'vitest';
2+
import { htmlToMarkdown } from './htmlToMarkdown';
23
import { markdownToHtml } from './markdownToHtml';
34

45
describe('markdownToHtml', () => {
@@ -89,6 +90,41 @@ describe('markdownToHtml', () => {
8990
expect(result).toContain('$$test$$');
9091
});
9192

93+
it('converts -# small/sub syntax outside code', () => {
94+
const result = markdownToHtml('-# caption');
95+
expect(result).toContain('<sub');
96+
expect(result).toContain('data-md="-#"');
97+
expect(result).toContain('caption');
98+
});
99+
100+
it('does not parse -# inside fenced code as subscript', () => {
101+
expect(markdownToHtml('```\n-# not sub\n```')).not.toContain('<sub');
102+
expect(markdownToHtml('```\n-# not sub\n```')).toContain('-# not sub');
103+
});
104+
105+
it('does not parse -# inside inline code as subscript', () => {
106+
expect(markdownToHtml('`-# lit`')).not.toContain('<sub');
107+
expect(markdownToHtml('`-# lit`')).toContain('-# lit');
108+
});
109+
110+
it('parses -# as single-line only so fenced code below stays code', () => {
111+
const html = markdownToHtml('-# caption\n```\nfenced\n```');
112+
expect(html).toContain('caption');
113+
expect(html).toContain('<pre>');
114+
expect(html).toContain('fenced');
115+
});
116+
117+
it('does not parse escaped \\-# as small/sub', () => {
118+
const result = markdownToHtml('\\-# literal caption');
119+
expect(result).not.toContain('<sub');
120+
expect(result).not.toContain('data-md="-#"');
121+
expect(result).toContain('literal caption');
122+
});
123+
124+
it('escapes literal -# when converting paragraph HTML to markdown', () => {
125+
expect(htmlToMarkdown('<p>-# plain words</p>')).toContain('\\-#');
126+
});
127+
92128
it('converts block math syntax', () => {
93129
const result = markdownToHtml('$$\\frac{a}{b}$$');
94130
expect(result).toContain('data-mx-maths');

src/app/plugins/markdown/markdownToHtml.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
maskDollarSignsInsideMarkdownCode,
88
shieldDollarRunsForMarked,
99
unmaskMathCodeDollarPlaceholders,
10+
unmaskSubscriptCodeLinePlaceholders,
1011
} from './extensions/matrix-math';
1112
import { matrixSubscriptExtension } from './extensions/matrix-subscript';
1213
import { matrixEmoticonExtension, preprocessEmoticon } from './extensions/matrix-emoticon';
@@ -150,7 +151,7 @@ export function markdownToHtml(markdown: string): string {
150151

151152
DOMPurify.removeHook('afterSanitizeAttributes');
152153

153-
const unmasked = unmaskMathCodeDollarPlaceholders(sanitized);
154+
const unmasked = unmaskSubscriptCodeLinePlaceholders(unmaskMathCodeDollarPlaceholders(sanitized));
154155

155156
// DOMPurify's Node/JSdom build can drop <img> size attributes even when allowlisted.
156157
// For Matrix custom emojis, always emit a stable height so outgoing messages have

src/app/plugins/markdown/utils.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,12 @@ export const escapeMarkdownInlineSequences = (text: string): string => {
7878
(t) => t
7979
);
8080

81-
return parts.join('');
81+
let out = parts.join('');
82+
out = out.replace(
83+
/(^|\n)-(# +)/gm,
84+
(_, lineStart: string, hashSp: string) => `${lineStart}\\-${hashSp}`
85+
);
86+
return out;
8287
};
8388

8489
/**

0 commit comments

Comments
 (0)