Skip to content
Merged
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import { stripInvisibleUnicode } from 'roosterjs-content-model-dom';

/**
 * @internal Check if there is XSS attack in the link
 * @param link The link to be checked
 * @returns The safe link with invisible Unicode characters stripped, or empty string if there is XSS attack
 * @remarks This function strips invisible Unicode characters (zero-width chars, Unicode Tags, etc.)
 * and then checks for a "script:" scheme whose letters may be separated by tab, line feed or
 * carriage return (e.g. s\nc\nr\ni\np\nt:) to prevent XSS attacks. This may block some valid links,
 * but it is necessary for security reasons. We treat the word "script" as safe if there are "/" before it.
 */
export function checkXss(link: string): string {
    // Defense-in-depth: strip invisible Unicode even if already handled elsewhere.
    // This must happen BEFORE the scheme check so that characters hidden between the
    // letters of "script" cannot be used to slip past the pattern below.
    const sanitized = stripInvisibleUnicode(link);

    // URL parsers discard ASCII tab, LF and CR anywhere in the input, so all three are
    // tolerated between the letters. "[^\/]*" restricts the match to the scheme part:
    // once a "/" has been seen, "script:" is just path/query text.
    const hasScriptScheme = /^[^\/]*s[\t\n\r]*c[\t\n\r]*r[\t\n\r]*i[\t\n\r]*p[\t\n\r]*t[\t\n\r]*:/i.test(
        sanitized
    );

    return hasScriptScheme ? '' : sanitized;
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,36 @@ describe('checkXss', () => {
const link = 'https://example.com/script:.js';
expect(checkXss(link)).toBe(link);
});

// U+200B (zero width space) and U+200C (zero width non-joiner) inside the host name are removed.
it('should strip invisible Unicode from link', () => {
const link = 'https://www\u200B.example\u200C.com';
expect(checkXss(link)).toBe('https://www.example.com');
});

// Same stripping applies to a non-http scheme: U+200B right after "mailto:" is removed.
it('should strip invisible Unicode from mailto link', () => {
const link = 'mailto:\u200Buser@example.com';
expect(checkXss(link)).toBe('mailto:user@example.com');
});

it('should detect XSS hidden behind invisible Unicode in script:', () => {
// "script:" with U+200B-U+200F between its letters (zero width space, zero width non-joiner,
// zero width joiner, left-to-right mark, right-to-left mark) should still be caught
const link = 's\u200Bc\u200Cr\u200Di\u200Ep\u200Ft:alert(1)';
expect(checkXss(link)).toBe('');
});

it('should strip Unicode Tags (supplementary plane) from link', () => {
// U+E0061 = \uDB40\uDC61 (Tag Latin Small Letter A), written as a UTF-16 surrogate pair
const link = 'mailto:\uDB40\uDC61user@example.com';
expect(checkXss(link)).toBe('mailto:user@example.com');
});

// U+202A (left-to-right embedding) and U+202E (right-to-left override) are removed.
it('should strip bidirectional marks from link', () => {
const link = 'mailto:\u202Auser\u202E@example.com';
expect(checkXss(link)).toBe('mailto:user@example.com');
});

// Stripping covers the whole link, including the query part (subject/body) of a mailto link.
it('should strip invisible Unicode from mailto subject and body', () => {
const link = 'mailto:user@example.com?subject=Hello\u200BWorld&body=Test\u200CContent';
expect(checkXss(link)).toBe('mailto:user@example.com?subject=HelloWorld&body=TestContent');
});
});
});
3 changes: 3 additions & 0 deletions packages/roosterjs-content-model-core/lib/editor/Editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
transformColor,
createDomToModelContextWithConfig,
domToContentModel,
sanitizeInvisibleUnicode,
} from 'roosterjs-content-model-dom';
import type {
ContentModelDocument,
Expand Down Expand Up @@ -52,6 +53,8 @@ export class Editor implements IEditor {
const initialModel =
options.initialModel ?? createEmptyModel(this.core.format.defaultFormat);

sanitizeInvisibleUnicode(initialModel);

this.core.api.setContentModel(
this.core,
initialModel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,12 @@ describe('Editor', () => {
updateKnownColorSpy = jasmine.createSpy('updateKnownColor');
createEditorCoreSpy = spyOn(createEditorCore, 'createEditorCore').and.callThrough();
setContentModelSpy = jasmine.createSpy('setContentModel');
createEmptyModelSpy = spyOn(createEmptyModel, 'createEmptyModel');
createEmptyModelSpy = spyOn(createEmptyModel, 'createEmptyModel').and.callThrough();
});

it('ctor and dispose, no options', () => {
const div = document.createElement('div');

createEmptyModelSpy.and.callThrough();

const editor = new Editor(div);

expect(createEditorCoreSpy).toHaveBeenCalledWith(div, {});
Expand Down Expand Up @@ -67,7 +65,7 @@ describe('Editor', () => {
} as any;
const setContentModelSpy = jasmine.createSpy('setContentModel');
const disposeErrorHandlerSpy = jasmine.createSpy('disposeErrorHandler');
const mockedInitialModel = 'INITMODEL' as any;
const mockedInitialModel = { blocks: [] } as any;
const options: EditorOptions = {
plugins: [mockedPlugin1, mockedPlugin2],
disposeErrorHandler: disposeErrorHandlerSpy,
Expand All @@ -78,8 +76,6 @@ describe('Editor', () => {
},
};

createEmptyModelSpy.and.callThrough();

const editor = new Editor(div, options);

expect(createEditorCoreSpy).toHaveBeenCalledWith(div, options);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Matches invisible BMP characters, plus Unicode Tags (U+E0001-U+E00FF) written as
// a UTF-16 surrogate pair: high surrogate \uDB40 followed by a low surrogate in DC01-DCFF.
const INVISIBLE_UNICODE_REGEX =
    // eslint-disable-next-line no-misleading-character-class
    /[\u00AD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180B-\u180E\u200B-\u200F\u202A-\u202E\u2028\u2029\u2060-\u2064\u2066-\u2069\u3164\uFEFF\uFFA0\uFFF9-\uFFFB]|\uDB40[\uDC01-\uDCFF]/g;

/**
 * Strip invisible Unicode characters from a string.
 * This removes zero-width characters, bidirectional marks, Unicode Tags (U+E0001-U+E00FF),
 * interlinear annotation anchors, Mongolian free variation selectors,
 * and other invisible formatting characters that can be used to hide content in links.
 *
 * The replacement is repeated until the string stops changing, so the result never contains
 * a matching character. A single pass is not enough: removing an invisible character that
 * sits between the two halves of a surrogate pair (e.g. "\uDB40\u200B\uDC61") would join the
 * halves into a Unicode Tag character that the same pass has already walked past.
 *
 * @remarks This function strips ZWJ (U+200D) which may affect emoji sequences.
 * It should only be applied to href attributes, not to visible text content.
 * @param value The string to strip invisible characters from
 * @returns The string with invisible characters removed
 */
export function stripInvisibleUnicode(value: string): string {
    let result = value;
    let lengthBeforePass: number;

    do {
        // Replacement only ever removes characters, so an unchanged length means an unchanged string.
        lengthBeforePass = result.length;
        result = result.replace(INVISIBLE_UNICODE_REGEX, '');
    } while (result.length !== lengthBeforePass);

    return result;
}
2 changes: 2 additions & 0 deletions packages/roosterjs-content-model-dom/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export { addTextSegment } from './modelApi/common/addTextSegment';
export { normalizeParagraph } from './modelApi/common/normalizeParagraph';

export { normalizeContentModel } from './modelApi/common/normalizeContentModel';
export { sanitizeInvisibleUnicode } from './modelApi/common/sanitizeInvisibleUnicode';
export { isGeneralSegment } from './modelApi/typeCheck/isGeneralSegment';
export { unwrapBlock } from './modelApi/common/unwrapBlock';
export { addSegment } from './modelApi/common/addSegment';
Expand Down Expand Up @@ -118,6 +119,7 @@ export { isCharacterValue, isModifierKey, isCursorMovingKey } from './domUtils/e
export { getNodePositionFromEvent } from './domUtils/event/getNodePositionFromEvent';
export { combineBorderValue, extractBorderValues } from './domUtils/style/borderValues';
export { isPunctuation, isSpace, normalizeText } from './domUtils/stringUtil';
export { stripInvisibleUnicode } from './domUtils/stripInvisibleUnicode';
export { parseTableCells } from './domUtils/table/parseTableCells';
export { readFile } from './domUtils/readFile';
export { retrieveDocumentMetadata } from './domUtils/retrieveDocumentMetadata';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import { stripInvisibleUnicode } from '../../domUtils/stripInvisibleUnicode';
import type {
ContentModelBlock,
ContentModelBlockGroup,
ContentModelDocument,
ContentModelSegment,
} from 'roosterjs-content-model-types';

/**
 * Remove invisible Unicode characters (e.g. zero-width chars, bidirectional marks,
 * Unicode Tags) from a content model, mutating the model in place.
 * Every block of the document is visited; text segments and link hrefs are cleaned, and
 * for General blocks/segments the Text nodes under their element are cleaned as well.
 * Meant to be applied when a model is initialized, so hidden content cannot be carried
 * in through links or text.
 * @param model The content model document to sanitize in-place
 */
export function sanitizeInvisibleUnicode(model: ContentModelDocument): void {
    // The document is the outermost block group: walk its direct children.
    for (const topLevelBlock of model.blocks) {
        sanitizeBlock(topLevelBlock);
    }
}

/**
 * Sanitize each child block of a block group, in order.
 * @param group The block group whose blocks are sanitized in-place
 */
function sanitizeBlockGroup(group: ContentModelBlockGroup): void {
    group.blocks.forEach(child => {
        sanitizeBlock(child);
    });
}

/**
 * Sanitize a single block according to its type:
 * - Paragraph: every segment is sanitized
 * - Table: every cell is sanitized as a block group
 * - BlockGroup: child blocks are sanitized; a General group additionally has the
 *   Text nodes under its element sanitized
 * - Entity / Divider: left untouched
 * @param block The block to sanitize in-place
 */
function sanitizeBlock(block: ContentModelBlock): void {
    if (block.blockType === 'Paragraph') {
        block.segments.forEach(item => {
            sanitizeSegment(item);
        });
        return;
    }

    if (block.blockType === 'Table') {
        block.rows.forEach(row => {
            row.cells.forEach(cell => {
                sanitizeBlockGroup(cell);
            });
        });
        return;
    }

    if (block.blockType === 'BlockGroup') {
        // Model children first, then the DOM text held by a General element.
        sanitizeBlockGroup(block);

        const isGeneralWithElement = block.blockGroupType === 'General' && block.element;

        if (isGeneralWithElement) {
            sanitizeTextNodes(block.element);
        }
    }

    // Remaining block types (Entity, Divider) are intentionally not modified.
}

/**
 * Sanitize a single segment in place.
 * The link href (when the segment has a link with a non-empty href) is cleaned for every
 * segment type. Beyond that, Text segments have their text cleaned and General segments have
 * both the Text nodes under their element and their child blocks cleaned. Image, Entity, Br
 * and SelectionMarker segments get no further processing.
 *
 * NOTE(review): stripInvisibleUnicode also removes ZWJ (U+200D); applying it to visible text
 * here may split emoji ZWJ sequences - confirm this is intended.
 * @param segment The segment to sanitize in-place
 */
function sanitizeSegment(segment: ContentModelSegment): void {
    const link = segment.link;

    if (link && link.format.href) {
        link.format.href = stripInvisibleUnicode(link.format.href);
    }

    if (segment.segmentType === 'Text') {
        segment.text = stripInvisibleUnicode(segment.text);
    } else if (segment.segmentType === 'General') {
        // DOM text under the element first, then the model blocks the segment contains.
        sanitizeTextNodes(segment.element);
        sanitizeBlockGroup(segment);
    }

    // Image, Entity, Br and SelectionMarker: nothing else to sanitize.
}

/**
 * Strip invisible Unicode characters from every Text node under the given element.
 * Text nodes with an empty value are skipped; all others have their value rewritten.
 * @param element Root element whose descendant Text nodes are sanitized in-place
 */
function sanitizeTextNodes(element: HTMLElement): void {
    // SHOW_TEXT limits the walk to Text nodes only.
    const textWalker = element.ownerDocument.createTreeWalker(element, NodeFilter.SHOW_TEXT);

    for (
        let current = textWalker.nextNode();
        current;
        current = textWalker.nextNode()
    ) {
        const content = current.nodeValue;

        if (content) {
            current.nodeValue = stripInvisibleUnicode(content);
        }
    }
}
Loading
Loading