microsoft · romanisa · Mar 17, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026
diff --git a/packages/roosterjs-content-model-api/lib/publicApi/utils/checkXss.ts b/packages/roosterjs-content-model-api/lib/publicApi/utils/checkXss.ts
@@ -1,10 +1,15 @@
+import { stripInvisibleUnicode } from 'roosterjs-content-model-dom';
+
 /**
  * @internal Check if there is XSS attack in the link
  * @param link The link to be checked
- * @returns The safe link, or empty string if there is XSS attack
- * @remarks This function checks for patterns like s\nc\nr\ni\np\nt: to prevent XSS attacks. This may block some valid links,
+ * @returns The safe link with invisible Unicode characters stripped, or empty string if there is XSS attack
+ * @remarks This function strips invisible Unicode characters (zero-width chars, Unicode Tags, etc.) and checks
+ * for patterns like s\nc\nr\ni\np\nt: (split by tab, CR or LF) to prevent XSS attacks. This may block some valid links,
  * but it is necessary for security reasons. We treat the word "script" as safe if there are "/" before it.
  */
 export function checkXss(link: string): string {
-    return link.match(/^[^\/]*s\n*c\n*r\n*i\n*p\n*t\n*:/i) ? '' : link;
+    // Strip invisible Unicode (defense-in-depth); match without tab/CR/LF, which URL parsers drop
+    const sanitized = stripInvisibleUnicode(link);
+    return /^[^\/]*script:/i.test(sanitized.replace(/[\t\n\r]/g, '')) ? '' : sanitized;
 }
diff --git a/packages/roosterjs-content-model-api/test/publicApi/utils/checkXssTest.ts b/packages/roosterjs-content-model-api/test/publicApi/utils/checkXssTest.ts
@@ -30,4 +30,36 @@ describe('checkXss', () => {
         const link = 'https://example.com/script:.js';
         expect(checkXss(link)).toBe(link);
     });
+
+    it('should strip invisible Unicode from link', () => {
+        const link = 'https://www\u200B.example\u200C.com'; // U+200B ZWSP, U+200C ZWNJ
+        expect(checkXss(link)).toBe('https://www.example.com');
+    });
+
+    it('should strip invisible Unicode from mailto link', () => {
+        const link = 'mailto:\u200Buser@example.com';
+        expect(checkXss(link)).toBe('mailto:user@example.com');
+    });
+
+    it('should detect XSS hidden behind invisible Unicode in script:', () => {
+        // "script:" split by ZWSP, ZWNJ, ZWJ, LRM and RLM (U+200B-U+200F) should still be caught
+        const link = 's\u200Bc\u200Cr\u200Di\u200Ep\u200Ft:alert(1)';
+        expect(checkXss(link)).toBe('');
+    });
+
+    it('should strip Unicode Tags (supplementary plane) from link', () => {
+        // U+E0061 = \uDB40\uDC61 (Tag Latin Small Letter A)
+        const link = 'mailto:\uDB40\uDC61user@example.com';
+        expect(checkXss(link)).toBe('mailto:user@example.com');
+    });
+
+    it('should strip bidirectional marks from link', () => {
+        const link = 'mailto:\u202Auser\u202E@example.com'; // U+202A LRE, U+202E RLO
+        expect(checkXss(link)).toBe('mailto:user@example.com');
+    });
+
+    it('should strip invisible Unicode from mailto subject and body', () => {
+        const link = 'mailto:user@example.com?subject=Hello\u200BWorld&body=Test\u200CContent';
+        expect(checkXss(link)).toBe('mailto:user@example.com?subject=HelloWorld&body=TestContent');
+    });
 });
diff --git a/packages/roosterjs-content-model-core/lib/editor/Editor.ts b/packages/roosterjs-content-model-core/lib/editor/Editor.ts
@@ -6,6 +6,7 @@ import {
     transformColor,
     createDomToModelContextWithConfig,
     domToContentModel,
+    sanitizeInvisibleUnicode,
 } from 'roosterjs-content-model-dom';
 import type {
     ContentModelDocument,
@@ -52,6 +53,8 @@ export class Editor implements IEditor {
         const initialModel =
             options.initialModel ?? createEmptyModel(this.core.format.defaultFormat);
 
+        sanitizeInvisibleUnicode(initialModel);
+
         this.core.api.setContentModel(
             this.core,
             initialModel,

diff --git a/packages/roosterjs-content-model-core/test/editor/EditorTest.ts b/packages/roosterjs-content-model-core/test/editor/EditorTest.ts
@@ -26,14 +26,12 @@ describe('Editor', () => {
         updateKnownColorSpy = jasmine.createSpy('updateKnownColor');
         createEditorCoreSpy = spyOn(createEditorCore, 'createEditorCore').and.callThrough();
         setContentModelSpy = jasmine.createSpy('setContentModel');
-        createEmptyModelSpy = spyOn(createEmptyModel, 'createEmptyModel');
+        createEmptyModelSpy = spyOn(createEmptyModel, 'createEmptyModel').and.callThrough();
     });
 
     it('ctor and dispose, no options', () => {
         const div = document.createElement('div');
 
-        createEmptyModelSpy.and.callThrough();
-
         const editor = new Editor(div);
 
         expect(createEditorCoreSpy).toHaveBeenCalledWith(div, {});
@@ -67,7 +65,7 @@ describe('Editor', () => {
         } as any;
         const setContentModelSpy = jasmine.createSpy('setContentModel');
         const disposeErrorHandlerSpy = jasmine.createSpy('disposeErrorHandler');
-        const mockedInitialModel = 'INITMODEL' as any;
+        const mockedInitialModel = { blocks: [] } as any;
         const options: EditorOptions = {
             plugins: [mockedPlugin1, mockedPlugin2],
             disposeErrorHandler: disposeErrorHandlerSpy,
@@ -78,8 +76,6 @@ describe('Editor', () => {
             },
         };
 
-        createEmptyModelSpy.and.callThrough();
-
         const editor = new Editor(div, options);
 
         expect(createEditorCoreSpy).toHaveBeenCalledWith(div, options);

diff --git a/packages/roosterjs-content-model-dom/lib/domUtils/stripInvisibleUnicode.ts b/packages/roosterjs-content-model-dom/lib/domUtils/stripInvisibleUnicode.ts
@@ -0,0 +1,18 @@
+const INVISIBLE_UNICODE_REGEX =
+    // eslint-disable-next-line no-misleading-character-class
+    /[\u00AD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180B-\u180E\u200B-\u200F\u202A-\u202E\u2028\u2029\u2060-\u2064\u2066-\u2069\u3164\uFEFF\uFFA0\uFFF9-\uFFFB]|\uDB40[\uDC01-\uDCFF]/g;
+
+/**
+ * Strip invisible Unicode characters from a string.
+ * This removes zero-width characters, bidirectional marks, Unicode Tags (U+E0001-U+E00FF),
+ * interlinear annotation anchors, Mongolian free variation selectors,
+ * and other invisible formatting characters that can be used to hide content in links.
+ *
+ * @remarks This function strips ZWJ (U+200D) which may affect emoji sequences. It is meant for href attributes,
+ * not visible text. NOTE(review): sanitizeInvisibleUnicode also applies it to text segments - confirm intended.
+ * @param value The string to strip invisible characters from
+ * @returns The string with invisible characters removed
+ */
+export function stripInvisibleUnicode(value: string): string {
+    return value.replace(INVISIBLE_UNICODE_REGEX, '');
+}
diff --git a/packages/roosterjs-content-model-dom/lib/index.ts b/packages/roosterjs-content-model-dom/lib/index.ts
@@ -70,6 +70,7 @@ export { addTextSegment } from './modelApi/common/addTextSegment';
 export { normalizeParagraph } from './modelApi/common/normalizeParagraph';
 
 export { normalizeContentModel } from './modelApi/common/normalizeContentModel';
+export { sanitizeInvisibleUnicode } from './modelApi/common/sanitizeInvisibleUnicode';
 export { isGeneralSegment } from './modelApi/typeCheck/isGeneralSegment';
 export { unwrapBlock } from './modelApi/common/unwrapBlock';
 export { addSegment } from './modelApi/common/addSegment';
@@ -118,6 +119,7 @@ export { isCharacterValue, isModifierKey, isCursorMovingKey } from './domUtils/e
 export { getNodePositionFromEvent } from './domUtils/event/getNodePositionFromEvent';
 export { combineBorderValue, extractBorderValues } from './domUtils/style/borderValues';
 export { isPunctuation, isSpace, normalizeText } from './domUtils/stringUtil';
+export { stripInvisibleUnicode } from './domUtils/stripInvisibleUnicode';
 export { parseTableCells } from './domUtils/table/parseTableCells';
 export { readFile } from './domUtils/readFile';
 export { retrieveDocumentMetadata } from './domUtils/retrieveDocumentMetadata';

diff --git a/packages/roosterjs-content-model-dom/lib/modelApi/common/sanitizeInvisibleUnicode.ts b/packages/roosterjs-content-model-dom/lib/modelApi/common/sanitizeInvisibleUnicode.ts
@@ -0,0 +1,89 @@
+import { stripInvisibleUnicode } from '../../domUtils/stripInvisibleUnicode';
+import type {
+    ContentModelBlock,
+    ContentModelBlockGroup,
+    ContentModelDocument,
+    ContentModelSegment,
+} from 'roosterjs-content-model-types';
+
+/**
+ * Strip invisible Unicode characters (e.g. zero-width chars, bidirectional marks, Unicode Tags)
+ * from Text segment text and from the link href of any segment, recursing through tables and
+ * nested block groups. For General segments and General block groups, all Text nodes under
+ * their DOM element are sanitized too. Entity and Divider blocks are left unchanged.
+ * @param model The content model document to sanitize in-place
+ */
+export function sanitizeInvisibleUnicode(model: ContentModelDocument): void {
+    sanitizeBlockGroup(model);
+}
+
+// Sanitize every block that is a direct child of the given group
+function sanitizeBlockGroup(group: ContentModelBlockGroup): void {
+    // sanitizeBlock itself recurses into nested groups and table cells
+    group.blocks.forEach(child => sanitizeBlock(child));
+}
+
+/**
+ * Sanitize one block according to its blockType.
+ * - Paragraph: every segment is sanitized
+ * - Table: every cell of every row is sanitized as a block group
+ * - BlockGroup: child blocks are sanitized; a General group also has the Text nodes
+ *   under its element sanitized
+ * - Entity, Divider: left unchanged
+ * @param block The block to sanitize in-place
+ */
+function sanitizeBlock(block: ContentModelBlock): void {
+    if (block.blockType === 'Paragraph') {
+        block.segments.forEach(segment => sanitizeSegment(segment));
+        return;
+    }
+
+    if (block.blockType === 'Table') {
+        block.rows.forEach(row => row.cells.forEach(cell => sanitizeBlockGroup(cell)));
+        return;
+    }
+
+    if (block.blockType === 'BlockGroup') {
+        sanitizeBlockGroup(block);
+
+        // Only a General group with an element gets its DOM Text nodes sanitized
+        if (block.blockGroupType === 'General' && block.element) {
+            sanitizeTextNodes(block.element);
+        }
+    }
+}
+
+/**
+ * Sanitize one segment: the href of its link (when it has one) and, depending on
+ * segmentType, its text (Text) or its element's Text nodes and child blocks (General).
+ * Image, Entity, Br and SelectionMarker segments are otherwise left unchanged.
+ * @param segment The segment to sanitize in-place
+ */
+function sanitizeSegment(segment: ContentModelSegment): void {
+    // The link check runs for every segment type
+    const linkFormat = segment.link?.format;
+
+    if (linkFormat?.href) {
+        linkFormat.href = stripInvisibleUnicode(linkFormat.href);
+    }
+
+    if (segment.segmentType === 'Text') {
+        segment.text = stripInvisibleUnicode(segment.text);
+    } else if (segment.segmentType === 'General') {
+        // Clean the element's own Text nodes, then recurse into the child blocks
+        sanitizeTextNodes(segment.element);
+        sanitizeBlockGroup(segment);
+    }
+}
+
+// Strip invisible Unicode from every non-empty Text node under the given element
+function sanitizeTextNodes(element: HTMLElement): void {
+    const textWalker = element.ownerDocument.createTreeWalker(element, NodeFilter.SHOW_TEXT);
+
+    for (let current = textWalker.nextNode(); current; current = textWalker.nextNode()) {
+        const original = current.nodeValue;
+        if (original) {
+            current.nodeValue = stripInvisibleUnicode(original);
+        }
+    }
+}