Skip to content

Commit e2423e2

Browse files
committed
Add MS footnotes support to paste from office.
1 parent 69f3cbe commit e2423e2

File tree

3 files changed

+745
-0
lines changed

3 files changed

+745
-0
lines changed
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
/**
2+
* @license Copyright (c) 2003-2025, CKSource Holding sp. z o.o. All rights reserved.
3+
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-licensing-options
4+
*/
5+
6+
/**
7+
* @module paste-from-office/filters/replacemsfootnotes
8+
*/
9+
10+
import type { ViewDocumentFragment, ViewElement, ViewText, ViewUpcastWriter } from 'ckeditor5/src/engine.js';
11+
12+
/**
 * Replaces MS Word specific footnotes references and definitions with proper elements.
 *
 * Things to know about MS Word footnotes:
 *
 * * Footnote references in Word are marked with the `mso-footnote-id` style.
 * * Word does not support nested footnotes, so references within definitions are not treated as references;
 * they are only used to detect the enclosing definition and are removed from its content.
 * * Word appends an extra space after footnote references within definitions; that leading space is trimmed.
 * * The footnote definitions list is marked with the `mso-element: footnote-list` style and it contains
 * `mso-element: footnote` elements.
 * * A footnote definition might contain tables, lists and other elements, not only text. All direct children
 * of the definition element are copied into the new definition content container.
 *
 * Example pseudo document showing MS Word footnote structure:
 *
 * ```html
 * <p>Text with footnote<a style='mso-footnote-id:ftn1'>[1]</a> reference.</p>
 *
 * <div style='mso-element:footnote-list'>
 * 	<div style='mso-element:footnote' id=ftn1>
 * 		<p class=MsoFootnoteText><a style='mso-footnote-id:ftn1'>[1]</a> Footnote content</p>
 * 		<table class="MsoTableGrid">...</table>
 * 	</div>
 * </div>
 * ```
 *
 * Structure produced by this filter (MS specific classes and styles on the copied content are not
 * stripped here):
 *
 * ```html
 * <p>Text with footnote<sup class="footnote"><a id="ref-ftn1" href="#ftn1"></a></sup> reference.</p>
 *
 * <ol class="footnotes">
 * 	<li id="ftn1" class="footnote-definition">
 * 		<a href="#ref-ftn1" class="footnote-backlink">^</a>
 * 		<div class="footnote-content">
 * 			<p class=MsoFootnoteText>Footnote content</p>
 * 			<table class="MsoTableGrid">...</table>
 * 		</div>
 * 	</li>
 * </ol>
 * ```
 *
 * Nothing is changed when no reference outside of a definition is found or when there is no
 * `mso-element: footnote-list` element. A reference without a matching definition is left untouched.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer The view writer instance.
 * @internal
 */
export function replaceMSFootnotes( documentFragment: ViewDocumentFragment, writer: ViewUpcastWriter ): void {
	const msFootnotesRefs = new Map<string, ViewElement>();
	const msFootnotesDefs = new Map<string, ViewElement>();
	let msFootnotesDefinitionsList: ViewElement | null = null;

	// Phase 1: Collect all footnotes references and definitions. Find the footnotes definitions list element.
	for ( const { item } of writer.createRangeIn( documentFragment ) ) {
		if ( !item.is( 'element' ) ) {
			continue;
		}

		// If spot a footnotes definitions element, let's store it. It'll be replaced later.
		// There should be only one such element in the document (if there are more, the last one wins).
		if ( item.getStyle( 'mso-element' ) === 'footnote-list' ) {
			msFootnotesDefinitionsList = item;
			continue;
		}

		// Only elements carrying the `mso-footnote-id` style are interesting from now on.
		if ( !item.hasStyle( 'mso-footnote-id' ) ) {
			continue;
		}

		const msFootnoteDef = item.findAncestor( 'element', el => el.getStyle( 'mso-element' ) === 'footnote' );

		if ( msFootnoteDef ) {
			// If it's a reference within a definition, ignore it and track only the definition.
			// MS Word do not support nested footnotes, so it's safe to assume that all references within
			// a definition point to the same definition.
			const msFootnoteDefId = msFootnoteDef.getAttribute( 'id' );

			// A definition without an `id` could never be matched with a reference, so do not track it.
			if ( msFootnoteDefId ) {
				msFootnotesDefs.set( msFootnoteDefId, msFootnoteDef );
			}
		} else {
			// If it's a reference outside of a definition, track it as a reference.
			const msFootnoteRefId = item.getStyle( 'mso-footnote-id' )!;

			msFootnotesRefs.set( msFootnoteRefId, item );
		}
	}

	// If there are no footnotes references (outside of definitions) or no definitions list, there's nothing to normalize.
	if ( !msFootnotesRefs.size || !msFootnotesDefinitionsList ) {
		return;
	}

	// Phase 2: Replace footnotes definitions list with proper element.
	const footnotesDefinitionsList = createFootnotesListViewElement( writer );

	writer.replace( msFootnotesDefinitionsList, footnotesDefinitionsList );

	// Phase 3: Replace all footnotes references and add matching definitions to the definitions list.
	for ( const [ footnoteId, msFootnoteRef ] of msFootnotesRefs ) {
		const msFootnoteDef = msFootnotesDefs.get( footnoteId );

		if ( !msFootnoteDef ) {
			continue;
		}

		// Replace footnote reference.
		writer.replace( msFootnoteRef, createFootnoteRefViewElement( writer, footnoteId ) );

		// Append found matching definition to the definitions list.
		// Definitions are appended in the order the references were collected; according to the original
		// author's note the final order is fixed by a post-fixer outside of this function.
		const defElements = createFootnoteDefViewElement( writer, footnoteId );

		removeMSReferences( writer, msFootnoteDef );

		// Copy the definition content into the content container.
		//
		// Iterate over a snapshot of the children: non-element nodes (e.g. text nodes) are not cloned but moved,
		// and appending a node that still has a parent detaches it from that parent. Doing that while iterating
		// the live children collection would make the iteration skip the sibling following each moved node.
		for ( const child of Array.from( msFootnoteDef.getChildren() ) ) {
			const nodeToAppend = child.is( 'element' ) ? writer.clone( child, true ) : child;

			writer.appendChild( nodeToAppend, defElements.content );
		}

		writer.appendChild( defElements.listItem, footnotesDefinitionsList );
	}
}
138+
139+
/**
 * Strips every MS Office footnote reference (an element with a non-empty `mso-footnote-id` style)
 * found inside the given element.
 *
 * MS Word separates such a reference from the text that follows it with a single space. When the node
 * directly following a reference is a text node starting with a space, that one leading space is dropped
 * as well (and the whole text node is removed if nothing else is left in it).
 *
 * The element is modified in place.
 *
 * @param writer The view writer.
 * @param element The element to clean up.
 * @returns The very same element that was passed in.
 */
function removeMSReferences( writer: ViewUpcastWriter, element: ViewElement ): ViewElement {
	const references: Array<ViewElement> = [];
	const textsWithLeadingSpace: Array<ViewText> = [];

	// Collect first, modify later, so the tree is not changed while it is being walked.
	for ( const { item } of writer.createRangeIn( element ) ) {
		if ( !item.is( 'element' ) || !item.getStyle( 'mso-footnote-id' ) ) {
			continue;
		}

		references.push( item );

		const following = item.nextSibling;

		if ( following?.is( '$text' ) && following.data.startsWith( ' ' ) ) {
			textsWithLeadingSpace.push( following );
		}
	}

	// Process in reverse document order (last found first).
	for ( const reference of references.reverse() ) {
		writer.remove( reference );
	}

	for ( const text of textsWithLeadingSpace.reverse() ) {
		const remainder = text.data.substring( 1 );

		if ( remainder.length === 0 ) {
			// The text node was just the separating space, so nothing is left to keep.
			writer.remove( text );

			continue;
		}

		// Swap the text node for a new one holding the text without the leading space.
		const container = text.parent!;
		const position = container.getChildIndex( text );
		const replacement = writer.createText( remainder );

		writer.remove( text );
		writer.insertChild( position, replacement, container );
	}

	return element;
}
191+
192+
/**
 * Builds an empty `<ol class="footnotes">` view element that serves as the footnotes list.
 *
 * @param writer The view writer instance used to create the element.
 * @returns The newly created footnotes list view element.
 */
function createFootnotesListViewElement( writer: ViewUpcastWriter ): ViewElement {
	const footnotesList = writer.createElement( 'ol', { class: 'footnotes' } );

	return footnotesList;
}
201+
202+
/**
 * Builds the view structure of a footnote reference:
 *
 * ```html
 * <sup class="footnote"><a id="ref-{footnoteId}" href="#{footnoteId}"></a></sup>
 * ```
 *
 * The link is created without any content.
 *
 * @param writer The view writer instance used to create the elements.
 * @param footnoteId The footnote ID used to build the link `id` and `href` attributes.
 * @returns The `<sup>` element wrapping the link.
 */
function createFootnoteRefViewElement( writer: ViewUpcastWriter, footnoteId: string ): ViewElement {
	const anchorAttributes = {
		id: `ref-${ footnoteId }`,
		href: `#${ footnoteId }`
	};

	const wrapper = writer.createElement( 'sup', { class: 'footnote' } );
	const anchor = writer.createElement( 'a', anchorAttributes );

	writer.appendChild( anchor, wrapper );

	return wrapper;
}
220+
221+
/**
 * Builds the view structure of a single footnote definition:
 *
 * ```html
 * <li id="{footnoteId}" class="footnote-definition">
 * 	<a href="#ref-{footnoteId}" class="footnote-backlink">^</a>
 * 	<div class="footnote-content"></div>
 * </li>
 * ```
 *
 * The content container is returned empty so the caller can fill it in.
 *
 * @param writer The view writer instance used to create the elements.
 * @param footnoteId The footnote ID used for the list item `id` and the backlink `href`.
 * @returns An object with the list item element and its (empty) content container.
 */
function createFootnoteDefViewElement( writer: ViewUpcastWriter, footnoteId: string ): {
	listItem: ViewElement;
	content: ViewElement;
} {
	const definitionItem = writer.createElement( 'li', {
		id: footnoteId,
		class: 'footnote-definition'
	} );

	// Backlink pointing to the reference, rendered as a caret.
	const backlinkAnchor = writer.createElement( 'a', {
		href: `#ref-${ footnoteId }`,
		class: 'footnote-backlink'
	} );

	writer.appendChild( writer.createText( '^' ), backlinkAnchor );

	const contentContainer = writer.createElement( 'div', {
		class: 'footnote-content'
	} );

	// The backlink goes first, the content container follows it.
	writer.appendChild( backlinkAnchor, definitionItem );
	writer.appendChild( contentContainer, definitionItem );

	return {
		listItem: definitionItem,
		content: contentContainer
	};
}

packages/ckeditor5-paste-from-office/src/normalizers/mswordnormalizer.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import { replaceImagesSourceWithBase64 } from '../filters/image.js';
1313
import { removeMSAttributes } from '../filters/removemsattributes.js';
1414
import { transformTables } from '../filters/table.js';
1515
import { removeInvalidTableWidth } from '../filters/removeinvalidtablewidth.js';
16+
import { replaceMSFootnotes } from '../filters/replacemsfootnotes.js';
1617
import { ViewUpcastWriter, type ViewDocument } from 'ckeditor5/src/engine.js';
1718
import type { PasteFromOfficeNormalizer, PasteFromOfficeNormalizerData } from '../normalizer.js';
1819

@@ -56,6 +57,7 @@ export class PasteFromOfficeMSWordNormalizer implements PasteFromOfficeNormalize
5657
replaceImagesSourceWithBase64( documentFragment, data.dataTransfer.getData( 'text/rtf' ) );
5758
transformTables( documentFragment, writer );
5859
removeInvalidTableWidth( documentFragment, writer );
60+
replaceMSFootnotes( documentFragment, writer );
5961
removeMSAttributes( documentFragment );
6062

6163
data.content = documentFragment;

0 commit comments

Comments
 (0)