Skip to content

Commit 0e98260

Browse files
authored
Merge pull request #4411 from udecode/claude/issue-1481-20250626_174106
2 parents a494347 + 9527c7f commit 0e98260

File tree

4 files changed

+365
-0
lines changed

4 files changed

+365
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@platejs/core': patch
3+
---
4+
5+
- Fixed HTML deserialization of content pasted from Google Docs: a BR tag between block elements created two empty paragraphs instead of one. Each BR tag between blocks is now converted to a single empty paragraph.

packages/core/src/lib/plugins/html/utils/deserializeHtml.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export const deserializeHtml = (
2525
if (typeof element === 'string') {
2626
element = htmlStringToDOMNode(element);
2727
}
28+
2829
if (shouldCollapseWhiteSpace) {
2930
element = collapseWhiteSpace(element);
3031
}

packages/core/src/lib/plugins/html/utils/deserializeHtmlNode.ts

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,69 @@ import { htmlBrToNewLine } from './htmlBrToNewLine';
77
import { htmlElementToElement } from './htmlElementToElement';
88
import { htmlElementToLeaf } from './htmlElementToLeaf';
99
import { htmlTextNodeToString } from './htmlTextNodeToString';
10+
import { inlineTagNames } from './inlineTagNames';
1011
import { isHtmlElement } from './isHtmlElement';
1112

13+
/**
 * Tell whether an element is block-level, i.e. its tag name is not listed in
 * `inlineTagNames`. A missing element is never a block.
 */
const isBlockElement = (element: Element | null): boolean =>
  element ? !inlineTagNames.has(element.tagName) : false;
19+
20+
/**
 * DOM `nodeType` value of a text node (the value of `Node.TEXT_NODE`). Kept as
 * a local constant so the check does not depend on a global `Node` object.
 */
const TEXT_NODE_TYPE = 3;

/**
 * Walk the siblings of `node` in one direction and report whether any of them
 * is a text node with non-whitespace content. Element siblings are stepped
 * over without looking at their content.
 */
const hasDirectTextSibling = (
  node: Node,
  direction: 'nextSibling' | 'previousSibling'
): boolean => {
  for (
    let sibling: Node | null = node[direction];
    sibling;
    sibling = sibling[direction]
  ) {
    if (sibling.nodeType === TEXT_NODE_TYPE && sibling.textContent?.trim()) {
      return true;
    }
  }

  return false;
};

/**
 * Check if a BR tag should be converted to an empty paragraph.
 *
 * @param node Element to inspect.
 * @returns `false` when `node` is not a BR, is an `Apple-interchange-newline`
 *   BR, has no parent element, sits directly inside a P or SPAN, or has a
 *   non-blank text node among its siblings; `true` otherwise.
 */
const shouldBrBecomeEmptyParagraph = (node: Element): boolean => {
  if (node.nodeName !== 'BR') return false;

  // Skip Apple-interchange-newline BR tags
  if (node.className === 'Apple-interchange-newline') return false;

  const parent = node.parentElement;
  if (!parent) return false;

  // BR tags directly inside P or SPAN should remain as line breaks
  if (parent.tagName === 'P' || parent.tagName === 'SPAN') return false;

  // A BR next to text at the same DOM level is a line break within that text
  if (
    hasDirectTextSibling(node, 'previousSibling') ||
    hasDirectTextSibling(node, 'nextSibling')
  ) {
    return false;
  }

  // For Google Docs: standalone BR tags inside structural elements (B, TD, DIV, etc.)
  // should become empty paragraphs
  return true;
};
72+
1273
/** Deserialize HTML element or child node. */
1374
export const deserializeHtmlNode =
1475
(editor: SlateEditor) =>
@@ -18,6 +79,22 @@ export const deserializeHtmlNode =
1879
if (textNode) return textNode;
1980
if (!isHtmlElement(node)) return null;
2081

82+
// Convert BR tags to empty paragraphs when appropriate (e.g., from Google Docs)
83+
if (shouldBrBecomeEmptyParagraph(node)) {
84+
return {
85+
children: [{ text: '' }],
86+
type: editor.getType('p'),
87+
};
88+
}
89+
90+
// Drop Apple-interchange-newline BR tags entirely (shouldBrBecomeEmptyParagraph returns false for them, so they reach this check)
91+
if (
92+
node.nodeName === 'BR' &&
93+
(node as HTMLBRElement).className === 'Apple-interchange-newline'
94+
) {
95+
return null;
96+
}
97+
2198
// break line
2299
const breakLine = htmlBrToNewLine(node);
23100

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/** @jsx jsxt */
2+
3+
import { getHtmlDocument, jsxt } from '@platejs/test-utils';
4+
5+
import { createPlateEditor } from '../../../../react';
6+
import { deserializeHtml } from './deserializeHtml';
7+
8+
jsxt;
9+
10+
describe('deserializeHtml - Google Docs', () => {
11+
it('should create single empty paragraphs from BR tags between paragraphs', () => {
  const editor = createPlateEditor({ plugins: [] });

  // Google Docs style markup: consecutive paragraphs separated by one BR each
  const html = `
<p>Hello world</p>
<br />
<p>Hello World</p>
<br />
<p>Hello World</p>
<br />
<p>Hello World</p>
`;

  const body = getHtmlDocument(html).body;

  // One empty paragraph is expected in place of every BR
  const expected = (
    <editor>
      <hp>Hello world</hp>
      <hp><htext /></hp>
      <hp>Hello World</hp>
      <hp><htext /></hp>
      <hp>Hello World</hp>
      <hp><htext /></hp>
      <hp>Hello World</hp>
    </editor>
  ) as any;

  const actual = deserializeHtml(editor, { element: body });

  expect(actual).toEqual(expected.children);
});
49+
50+
it('should preserve BR tags within paragraphs', () => {
  const editor = createPlateEditor({ plugins: [] });

  // A BR inside a paragraph stays a newline within that paragraph's text
  const body = getHtmlDocument(`<p>Line 1<br />Line 2</p>`).body;

  const expected = (
    <editor>
      <hp>Line 1{'\n'}Line 2</hp>
    </editor>
  ) as any;

  const actual = deserializeHtml(editor, { element: body });

  expect(actual).toEqual(expected.children);
});
66+
67+
it('should handle complex Google Docs HTML', () => {
  const editor = createPlateEditor({ plugins: [] });

  // Actual HTML structure from the issue
  const html = `
<b style="font-weight:normal;" id="docs-internal-guid-0753e24d-7fff-d209-84cc-3361f30177bf">
<p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;">
<span style="font-size:11pt;font-family:Arial;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Hello world</span>
</p>
<br />
<p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;">
<span style="font-size:11pt;font-family:Arial;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Hello World</span>
</p>
<br />
<p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;">
<span style="font-size:11pt;font-family:Arial;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Hello World</span>
</p>
<br />
<p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;">
<span style="font-size:11pt;font-family:Arial;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">Hello World</span>
</p>
</b>
<br class="Apple-interchange-newline">
`;

  const nodes = deserializeHtml(editor, {
    element: getHtmlDocument(html).body,
  });

  // Should have 7 elements: 4 paragraphs with content + 3 empty paragraphs from BR tags
  expect(nodes).toHaveLength(7);

  for (let index = 0; index < 7; index++) {
    expect(nodes[index].type).toBe('p');
  }

  // The empty paragraphs sit between the content paragraphs
  for (const index of [1, 3, 5]) {
    expect((nodes[index] as any).children[0].text).toBe('');
  }
});
110+
111+
it('should preserve BR tags within paragraphs as separate text nodes', () => {
  const editor = createPlateEditor({ plugins: [] });

  const body = getHtmlDocument(
    `<p><span>Hello</span><br /><span>World</span></p>`
  ).body;

  const nodes = deserializeHtml(editor, { element: body });

  // The BR becomes its own newline text node; adjacent text nodes are not
  // merged during deserialization
  expect(nodes).toHaveLength(1);

  const paragraph = nodes[0] as any;

  expect(paragraph.type).toBe('p');
  expect(paragraph.children).toHaveLength(3);

  ['Hello', '\n', 'World'].forEach((text, index) => {
    expect(paragraph.children[index].text).toBe(text);
  });
});
128+
129+
it('should handle two consecutive BR tags between paragraphs', () => {
  const editor = createPlateEditor({ plugins: [] });

  const html = `
<p>First paragraph</p>
<br />
<br />
<p>Second paragraph</p>
`;

  const body = getHtmlDocument(html).body;

  // Two BRs are expected to give two empty paragraphs
  const expected = (
    <editor>
      <hp>First paragraph</hp>
      <hp><htext /></hp>
      <hp><htext /></hp>
      <hp>Second paragraph</hp>
    </editor>
  ) as any;

  const actual = deserializeHtml(editor, { element: body });

  expect(actual).toEqual(expected.children);
});
158+
159+
it('should handle three consecutive BR tags between paragraphs', () => {
  const editor = createPlateEditor({ plugins: [] });

  const html = `
<p>First paragraph</p>
<br />
<br />
<br />
<p>Second paragraph</p>
`;

  const body = getHtmlDocument(html).body;

  // Three BRs are expected to give three empty paragraphs
  const expected = (
    <editor>
      <hp>First paragraph</hp>
      <hp><htext /></hp>
      <hp><htext /></hp>
      <hp><htext /></hp>
      <hp>Second paragraph</hp>
    </editor>
  ) as any;

  const actual = deserializeHtml(editor, { element: body });

  expect(actual).toEqual(expected.children);
});
192+
193+
it('should handle multiple consecutive BR tags in complex Google Docs HTML', () => {
  const editor = createPlateEditor({ plugins: [] });

  const html = `
<b style="font-weight:normal;">
<p dir="ltr">Content 1</p>
<br />
<br />
<p dir="ltr">Content 2</p>
<br />
<br />
<br />
<p dir="ltr">Content 3</p>
</b>
`;

  const nodes = deserializeHtml(editor, {
    element: getHtmlDocument(html).body,
  });

  // Reads the text of the first child of the node at the given position
  const firstTextAt = (index: number) => (nodes[index] as any).children[0].text;

  // Should have 8 elements: 3 paragraphs with content + 5 empty paragraphs from BR tags
  expect(nodes).toHaveLength(8);

  // Content paragraphs
  expect(firstTextAt(0)).toBe('Content 1');
  expect(firstTextAt(3)).toBe('Content 2');
  expect(firstTextAt(7)).toBe('Content 3');

  // Empty paragraphs produced by the BR tags
  for (const index of [1, 2, 4, 5, 6]) {
    expect(firstTextAt(index)).toBe('');
  }
});
227+
228+
it('should handle three consecutive BR tags not between blocks', () => {
  const editor = createPlateEditor({ plugins: [] });

  // The document holds nothing but three BR tags, so none sits between blocks
  const html = `
<br />
<br />
<br />
`;

  const body = getHtmlDocument(html).body;

  const expected = (
    <editor>
      <hp><htext /></hp>
      <hp><htext /></hp>
      <hp><htext /></hp>
    </editor>
  ) as any;

  const actual = deserializeHtml(editor, { element: body });

  expect(actual).toEqual(expected.children);
});
258+
259+
it('should handle BR tags in various contexts within a div', () => {
  const editor = createPlateEditor({ plugins: [] });

  const html = `
<div>
<br />
<br />
<br />
<p>Content</p>
</div>
`;

  const nodes = deserializeHtml(editor, {
    element: getHtmlDocument(html).body,
  });

  // Should have 4 elements: 3 empty paragraphs from BR tags + 1 paragraph with content
  expect(nodes).toHaveLength(4);

  ['', '', '', 'Content'].forEach((text, index) => {
    expect((nodes[index] as any).children[0].text).toBe(text);
  });
});
282+
});

0 commit comments

Comments
 (0)