Skip to content

Commit 3e9e42f

Browse files
authored
Fix splitCssText again (#1640)
Fixes a browser 'lock up' at record time due to a presence of large amounts of css in <style> elements, which are split over multiple text nodes, which triggers the new code added in #1437 (see that PR for full explanation of why this all exists). #1437 was not written with performance in mind as it was believed to be an edge case, but things like Grammarly browser extension (#1603) among other scenarios were triggering pathological behavior, some of which was solved in #1615. See also #1640 (comment) for further discussion. * Fix the case when there are multiple matches and we end up not finding a unique one - just go with the best guess when there are many splits by looking at the previous chunk's size * Also add '0px' -> '0' stylesheet normalization, which also fixes the sample problem in a different way * Add new test and modify it so that it can trigger a failure in the absence of the '0px' normalization; there may be other unknown ways of triggering a similar bug, so ensure that the primary 'best guess' method doesn't suffer a regression * Leverage the 'best guess' method so that we can quit after 100 iterations trying to find a unique substring; hopefully this bit along with the `iterLimit` already added will prevent any future pathological cases. Failing example extracted from large files identified by Paul D'Ambra (Posthog) ... see comment from MartinWorkfully: PostHog/posthog-js#1668
1 parent 47a7c3f commit 3e9e42f

File tree

3 files changed

+151
-16
lines changed

3 files changed

+151
-16
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"rrweb-snapshot": patch
3+
"rrweb": patch
4+
---
5+
6+
Improve performance of splitCssText for <style> elements with large css content - see #1603

packages/rrweb-snapshot/src/utils.ts

+62-15
Original file line numberDiff line numberDiff line change
@@ -450,8 +450,19 @@ export function absolutifyURLs(cssText: string | null, href: string): string {
450450
* Intention is to normalize by remove spaces, semicolons and CSS comments
451451
* so that we can compare css as authored vs. output of stringifyStylesheet
452452
*/
453-
export function normalizeCssString(cssText: string): string {
454-
return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '');
453+
export function normalizeCssString(
454+
cssText: string,
455+
/**
456+
* _testNoPxNorm: only used as part of the 'substring matching going from many to none'
457+
* test case so that it will trigger a failure if the conditions that let to the creation of that test arise again
458+
*/
459+
_testNoPxNorm = false,
460+
): string {
461+
if (_testNoPxNorm) {
462+
return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '');
463+
} else {
464+
return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '').replace(/0px/g, '0');
465+
}
455466
}
456467

457468
/**
@@ -463,19 +474,24 @@ export function normalizeCssString(cssText: string): string {
463474
export function splitCssText(
464475
cssText: string,
465476
style: HTMLStyleElement,
477+
_testNoPxNorm = false,
466478
): string[] {
467479
const childNodes = Array.from(style.childNodes);
468480
const splits: string[] = [];
469-
let iterLimit = 0;
481+
let iterCount = 0;
470482
if (childNodes.length > 1 && cssText && typeof cssText === 'string') {
471-
let cssTextNorm = normalizeCssString(cssText);
483+
let cssTextNorm = normalizeCssString(cssText, _testNoPxNorm);
472484
const normFactor = cssTextNorm.length / cssText.length;
473485
for (let i = 1; i < childNodes.length; i++) {
474486
if (
475487
childNodes[i].textContent &&
476488
typeof childNodes[i].textContent === 'string'
477489
) {
478-
const textContentNorm = normalizeCssString(childNodes[i].textContent!);
490+
const textContentNorm = normalizeCssString(
491+
childNodes[i].textContent!,
492+
_testNoPxNorm,
493+
);
494+
const jLimit = 100; // how many iterations for the first part of searching
479495
let j = 3;
480496
for (; j < textContentNorm.length; j++) {
481497
if (
@@ -489,31 +505,62 @@ export function splitCssText(
489505
break;
490506
}
491507
for (; j < textContentNorm.length; j++) {
492-
const bit = textContentNorm.substring(0, j);
508+
let startSubstring = textContentNorm.substring(0, j);
493509
// this substring should appears only once in overall text too
494-
const bits = cssTextNorm.split(bit);
510+
let cssNormSplits = cssTextNorm.split(startSubstring);
495511
let splitNorm = -1;
496-
if (bits.length === 2) {
497-
splitNorm = cssTextNorm.indexOf(bit);
512+
if (cssNormSplits.length === 2) {
513+
splitNorm = cssNormSplits[0].length;
498514
} else if (
499-
bits.length > 2 &&
500-
bits[0] === '' &&
515+
cssNormSplits.length > 2 &&
516+
cssNormSplits[0] === '' &&
501517
childNodes[i - 1].textContent !== ''
502518
) {
503519
// this childNode has same starting content as previous
504-
splitNorm = cssTextNorm.indexOf(bit, 1);
520+
splitNorm = cssTextNorm.indexOf(startSubstring, 1);
521+
} else if (cssNormSplits.length === 1) {
522+
// try to roll back to get multiple matches again
523+
startSubstring = startSubstring.substring(
524+
0,
525+
startSubstring.length - 1,
526+
);
527+
cssNormSplits = cssTextNorm.split(startSubstring);
528+
if (cssNormSplits.length <= 1) {
529+
// no split possible
530+
splits.push(cssText);
531+
return splits;
532+
}
533+
j = jLimit + 1; // trigger end of search
534+
} else if (j === textContentNorm.length - 1) {
535+
// we're about to end loop without a split point
536+
splitNorm = cssTextNorm.indexOf(startSubstring);
537+
}
538+
if (cssNormSplits.length >= 2 && j > jLimit) {
539+
const prevTextContent = childNodes[i - 1].textContent;
540+
if (prevTextContent && typeof prevTextContent === 'string') {
541+
// pick the first matching point which respects the previous chunk's approx size
542+
const prevMinLength = normalizeCssString(prevTextContent).length;
543+
splitNorm = cssTextNorm.indexOf(startSubstring, prevMinLength);
544+
}
545+
if (splitNorm === -1) {
546+
// fall back to pick the first matching point of many
547+
splitNorm = cssNormSplits[0].length;
548+
}
505549
}
506550
if (splitNorm !== -1) {
507551
// find the split point in the original text
508552
let k = Math.floor(splitNorm / normFactor);
509553
for (; k > 0 && k < cssText.length; ) {
510-
iterLimit += 1;
511-
if (iterLimit > 50 * childNodes.length) {
554+
iterCount += 1;
555+
if (iterCount > 50 * childNodes.length) {
512556
// quit for performance purposes
513557
splits.push(cssText);
514558
return splits;
515559
}
516-
const normPart = normalizeCssString(cssText.substring(0, k));
560+
const normPart = normalizeCssString(
561+
cssText.substring(0, k),
562+
_testNoPxNorm,
563+
);
517564
if (normPart.length === splitNorm) {
518565
splits.push(cssText.substring(0, k));
519566
cssText = cssText.substring(k);

packages/rrweb-snapshot/test/css.test.ts

+83-1
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,6 @@ describe('css splitter', () => {
178178
transition: all 4s ease;
179179
}`),
180180
);
181-
// TODO: splitCssText can't handle it yet if both start with .x
182181
style.appendChild(
183182
JSDOM.fragment(`.y {
184183
-moz-transition: all 5s ease;
@@ -227,6 +226,89 @@ describe('css splitter', () => {
227226
}
228227
expect(splitCssText(cssText, style)).toEqual(sections);
229228
});
229+
230+
it('finds css textElement splits correctly, with substring matching going from many to none', () => {
231+
const window = new Window({ url: 'https://localhost:8080' });
232+
const document = window.document;
233+
document.head.innerHTML = `<style>
234+
.section-news-v3-detail .news-cnt-wrapper :where(p):not(:where([class~="not-prose"], [class~="not-prose"] *)) {
235+
margin-top: 0px;
236+
margin-bottom: 0px;
237+
}
238+
239+
.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(figure):not(:where([class~="not-prose"],[class~="not-prose"] *)) {
240+
margin-top: 2em;
241+
margin-bottom: 2em;
242+
}
243+
244+
.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose > :first-child):not(:where([class~="not-prose"],[cl</style>`;
245+
const style = document.querySelector('style');
246+
if (style) {
247+
// happydom? bug avoid: strangely a greater than symbol in the template string below
248+
// e.g. '.prose > :last-child' causes more than one child to be appended
249+
style.append(`ass~="not-prose"] *)) {
250+
margin-top: 0; /* cssRules transforms this to '0px' which was preventing matching prior to normalization */
251+
}
252+
253+
.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose :last-child):not(:where([class~="not-prose"],[class~="not-prose"] *)) {
254+
margin-bottom: 0;
255+
}
256+
257+
.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 {
258+
width: 100%;
259+
overflow-wrap: break-word;
260+
}
261+
262+
.section-home {
263+
height: 100%;
264+
overflow-y: auto;
265+
}
266+
`);
267+
268+
expect(style.childNodes.length).toEqual(2);
269+
270+
const expected = [
271+
'.section-news-v3-detail .news-cnt-wrapper :where(p):not(:where([class~="not-prose"], [class~="not-prose"] *)) { margin-top: 0px; margin-bottom: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(figure):not(:where([class~="not-prose"],[class~="not-prose"] *)) { margin-top: 2em; margin-bottom: 2em; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose > :first-child):not(:where([class~="not-prose"],[cl',
272+
'ass~="not-prose"] *)) { margin-top: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose :last-child):not(:where([class~="not-prose"],[class~="not-prose"] *)) { margin-bottom: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 { width: 100%; overflow-wrap: break-word; }.section-home { height: 100%; overflow-y: auto; }',
273+
];
274+
const browserSheet = expected.join('');
275+
expect(stringifyStylesheet(style.sheet!)).toEqual(browserSheet);
276+
let _testNoPxNorm = true; // trigger the original motivating scenario for this test
277+
expect(splitCssText(browserSheet, style, _testNoPxNorm)).toEqual(
278+
expected,
279+
);
280+
_testNoPxNorm = false; // this case should also be solved by normalizing '0px' -> '0'
281+
expect(splitCssText(browserSheet, style, _testNoPxNorm)).toEqual(
282+
expected,
283+
);
284+
}
285+
});
286+
287+
it('finds css textElement splits correctly, even with repeated sections', () => {
288+
const window = new Window({ url: 'https://localhost:8080' });
289+
const document = window.document;
290+
document.head.innerHTML =
291+
'<style>.a{background-color: black; } </style>';
292+
const style = document.querySelector('style');
293+
if (style) {
294+
style.append('.x{background-color:red;}');
295+
style.append('.b {background-color:black;}');
296+
style.append('.x{background-color:red;}');
297+
style.append('.c{ background-color: black}');
298+
299+
const expected = [
300+
'.a { background-color: black; }',
301+
'.x { background-color: red; }',
302+
'.b { background-color: black; }',
303+
'.x { background-color: red; }',
304+
'.c { background-color: black; }',
305+
];
306+
const browserSheet = expected.join('');
307+
expect(stringifyStylesheet(style.sheet!)).toEqual(browserSheet);
308+
309+
expect(splitCssText(browserSheet, style)).toEqual(expected);
310+
}
311+
});
230312
});
231313

232314
describe('applyCssSplits css rejoiner', function () {

0 commit comments

Comments
 (0)