Fix splitCssText again (#1640)

eoghanmurray · web-flow · commit 3e9e42fdfd63 · 2025-02-06T13:46:46.000Z
Fixes a browser 'lock up' at record time due to the presence of large amounts of css in <style> elements, which are split over multiple text nodes, which triggers the new code added in #1437 (see that PR for a full explanation of why this all exists). #1437 was not written with performance in mind as it was believed to be an edge case, but things like the Grammarly browser extension (#1603) among other scenarios were triggering pathological behavior, some of which was solved in #1615. See also #1640 (comment) for further discussion. * Fix the case when there are multiple matches and we end up not finding a unique one - just go with the best guess when there are many splits by looking at the previous chunk's size * Also add '0px' -> '0' stylesheet normalization, which also fixes the sample problem in a different way * Add new test and modify it so that it can trigger a failure in the absence of the '0px' normalization; there may be other unknown ways of triggering a similar bug, so ensure that the primary 'best guess' method doesn't suffer a regression * Leverage the 'best guess' method so that we can quit after 100 iterations trying to find a unique substring; hopefully this bit along with the `iterLimit` already added will prevent any future pathological cases. Failing example extracted from large files identified by Paul D'Ambra (Posthog) ... see comment from MartinWorkfully: PostHog/posthog-js#1668
diff --git a/.changeset/efficiently-splitCssText-1640.md b/.changeset/efficiently-splitCssText-1640.md
@@ -0,0 +1,6 @@
+---
+"rrweb-snapshot": patch
+"rrweb": patch
+---
+
+Improve performance of splitCssText for <style> elements with large css content - see #1603
diff --git a/packages/rrweb-snapshot/src/utils.ts b/packages/rrweb-snapshot/src/utils.ts
@@ -450,8 +450,19 @@ export function absolutifyURLs(cssText: string | null, href: string): string {
  * Intention is to normalize by remove spaces, semicolons and CSS comments
  * so that we can compare css as authored vs. output of stringifyStylesheet
  */
-export function normalizeCssString(cssText: string): string {
-  return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '');
+export function normalizeCssString(
+  cssText: string,
+  /**
+   * _testNoPxNorm: only used as part of the 'substring matching going from many to none'
+   * test case so that it will trigger a failure if the conditions that led to the creation of that test arise again
+   */
+  _testNoPxNorm = false,
+): string {
+  if (_testNoPxNorm) {
+    return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '');
+  } else {
+    return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '').replace(/0px/g, '0');
+  }
 }
 
 /**
@@ -463,19 +474,24 @@ export function normalizeCssString(cssText: string): string {
 export function splitCssText(
   cssText: string,
   style: HTMLStyleElement,
+  _testNoPxNorm = false,
 ): string[] {
   const childNodes = Array.from(style.childNodes);
   const splits: string[] = [];
-  let iterLimit = 0;
+  let iterCount = 0;
   if (childNodes.length > 1 && cssText && typeof cssText === 'string') {
-    let cssTextNorm = normalizeCssString(cssText);
+    let cssTextNorm = normalizeCssString(cssText, _testNoPxNorm);
     const normFactor = cssTextNorm.length / cssText.length;
     for (let i = 1; i < childNodes.length; i++) {
       if (
         childNodes[i].textContent &&
         typeof childNodes[i].textContent === 'string'
       ) {
-        const textContentNorm = normalizeCssString(childNodes[i].textContent!);
+        const textContentNorm = normalizeCssString(
+          childNodes[i].textContent!,
+          _testNoPxNorm,
+        );
+        const jLimit = 100; // how many iterations for the first part of searching
         let j = 3;
         for (; j < textContentNorm.length; j++) {
           if (
@@ -489,31 +505,62 @@ export function splitCssText(
           break;
         }
         for (; j < textContentNorm.length; j++) {
-          const bit = textContentNorm.substring(0, j);
+          let startSubstring = textContentNorm.substring(0, j);
           // this substring should appears only once in overall text too
-          const bits = cssTextNorm.split(bit);
+          let cssNormSplits = cssTextNorm.split(startSubstring);
           let splitNorm = -1;
-          if (bits.length === 2) {
-            splitNorm = cssTextNorm.indexOf(bit);
+          if (cssNormSplits.length === 2) {
+            splitNorm = cssNormSplits[0].length;
           } else if (
-            bits.length > 2 &&
-            bits[0] === '' &&
+            cssNormSplits.length > 2 &&
+            cssNormSplits[0] === '' &&
             childNodes[i - 1].textContent !== ''
           ) {
             // this childNode has same starting content as previous
-            splitNorm = cssTextNorm.indexOf(bit, 1);
+            splitNorm = cssTextNorm.indexOf(startSubstring, 1);
+          } else if (cssNormSplits.length === 1) {
+            // try to roll back to get multiple matches again
+            startSubstring = startSubstring.substring(
+              0,
+              startSubstring.length - 1,
+            );
+            cssNormSplits = cssTextNorm.split(startSubstring);
+            if (cssNormSplits.length <= 1) {
+              // no split possible
+              splits.push(cssText);
+              return splits;
+            }
+            j = jLimit + 1; // trigger end of search
+          } else if (j === textContentNorm.length - 1) {
+            // we're about to end loop without a split point
+            splitNorm = cssTextNorm.indexOf(startSubstring);
+          }
+          if (cssNormSplits.length >= 2 && j > jLimit) {
+            const prevTextContent = childNodes[i - 1].textContent;
+            if (prevTextContent && typeof prevTextContent === 'string') {
+              // pick the first matching point which respects the previous chunk's approx size
+              const prevMinLength = normalizeCssString(prevTextContent).length;
+              splitNorm = cssTextNorm.indexOf(startSubstring, prevMinLength);
+            }
+            if (splitNorm === -1) {
+              // fall back to pick the first matching point of many
+              splitNorm = cssNormSplits[0].length;
+            }
           }
           if (splitNorm !== -1) {
             // find the split point in the original text
             let k = Math.floor(splitNorm / normFactor);
             for (; k > 0 && k < cssText.length; ) {
-              iterLimit += 1;
-              if (iterLimit > 50 * childNodes.length) {
+              iterCount += 1;
+              if (iterCount > 50 * childNodes.length) {
                 // quit for performance purposes
                 splits.push(cssText);
                 return splits;
               }
-              const normPart = normalizeCssString(cssText.substring(0, k));
+              const normPart = normalizeCssString(
+                cssText.substring(0, k),
+                _testNoPxNorm,
+              );
               if (normPart.length === splitNorm) {
                 splits.push(cssText.substring(0, k));
                 cssText = cssText.substring(k);
diff --git a/packages/rrweb-snapshot/test/css.test.ts b/packages/rrweb-snapshot/test/css.test.ts
@@ -178,7 +178,6 @@ describe('css splitter', () => {
   transition: all 4s ease;
 }`),
       );
-      // TODO: splitCssText can't handle it yet if both start with .x
       style.appendChild(
         JSDOM.fragment(`.y {
   -moz-transition: all 5s ease;
@@ -227,6 +226,89 @@ describe('css splitter', () => {
     }
     expect(splitCssText(cssText, style)).toEqual(sections);
   });
+
+  it('finds css textElement splits correctly, with substring matching going from many to none', () => {
+    const window = new Window({ url: 'https://localhost:8080' });
+    const document = window.document;
+    document.head.innerHTML = `<style>
+.section-news-v3-detail .news-cnt-wrapper :where(p):not(:where([class~="not-prose"], [class~="not-prose"] *)) {
+    margin-top: 0px;
+    margin-bottom: 0px;
+}
+
+.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(figure):not(:where([class~="not-prose"],[class~="not-prose"] *)) {
+    margin-top: 2em;
+    margin-bottom: 2em;
+}
+
+.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose > :first-child):not(:where([class~="not-prose"],[cl</style>`;
+    const style = document.querySelector('style');
+    if (style) {
+      // avoids a possible happydom bug: strangely, a greater-than symbol in the template string below
+      // (e.g. '.prose > :last-child') causes more than one child to be appended
+      style.append(`ass~="not-prose"] *)) {
+    margin-top: 0;  /* cssRules transforms this to '0px' which was preventing matching prior to normalization */
+}
+
+.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose :last-child):not(:where([class~="not-prose"],[class~="not-prose"] *)) {
+    margin-bottom: 0;
+}
+
+.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 {
+    width: 100%;
+    overflow-wrap: break-word;
+}
+
+.section-home {
+    height: 100%;
+    overflow-y: auto;
+}
+`);
+
+      expect(style.childNodes.length).toEqual(2);
+
+      const expected = [
+        '.section-news-v3-detail .news-cnt-wrapper :where(p):not(:where([class~="not-prose"], [class~="not-prose"] *)) { margin-top: 0px; margin-bottom: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(figure):not(:where([class~="not-prose"],[class~="not-prose"] *)) { margin-top: 2em; margin-bottom: 2em; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose > :first-child):not(:where([class~="not-prose"],[cl',
+        'ass~="not-prose"] *)) { margin-top: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose :last-child):not(:where([class~="not-prose"],[class~="not-prose"] *)) { margin-bottom: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 { width: 100%; overflow-wrap: break-word; }.section-home { height: 100%; overflow-y: auto; }',
+      ];
+      const browserSheet = expected.join('');
+      expect(stringifyStylesheet(style.sheet!)).toEqual(browserSheet);
+      let _testNoPxNorm = true; // trigger the original motivating scenario for this test
+      expect(splitCssText(browserSheet, style, _testNoPxNorm)).toEqual(
+        expected,
+      );
+      _testNoPxNorm = false; // this case should also be solved by normalizing '0px' -> '0'
+      expect(splitCssText(browserSheet, style, _testNoPxNorm)).toEqual(
+        expected,
+      );
+    }
+  });
+
+  it('finds css textElement splits correctly, even with repeated sections', () => {
+    const window = new Window({ url: 'https://localhost:8080' });
+    const document = window.document;
+    document.head.innerHTML =
+      '<style>.a{background-color: black; }        </style>';
+    const style = document.querySelector('style');
+    if (style) {
+      style.append('.x{background-color:red;}');
+      style.append('.b      {background-color:black;}');
+      style.append('.x{background-color:red;}');
+      style.append('.c{      background-color:                     black}');
+
+      const expected = [
+        '.a { background-color: black; }',
+        '.x { background-color: red; }',
+        '.b { background-color: black; }',
+        '.x { background-color: red; }',
+        '.c { background-color: black; }',
+      ];
+      const browserSheet = expected.join('');
+      expect(stringifyStylesheet(style.sheet!)).toEqual(browserSheet);
+
+      expect(splitCssText(browserSheet, style)).toEqual(expected);
+    }
+  });
 });
 
 describe('applyCssSplits css rejoiner', function () {