Fix flaky buck2 core golden tests by stripping spurious glog lines

scottcao · meta-codesync[bot] · commit 5479de18e768 · 2026-06-26T23:26:28.000-07:00
Summary:
Three buck2 core tests were intermittently failing because C++ libraries linked into the buck2 binary (and the tools it spawns) emit a glog-format INFO line to stderr during process init, e.g. `I0623 15:40:41.926481 128942 Hash.cpp:327] tiHash seed: 12824087ull` (from `ti/common/utils/Hash.cpp`). Whether and when these lines appear in captured stderr varies between runs, so they leak non-deterministically into the output that golden tests compare against:

- `test_errors.py::test_package_listing_errors`
- `test_formatting.py::test_bxl_with_stacktrace`
- `test_error_categorization.py::test_init_data_timeout`

The line uses the standard glog prefix (`[WIEF]MMDD HH:MM:SS.dddddd`), which is distinct from buck2's own tracing-subscriber `[<TIMESTAMP>]` format, so it can be filtered safely. This adds a shared `strip_glog_lines` helper to `golden.py` that drops whole lines carrying the glog marker while preserving all other newlines (so callers' trailing/blank-line structure is unchanged), and applies it in the central `sanitize_stderr` plus the two tests that use custom sanitizers. The existing copy-pasted glog filter in `test_oom_detection.py` is consolidated onto the shared helper.

The marker is matched anywhere in the line so indented occurrences (e.g. when nested inside an embedded daemon stderr block, as in `test_init_data_timeout`) are also stripped.

___

Differential Revision: D109498020

fbshipit-source-id: 463221a583984f1fdf3ef40e234c889de325bc85
diff --git a/tests/core/errors/test_errors.py b/tests/core/errors/test_errors.py
@@ -12,7 +12,7 @@
 from buck2.tests.e2e_util.api.buck import Buck
 from buck2.tests.e2e_util.asserts import expect_failure
 from buck2.tests.e2e_util.buck_workspace import buck_test, env
-from buck2.tests.e2e_util.helper.golden import golden
+from buck2.tests.e2e_util.helper.golden import golden, strip_glog_lines
 
 
 @buck_test()
@@ -72,7 +72,10 @@ async def test_package_listing_errors(buck: Buck) -> None:
         )
         outs.append(stripped_stderr)
 
-    golden(output="\n\n\n".join(outs), rel_path="package_listing/expected.golden.out")
+    golden(
+        output=strip_glog_lines("\n\n\n".join(outs)),
+        rel_path="package_listing/expected.golden.out",
+    )
 
 
 @buck_test(
diff --git a/tests/core/errors/test_formatting.py b/tests/core/errors/test_formatting.py
@@ -14,10 +14,11 @@
 from buck2.tests.e2e_util.api.buck import Buck
 from buck2.tests.e2e_util.asserts import expect_failure
 from buck2.tests.e2e_util.buck_workspace import buck_test
-from buck2.tests.e2e_util.helper.golden import golden
+from buck2.tests.e2e_util.helper.golden import golden, strip_glog_lines
 
 
 def _sanitize(s: str) -> str:
+    s = strip_glog_lines(s)
     # Remove configuration hashes
     s = re.sub(r"\b[0-9a-f]{16}\b", "<HASH>", s)
     # And action digests
diff --git a/tests/e2e_util/helper/golden.py b/tests/e2e_util/helper/golden.py
@@ -169,7 +169,25 @@ def sanitize_hashes(s: str) -> str:
     return re.sub(r"\b[0-9a-f]{40}:[0-9]{1,3}\b", "<DIGEST>", s)
 
 
+# C++ libraries linked into buck2 (and the tools it spawns) can emit glog-format
+# lines such as `I0623 15:40:41.926481 128942 Hash.cpp:327] tiHash seed: ...ull`
+# to stderr during process init. Their timestamps, PIDs, and source locations
+# vary between runs, so they leak non-deterministically into captured stderr and
+# must be dropped before comparing against goldens. The marker may be indented
+# (e.g. when nested inside an embedded daemon stderr block), so we match it
+# anywhere in the line.
+_GLOG_LINE_RE = re.compile(r".*[WIEF]\d{4} \d{2}:\d{2}:\d{2}\.\d{6}.*\n?")
+
+
+def strip_glog_lines(s: str) -> str:
+    # Return `s` with every line matched by _GLOG_LINE_RE deleted. A match spans
+    # the entire line (any indentation plus its trailing newline, if present),
+    # so blank lines and the newlines of non-matching lines are left untouched.
+    return _GLOG_LINE_RE.sub("", s)
+
+
 def sanitize_stderr(s: str) -> str:
+    s = strip_glog_lines(s)
     # Remove all timestamps
     s = re.sub(r"\[.{29}\]", "[<TIMESTAMP>]", s)
     # Remove all UUIDs