cBioPortal · alisman · May 15, 2026
diff --git a/end-to-end-test-playwright/.gitignore b/end-to-end-test-playwright/.gitignore
@@ -6,3 +6,9 @@ playwright/.cache/
 # Host-mode snapshots (generated by `npm test` / `npm run test:update`).
 # Only Docker-generated snapshots under __snapshots__/ are authoritative.
 tests/**/__local_snapshots__/
+
+# HAR fixtures recorded via PW_HAR_MODE=record. Large binary blobs
+# (cbioportal API responses + assets); committing them would bloat the
+# repo. For now record/replay is a local-only workflow — see the
+# "HAR record/replay" section of README.md.
+tests/**/__hars__/
diff --git a/end-to-end-test-playwright/README.md b/end-to-end-test-playwright/README.md
@@ -86,6 +86,50 @@ localdb tests without waiting for a Docker pull.
 script name is forwarded to `playwright test`, so flags like
 `--debug`, `--headed`, `--grep`, `--trace on` all work.
 
+## HAR record/replay (optional)
+
+Backend response timing is a major source of remote_e2e flakiness — the
+same test, same code, but a slightly different DOM at screenshot time
+because a dozen XHRs landed in a slightly different order. The
+`fixtures.ts` `context` fixture can route `*.cbioportal.org` traffic
+through a per-test [Playwright HAR
+file](https://playwright.dev/docs/mock#mocking-with-har-files) so the
+network is byte-identical run to run.
+
+```bash
+# Record fixtures for a spec (passes through to the real network AND
+# saves responses to __hars__/<spec>/<test-slug>.har next to the spec
+# file — e.g. tests/__hars__/config/… — plus sibling response-body files).
+PW_HAR_MODE=record pnpm exec playwright test tests/config.spec.ts
+
+# Replay fixtures (network is mocked; missing entries fall back to the
+# real network so an incomplete HAR still works while you grow it, and a
+# test with no HAR file at all simply runs against the real network).
+PW_HAR_MODE=replay pnpm exec playwright test tests/config.spec.ts
+
+# Default — `PW_HAR_MODE` unset (or set to anything other than
+# record/replay) → the fixture is a no-op and tests hit the real
+# backend, same as today.
+pnpm exec playwright test tests/config.spec.ts
+```
+
+Scope is restricted to `*.cbioportal.org` so the local frontend bundle
+(`localhost:3000` in LOCALDEV mode) and third-party APIs (OncoKB,
+Genome Nexus) flow through untouched — the HAR captures only the
+variable-timing backend, not the code under test.
+
+HAR fixtures are large (~5 MB per test) and currently **gitignored**;
+treat record/replay as a local-only workflow for now. Whether to commit
+them, LFS them, or run a periodic refresh job in CI is an open
+question. Once a workflow is chosen, this section will be updated and
+CI can flip `PW_HAR_MODE=replay` to lock the backend behind
+deterministic fixtures.
+
+When fixtures are present, screenshot baselines should be regenerated
+under `PW_HAR_MODE=replay`. The first screenshot captured under HAR
+replay differs slightly from the one taken while recording (a one-off
+render-order difference); subsequent replays then match the new
+baseline byte for byte. Anecdotally, on `patient-screenshot.spec.ts`
+three back-to-back replays produced identical pixels every time.
+
 ## Updating references when a real visual change lands
 
 1. Land the code change.

diff --git a/end-to-end-test-playwright/fixtures.ts b/end-to-end-test-playwright/fixtures.ts
@@ -1,10 +1,13 @@
+import fs from 'fs';
+import path from 'path';
 import {
     test as baseTest,
     expect,
     Browser,
     BrowserContext,
     Locator,
     Page,
+    TestInfo,
 } from '@playwright/test';
 
 // LOCALDEV defaults ON: the suite runs against the public cbioportal
@@ -79,11 +82,90 @@ function patchBrowser(browser: Browser): Browser {
     return browser;
 }
 
+// HAR record/replay against *.cbioportal.org. Backend response timing is a
+// significant source of remote_e2e flakiness — same test, slightly
+// different DOM at screenshot time depending on which order/latency a
+// dozen XHRs finished in. Routing those XHRs through a per-test HAR
+// fixture takes the backend out of the loop on replay: every recorded
+// request is answered with the same bytes on every run.
+//
+// PW_HAR_MODE controls behavior:
+//   record  — pass requests through to the real backend AND write them
+//             to tests/__hars__/<spec>/<test-slug>.har. Use this to
+//             refresh fixtures.
+//   replay  — serve from the HAR. Requests missing from it pass through
+//             to the network (notFound: 'fallback'); this lets a partial
+//             HAR still work while you grow the corpus.
+//   anything else (e.g. off, or unset) — fixture is a no-op; tests hit
+//             the real backend.
+//
+// Scope is restricted by URL to *.cbioportal.org so the local frontend
+// bundle (localhost:3000 in LOCALDEV mode) and third-party origins
+// (oncokb.org, genomenexus.org) flow through untouched. Limiting scope
+// keeps HARs small and keeps "the frontend code under test" out of the
+// frozen-in-time fixture.
+// Requested mode, compared case-insensitively; an unset variable reads
+// as the empty string.
+const HAR_MODE = (process.env.PW_HAR_MODE || '').toLowerCase();
+// Matches cbioportal.org and its subdomains only. The host must consist
+// of whole DNS labels ending in `cbioportal.org`, so look-alike hosts
+// such as `notcbioportal.org` are not captured into (or served from)
+// the HAR.
+const HAR_URL_PATTERN = /^https?:\/\/(?:[a-z0-9-]+\.)*cbioportal\.org\//i;
+
+// Resolves the HAR fixture path for a test:
+//   <spec dir>/__hars__/<spec basename>/<test-slug>.har
+//
+// Per-test HAR (not per-spec) because routeFromHAR in `update: true`
+// overwrites the file on context close — multiple tests sharing one
+// HAR would clobber each other's recordings. Per-test files mean
+// `record` mode is idempotent: rerun a single test, only its HAR
+// moves.
+//
+// The slug is built from every titlePath entry except the first (which
+// Playwright documents as the test file name): runs of non-alphanumeric
+// characters collapse to '-', the result is lowercased and capped at
+// 120 characters.
+// NOTE: that mapping is lossy — two tests in one spec whose titles
+// differ only in punctuation/case, or only beyond the cap, still
+// resolve to the same path.
+function harPathFor(testInfo: TestInfo): string {
+    const specBasename = path
+        .basename(testInfo.file)
+        .replace(/\.spec\.[tj]sx?$/, '');
+    const cappedSlug = testInfo.titlePath
+        .slice(1)
+        .join('-')
+        .replace(/[^a-zA-Z0-9]+/g, '-')
+        .replace(/^-+|-+$/g, '')
+        .toLowerCase()
+        .slice(0, 120);
+    // The cap can cut right after a separator, so trim trailing dashes
+    // once more. A title with no ASCII alphanumerics would otherwise
+    // yield a bare `.har`, so fall back to a placeholder name.
+    const slug = cappedSlug.replace(/-+$/, '') || 'untitled';
+    return path.join(
+        path.dirname(testInfo.file),
+        '__hars__',
+        specBasename,
+        `${slug}.har`
+    );
+}
+
+// Installs HAR routing on `context` for the current test according to
+// HAR_MODE. Does nothing unless the mode is 'record' or 'replay'.
+async function setupHar(
+    context: BrowserContext,
+    testInfo: TestInfo
+): Promise<void> {
+    const isRecord = HAR_MODE === 'record';
+    const isReplay = HAR_MODE === 'replay';
+    if (!(isRecord || isReplay)) return;
+
+    const fixturePath = harPathFor(testInfo);
+    // Replaying without a fixture: skip the route entirely. The first
+    // record run for this test will create the file.
+    if (isReplay && !fs.existsSync(fixturePath)) return;
+
+    // Only replay gets a miss policy. 'fallback' sends unrecorded
+    // requests to the network instead of failing them; the stricter
+    // 'abort' would expose gaps at once but break the test on the first
+    // new endpoint, whereas fallback keeps the suite going until the
+    // next record pass.
+    const missPolicy = isRecord ? {} : { notFound: 'fallback' as const };
+
+    await context.routeFromHAR(fixturePath, {
+        url: HAR_URL_PATTERN,
+        update: isRecord,
+        // 'attach' writes response bodies to separate sibling files
+        // instead of base64-inlining them, so the .har JSON stays
+        // small/diffable and the blobs remain opaque files that git
+        // could still hand to LFS if needed.
+        updateContent: 'attach',
+        // 'minimal' leaves out timing, server IP, and other fields that
+        // would churn on each re-record without affecting replay.
+        updateMode: 'minimal',
+        ...missPolicy,
+    });
+}
+
 export const test = baseTest.extend({
     browser: async ({ browser }, use) => {
         await use(patchBrowser(browser));
     },
-    context: async ({ context }, use) => {
+    context: async ({ context }, use, testInfo) => {
+        await setupHar(context, testInfo);
         await use(patchContext(context));
     },
     page: async ({ page }, use) => {