DataDog · thomas-lebeau · May 4, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
@@ -30,6 +30,7 @@ generated-docs/
 /playwright/.cache/
 test-results/
 playwright-report/
+test/e2e/.pinned-browsers/
 .vscode
 
 # Claude Code local files

@@ -1,7 +1,7 @@
 variables:
   CURRENT_STAGING: staging-19
   APP: 'browser-sdk'
-  CURRENT_CI_IMAGE: 105
+  CURRENT_CI_IMAGE: 106
   BUILD_STABLE_REGISTRY: 'registry.ddbuild.io'
   CI_IMAGE: '$BUILD_STABLE_REGISTRY/ci/$APP:$CURRENT_CI_IMAGE'
   GIT_REPOSITORY: 'git@github.com:DataDog/browser-sdk.git'
@@ -127,7 +127,16 @@ ci-image:
       aud: image-integrity
   script:
     - METADATA_FILE=$(mktemp)
-    - docker buildx build --platform linux/amd64 --build-arg CHROME_PACKAGE_VERSION=$CHROME_PACKAGE_VERSION --tag $CI_IMAGE --push --metadata-file $METADATA_FILE .
+    # Read the Playwright version from package.json so the image ships the matching browsers.
+    # `jq -e` exits non-zero when the dependency is missing (null), so the job fails here with a
+    # clear cause instead of starting an image build with `playwright@null`.
+    - PLAYWRIGHT_VERSION=$(jq -er '.devDependencies["@playwright/test"]' package.json)
+    - >
+      docker buildx build
+      --platform linux/amd64
+      --build-arg CHROME_PACKAGE_VERSION=$CHROME_PACKAGE_VERSION
+      --build-arg "PLAYWRIGHT_VERSION=$PLAYWRIGHT_VERSION"
+      --tag $CI_IMAGE
+      --push
+      --metadata-file $METADATA_FILE
+      .
     - ddsign sign $CI_IMAGE --docker-metadata-file $METADATA_FILE
 
 ########################################################################################################################
@@ -234,10 +243,32 @@ e2e:
       junit: test-report/e2e/*.xml
   script:
     - yarn
+    # Browsers are pre-installed in the CI image. If playwright is upgraded without rebuilding
+    # the image, this job will crash — rebuild the image to fix it.
     - FORCE_COLOR=1 yarn test:e2e:ci
   after_script:
     - node ./scripts/test/export-test-result.ts e2e
 
+# Note: e2e and e2e-pinned are kept separate for now. They should eventually be merged into a
+# single job running all browsers.
+e2e-pinned:
+  extends:
+    - .base-configuration
+    - .test-allowed-branches
+  interruptible: true
+  artifacts:
+    when: always
+    reports:
+      junit: test-report/e2e-pinned/*.xml
+  script:
+    - yarn
+    - yarn build && yarn build:apps
+    # Pinned Firefox/WebKit binaries are pre-installed in the CI image, not installed by this job.
+    # If the pinned playwright version changes without an image rebuild, this job crashes — rebuild the image.
+    - FORCE_COLOR=1 yarn test:e2e:pinned
+  after_script:
+    - node ./scripts/test/export-test-result.ts e2e-pinned
+
 check-licenses:
   extends:
     - .base-configuration

@@ -0,0 +1,61 @@
+# CI Eval Loop Approach
+
+This document describes the autonomous CI eval loop used to iteratively develop GitLab CI jobs without human review between iterations.
+
+## Strategy
+
+An implementation agent executes changes in a tight loop:
+
+0. Create a branch and push it to remote (one-time setup)
+1. Modify `.gitlab-ci.yml` (and any related config)
+2. Commit and push
+3. Trigger a GitLab pipeline via the MCP server
+4. Wait for the job to complete
+5. Fetch and interpret the logs
+6. Apply fixes and go back to step 1
+
+## Tooling
+
+### GitLab MCP server
+
+The loop relies on the [`gitlab-mcp-server`](https://github.com/DataDog/gitlab-mcp-server) MCP:
+
+- **`create_pipeline`** — triggers a pipeline for a given project + branch directly via the GitLab API.
+- **`get_pipeline_jobs`** — lists jobs in a pipeline to get job IDs.
+- **`wait_for_job`** — polls until a job completes (or a log pattern is matched).
+- **`get_job_logs`** — fetches the raw log output; use `tail_lines` to get just the relevant end of the log.
+
+The following tools must be in the `allowedTools` list in `.claude/settings.local.json` to avoid approval prompts during the loop:
+
+```json
+"mcp__gitlab-mcp-server__create_pipeline",
+"mcp__gitlab-mcp-server__get_pipeline_jobs",
+"mcp__gitlab-mcp-server__wait_for_job",
+"mcp__gitlab-mcp-server__get_job_logs"
+```
+
+### Typical call sequence
+
+```
+# One-time setup
+git checkout -b <branch> && git push -u origin <branch>
+
+# Loop
+create_pipeline(project_id="<org>/<repo>", ref="<branch>")
+  → pipeline.id
+
+get_pipeline_jobs(project_id, pipeline.id)
+  → job.id for the target job
+
+wait_for_job(project_id, job.id)
+
+get_job_logs(project_id, job.id, tail_lines=50)
+  → interpret output, apply fixes, commit, push, loop
+```
+
+## Alternative approach
+
+An alternative is to use the `fetch-ci-results` skill from the [Datadog Claude marketplace](https://github.com/datadog/claude-marketplace), which wraps `gh pr checks` + `get_ddci_logs.sh`. Drawbacks compared to the MCP approach:
+- Requires an open GitHub PR, which adds an unnecessary layer
+- Shell commands trigger user approval prompts in Claude Code
+- No "wait for job" capability
@@ -30,6 +30,19 @@ RUN curl --silent --show-error --fail http://dl.google.com/linux/chrome/deb/pool
   && dpkg -i google-chrome.deb \
   && rm google-chrome.deb
 
+# Pre-install Playwright browsers so CI jobs do not download them at run time. The e2e and
+# e2e-pinned jobs do not run `playwright install` themselves: if the image is stale (e.g. a
+# Playwright version was bumped before the image was rebuilt), those jobs fail until the image
+# is rebuilt.
+
+# Current Playwright's Chromium (used by the e2e job).
+# PLAYWRIGHT_VERSION has no default and must be passed with --build-arg. The `:?` expansion makes
+# the build fail when it is missing or empty, instead of running npx with an empty version spec
+# and baking browsers for an unintended Playwright release into the image.
+ARG PLAYWRIGHT_VERSION
+RUN npx -y playwright@${PLAYWRIGHT_VERSION:?PLAYWRIGHT_VERSION build-arg is required} install --with-deps chromium
+
+# Pinned Playwright browsers: Firefox 119 + WebKit 17.4 (used by the e2e-pinned job)
+ARG PINNED_PLAYWRIGHT_VERSION=1.40.1
+RUN npx -y playwright@${PINNED_PLAYWRIGHT_VERSION} install --with-deps firefox webkit
+
 
 # Install AWS cli
 # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html

@@ -32,8 +32,10 @@
     "test:unit:bs": "node --env-file-if-exists=.env ./scripts/test/bs-wrapper.ts karma start test/unit/karma.bs.conf.js",
     "test:e2e:init": "yarn build && yarn build:apps && yarn playwright install chromium --with-deps",
     "test:e2e": "playwright test --config test/e2e/playwright.local.config.ts --project chromium",
+    "test:e2e:pinned:init": "yarn dlx -p playwright@1.40.1 playwright install firefox webkit",
+    "test:e2e:pinned": "playwright test --config test/e2e/playwright.pinned.config.ts",
     "test:e2e:bs": "node --env-file-if-exists=.env ./scripts/test/bs-wrapper.ts playwright test --config test/e2e/playwright.bs.config.ts",
-    "test:e2e:ci": "yarn test:e2e:init && yarn test:e2e",
+    "test:e2e:ci": "yarn build && yarn build:apps && yarn test:e2e",
     "test:e2e:ci:bs": "yarn build && yarn build:apps && yarn test:e2e:bs",
     "test:compat:tsc": "node scripts/check-typescript-compatibility.ts",
     "test:compat:ssr": "scripts/cli check_server_side_rendering_compatibility",

@@ -1,9 +1,8 @@
 import type { Page } from '@playwright/test'
-import { getTestServers, waitForServersIdle } from './httpServers'
+import { waitForServersIdle } from './httpServers'
 import { waitForRequests } from './waitForRequests'
 
 export async function flushEvents(page: Page) {
   await waitForRequests(page)
-  const servers = await getTestServers()
-  await Promise.all([waitForServersIdle(), page.goto(`${servers.base.origin}/flush`)])
+  await Promise.all([waitForServersIdle(), page.goto(new URL('/flush', page.url()).href)])
 }
@@ -0,0 +1,59 @@
+import path from 'path'
+import { defineConfig, devices } from '@playwright/test'
+import { config as baseConfig } from './playwright.base.config'
+
+// Local equivalent of the BrowserStack matrix in browsers.conf.js (Firefox 119 + WebKit 17.4)
+// without BrowserStack. The pipeline:
+//   1) `playwright run-server` on port 5401 from Playwright 1.40.1 (bundles FF 119, WK 17.4).
+//   2) A small translation proxy on port 5400 (test/e2e/scripts/pinned-proxy.mjs) that
+//      spoofs the User-Agent the 1.40 server's version check expects, and patches
+//      __create__ messages so the 1.58 client's strict initializer validators accept
+//      messages produced by the 1.40 server.
+//   3) The current @playwright/test (1.58) client connects to the proxy via wsEndpoint.
+//
+// Initial install of the 1.40 browser binaries:
+//   yarn test:e2e:pinned:init
+// Host, ports and package spec shared by the endpoint URL and the two helper commands below.
+const LOOPBACK_HOST = '127.0.0.1'
+const PROXY_PORT = 5400
+const RUN_SERVER_PORT = 5401
+const PINNED_PLAYWRIGHT_PACKAGE = 'playwright@1.40.1'
+
+// The client connects to the translation proxy, not directly to the pinned run-server.
+const PINNED_WS_ENDPOINT = `ws://${LOOPBACK_HOST}:${PROXY_PORT}/`
+
+// Both helper processes are started from the `scripts` directory next to this config.
+const helperCwd = path.join(__dirname, 'scripts')
+
+// Options common to both helper processes.
+const helperDefaults = { stdout: 'pipe' as const, cwd: helperCwd }
+
+const pinnedWebServers = [
+  {
+    name: 'pinned playwright run-server',
+    ...helperDefaults,
+    command: `yarn dlx -p ${PINNED_PLAYWRIGHT_PACKAGE} playwright run-server --port ${RUN_SERVER_PORT}`,
+    // Ready once the run-server prints its listening banner.
+    wait: { stdout: /Listening on/ },
+  },
+  {
+    name: 'pinned proxy',
+    ...helperDefaults,
+    command: `node pinned-proxy.mjs --listen ${PROXY_PORT} --upstream ${LOOPBACK_HOST}:${RUN_SERVER_PORT}`,
+    // Ready once the proxy prints its listening banner.
+    wait: { stdout: /pinned-proxy] listening/ },
+  },
+]
+
+// Playwright accepts `webServer` as undefined, a single entry or an array of entries. Normalise
+// the base value to an array so it can always be combined with the pinned helper servers
+// (spreading a single object would throw at config load).
+const baseWebServers = [baseConfig.webServer ?? []].flat()
+
+// eslint-disable-next-line import/no-default-export
+export default defineConfig({
+  ...baseConfig,
+  // Base servers first, then the pinned run-server and its proxy.
+  webServer: [...baseWebServers, ...pinnedWebServers] as never,
+  // Each project keeps the current client's device descriptor but connects to the pinned
+  // browsers through the proxy endpoint.
+  projects: [
+    {
+      name: 'firefox',
+      metadata: { sessionName: 'Firefox 119', name: 'firefox' },
+      use: {
+        ...devices['Desktop Firefox'],
+        connectOptions: { wsEndpoint: PINNED_WS_ENDPOINT },
+      },
+    },
+    {
+      name: 'webkit',
+      metadata: { sessionName: 'WebKit 17.4', name: 'webkit' },
+      use: {
+        ...devices['Desktop Safari'],
+        connectOptions: { wsEndpoint: PINNED_WS_ENDPOINT },
+      },
+    },
+  ],
+})
@@ -63,7 +63,11 @@ test.describe('Session Stores', () => {
         .withRum({ trackSessionAcrossSubdomains: false })
         .withHostName(FULL_HOSTNAME)
         .withSetup(bundleSetup)
-        .run(async ({ page, baseUrl, browserContext, flushEvents, intakeRegistry, servers }) => {
+        .run(async ({ page, baseUrl, browserContext, flushEvents, intakeRegistry, servers, browserName }) => {
+          test.skip(
+            browserName === 'firefox',
+            "Firefox does not allow setting cookies from iframes without src, so the SDK won't start there"
+          )
           await injectSdkInAnIframe(page, `${servers.crossOrigin.origin}/datadog-rum.js`)
           await flushEvents()
 
@@ -96,7 +100,11 @@ test.describe('Session Stores', () => {
         .withRum({ trackSessionAcrossSubdomains: true })
         .withHostName(FULL_HOSTNAME)
         .withSetup(bundleSetup)
-        .run(async ({ page, baseUrl, browserContext, flushEvents, intakeRegistry, servers }) => {
+        .run(async ({ page, baseUrl, browserContext, flushEvents, intakeRegistry, servers, browserName }) => {
+          test.skip(
+            browserName === 'firefox',
+            "Firefox does not allow setting cookies from iframes without src, so the SDK won't start there"
+          )
           await injectSdkInAnIframe(page, `${servers.crossOrigin.origin}/datadog-rum.js`)
           await flushEvents()