From 5201524573d9371987c353ef4d41d3dfc64c303a Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Sat, 16 May 2026 15:06:04 +0200
Subject: [PATCH 01/30] chore: remove old comment that references a file that
 no longer exists

---
 .buildkite/scripts/steps/functional/common.sh | 2 --
 1 file changed, 2 deletions(-)
diff --git a/.buildkite/scripts/steps/functional/common.sh b/.buildkite/scripts/steps/functional/common.sh
index 7ae0f78bdcddf..4be748513d299 100755
--- a/.buildkite/scripts/steps/functional/common.sh
+++ b/.buildkite/scripts/steps/functional/common.sh
@@ -2,8 +2,6 @@
 
 set -euo pipefail
 
-# Note, changes here might also need to be made in other scripts, e.g. uptime.sh
-
 source .buildkite/scripts/common/util.sh
 
 # All functional/integration test steps run Kibana from the distributable,

From 29689fde29e0fe8b78faf7473742b68339c9afa5 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Sat, 16 May 2026 15:11:24 +0200
Subject: [PATCH 02/30] feat(ci): add ftr retry result checker to
 kbn-failed-test-reporter-cli
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds `retry_result_checker.ts` to `@kbn/failed-test-reporter-cli` with
two CLI commands exposed via `scripts/ftr_check_retry_result.js`:

- `list-failures <junit-dir>` — prints the name of every failed test
  found in the JUnit XML files under a directory, one per line.
- `check-intersection --junit-dir <dir> --prev-failures-file <file>` —
  compares the current attempt's failures against a saved list from a
  previous attempt; exits 0 if the intersection is empty (no
  previously-failing test failed again, either because it passed or
  because it did not run), exits 1 otherwise.

Also refactors `failed_tests_reporter_cli.ts` to export
`runFailedTestsReporterCli()` rather than executing on import, so the
package can export both CLIs from a single entry point without one
triggering when the other is called.
---
 .../failed_tests_reporter_cli.ts              | 226 +++++++++---------
 .../retry_result_checker.test.ts              | 106 ++++++++
 .../retry_result_checker.ts                   | 123 ++++++++++
 .../kbn-failed-test-reporter-cli/index.ts     |   3 +-
 scripts/ftr_check_retry_result.js             |  11 +
 scripts/report_failed_tests.js                |   2 +-
 6 files changed, 357 insertions(+), 114 deletions(-)
 create mode 100644 packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
 create mode 100644 packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
 create mode 100644 scripts/ftr_check_retry_result.js

diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/failed_tests_reporter_cli.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/failed_tests_reporter_cli.ts
index 9f06def545352..01e7f2fb18262 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/failed_tests_reporter_cli.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/failed_tests_reporter_cli.ts
@@ -27,133 +27,135 @@ const DEFAULT_PATTERNS = [Path.resolve(REPO_ROOT, 'target/junit/**/*.xml')];
 const DISABLE_MISSING_TEST_REPORT_ERRORS =
   process.env.DISABLE_MISSING_TEST_REPORT_ERRORS === 'true';
 
-run(
-  async ({ log, flags }) => {
-    const indexInEs = Boolean(flags['index-errors']);
-    const reportUpdate = Boolean(flags['report-update']);
-
-    let updateGithub = Boolean(flags['github-update']);
-    if (updateGithub && !process.env.GITHUB_TOKEN) {
-      throw createFailError(
-        'GITHUB_TOKEN environment variable must be set, otherwise use --no-github-update flag'
-      );
-    }
-
-    let branch: string = '';
-    let pipeline: string = '';
-    let prependTitle: string = '';
-    if (updateGithub) {
-      branch = process.env.BUILDKITE_BRANCH || '';
-      pipeline = process.env.BUILDKITE_PIPELINE_SLUG || '';
-      updateGithub = process.env.REPORT_FAILED_TESTS_TO_GITHUB === 'true';
-      prependTitle = process.env.PREPEND_FAILURE_TITLE || '';
-
-      if (!branch) {
+export function runFailedTestsReporterCli() {
+  run(
+    async ({ log, flags }) => {
+      const indexInEs = Boolean(flags['index-errors']);
+      const reportUpdate = Boolean(flags['report-update']);
+
+      let updateGithub = Boolean(flags['github-update']);
+      if (updateGithub && !process.env.GITHUB_TOKEN) {
         throw createFailError(
-          'Unable to determine originating branch from job name or other environment variables'
+          'GITHUB_TOKEN environment variable must be set, otherwise use --no-github-update flag'
         );
       }
-    }
 
-    const githubApi = new GithubApi({
-      log,
-      token: process.env.GITHUB_TOKEN,
-      dryRun: !updateGithub,
-    });
-
-    const bkMeta = getBuildkiteMetadata();
-
-    try {
-      const buildUrl = flags['build-url'] || (updateGithub ? '' : 'http://buildUrl');
-      if (typeof buildUrl !== 'string' || !buildUrl) {
-        throw createFlagError('Missing --build-url or process.env.BUILD_URL');
+      let branch: string = '';
+      let pipeline: string = '';
+      let prependTitle: string = '';
+      if (updateGithub) {
+        branch = process.env.BUILDKITE_BRANCH || '';
+        pipeline = process.env.BUILDKITE_PIPELINE_SLUG || '';
+        updateGithub = process.env.REPORT_FAILED_TESTS_TO_GITHUB === 'true';
+        prependTitle = process.env.PREPEND_FAILURE_TITLE || '';
+
+        if (!branch) {
+          throw createFailError(
+            'Unable to determine originating branch from job name or other environment variables'
+          );
+        }
       }
 
-      const patterns = (flags._.length ? flags._ : DEFAULT_PATTERNS).map((p) =>
-        normalize(Path.resolve(p))
-      );
-      log.info('Searching for reports at', patterns);
-      const reportPaths = await globby(patterns, {
-        absolute: true,
+      const githubApi = new GithubApi({
+        log,
+        token: process.env.GITHUB_TOKEN,
+        dryRun: !updateGithub,
       });
 
-      if (!reportPaths.length && DISABLE_MISSING_TEST_REPORT_ERRORS) {
-        // it is fine for code coverage to not have test results
-        return;
-      }
-
-      if (reportPaths.length) {
-        log.info('found', reportPaths.length, 'reports', reportPaths);
+      const bkMeta = getBuildkiteMetadata();
 
-        // Separate JUnit and Scout reports
-        const junitReports = reportPaths.filter((p) => p.endsWith('.xml'));
-        const scoutReports = reportPaths.filter((p) => p.endsWith('.ndjson'));
+      try {
+        const buildUrl = flags['build-url'] || (updateGithub ? '' : 'http://buildUrl');
+        if (typeof buildUrl !== 'string' || !buildUrl) {
+          throw createFlagError('Missing --build-url or process.env.BUILD_URL');
+        }
 
-        log.info(
-          'Processing',
-          junitReports.length,
-          'JUnit reports and',
-          scoutReports.length,
-          'Scout reports'
+        const patterns = (flags._.length ? flags._ : DEFAULT_PATTERNS).map((p) =>
+          normalize(Path.resolve(p))
         );
-
-        const existingIssues = new ExistingFailedTestIssues(log);
-
-        const processParams: ProcessReportsParams = {
-          log,
-          existingIssues,
-          buildUrl,
-          githubApi,
-          branch,
-          pipeline,
-          prependTitle,
-          updateGithub,
-          indexInEs,
-          reportUpdate,
-          bkMeta,
-        };
-
-        // Process FTR JUnit reports
-        await processJUnitReports(junitReports, processParams);
-
-        // Process Scout reports
-        await processScoutReports(scoutReports, processParams);
-
-        // Generate Scout test failure artifacts after reports are updated (GH issue info, html reports, etc.)
-        await generateScoutTestFailureArtifacts({ log, bkMeta });
+        log.info('Searching for reports at', patterns);
+        const reportPaths = await globby(patterns, {
+          absolute: true,
+        });
+
+        if (!reportPaths.length && DISABLE_MISSING_TEST_REPORT_ERRORS) {
+          // it is fine for code coverage to not have test results
+          return;
+        }
+
+        if (reportPaths.length) {
+          log.info('found', reportPaths.length, 'reports', reportPaths);
+
+          // Separate JUnit and Scout reports
+          const junitReports = reportPaths.filter((p) => p.endsWith('.xml'));
+          const scoutReports = reportPaths.filter((p) => p.endsWith('.ndjson'));
+
+          log.info(
+            'Processing',
+            junitReports.length,
+            'JUnit reports and',
+            scoutReports.length,
+            'Scout reports'
+          );
+
+          const existingIssues = new ExistingFailedTestIssues(log);
+
+          const processParams: ProcessReportsParams = {
+            log,
+            existingIssues,
+            buildUrl,
+            githubApi,
+            branch,
+            pipeline,
+            prependTitle,
+            updateGithub,
+            indexInEs,
+            reportUpdate,
+            bkMeta,
+          };
+
+          // Process FTR JUnit reports
+          await processJUnitReports(junitReports, processParams);
+
+          // Process Scout reports
+          await processScoutReports(scoutReports, processParams);
+
+          // Generate Scout test failure artifacts after reports are updated (GH issue info, html reports, etc.)
+          await generateScoutTestFailureArtifacts({ log, bkMeta });
+        }
+      } finally {
+        await CiStatsReporter.fromEnv(log).metrics([
+          {
+            group: 'github api request count',
+            id: `failed test reporter`,
+            value: githubApi.getRequestCount(),
+            meta: Object.fromEntries(
+              Object.entries(bkMeta).map(
+                ([k, v]) => [`buildkite${k[0].toUpperCase()}${k.slice(1)}`, v] as const
+              )
+            ),
+          },
+        ]);
       }
-    } finally {
-      await CiStatsReporter.fromEnv(log).metrics([
-        {
-          group: 'github api request count',
-          id: `failed test reporter`,
-          value: githubApi.getRequestCount(),
-          meta: Object.fromEntries(
-            Object.entries(bkMeta).map(
-              ([k, v]) => [`buildkite${k[0].toUpperCase()}${k.slice(1)}`, v] as const
-            )
-          ),
+    },
+    {
+      description: `a cli that opens issues or updates existing issues based on junit reports`,
+      flags: {
+        boolean: ['github-update', 'report-update'],
+        string: ['build-url'],
+        default: {
+          'github-update': true,
+          'report-update': true,
+          'index-errors': true,
+          'build-url': process.env.BUILD_URL,
         },
-      ]);
-    }
-  },
-  {
-    description: `a cli that opens issues or updates existing issues based on junit reports`,
-    flags: {
-      boolean: ['github-update', 'report-update'],
-      string: ['build-url'],
-      default: {
-        'github-update': true,
-        'report-update': true,
-        'index-errors': true,
-        'build-url': process.env.BUILD_URL,
-      },
-      help: `
+        help: `
         --no-github-update Execute the CLI without writing to Github
         --no-report-update Execute the CLI without writing to the JUnit reports
         --no-index-errors  Execute the CLI without indexing failures into Elasticsearch
         --build-url        URL of the failed build, defaults to process.env.BUILD_URL
       `,
-    },
-  }
-);
+      },
+    }
+  );
+}
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
new file mode 100644
index 0000000000000..20beb274d9c76
--- /dev/null
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
@@ -0,0 +1,106 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+import Fs from 'fs';
+import Os from 'os';
+import Path from 'path';
+
+import { collectFailedTestNames, computeIntersection } from './retry_result_checker';
+
+// Minimal JUnit XML helpers
+const buildXml = (testcases: string) => `<?xml version="1.0" encoding="utf-8"?>
+<testsuites name="ftr">
+  <testsuite>${testcases}</testsuite>
+</testsuites>`;
+
+const failedCase = (name: string) =>
+  `<testcase name="${name}" classname="suite.file" time="1"><failure>error</failure></testcase>`;
+
+const passedCase = (name: string) =>
+  `<testcase name="${name}" classname="suite.file" time="1"></testcase>`;
+
+const hookFailure = (hookName: string) =>
+  `<testcase name='suite "${hookName}" hook' classname="suite.file" time="0"><failure>error</failure></testcase>`;
+
+describe('collectFailedTestNames', () => {
+  let tmpDir: string;
+
+  beforeEach(() => {
+    tmpDir = Fs.mkdtempSync(Path.join(Os.tmpdir(), 'retry-checker-test-'));
+  });
+
+  afterEach(() => {
+    Fs.rmSync(tmpDir, { recursive: true, force: true });
+  });
+
+  it('returns names of failed tests from a single XML', async () => {
+    Fs.writeFileSync(
+      Path.join(tmpDir, 'TEST-report.xml'),
+      buildXml(failedCase('suite myTest') + passedCase('suite otherTest'))
+    );
+    const names = await collectFailedTestNames(tmpDir);
+    expect([...names]).toEqual(['suite myTest']);
+  });
+
+  it('aggregates failures across multiple XML files', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-a.xml'), buildXml(failedCase('test A')));
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-b.xml'), buildXml(failedCase('test B')));
+    const names = await collectFailedTestNames(tmpDir);
+    expect([...names].sort()).toEqual(['test A', 'test B']);
+  });
+
+  it('returns empty set when all tests pass', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-a.xml'), buildXml(passedCase('test A')));
+    const names = await collectFailedTestNames(tmpDir);
+    expect(names.size).toBe(0);
+  });
+
+  it('returns empty set when no XML files exist', async () => {
+    const names = await collectFailedTestNames(tmpDir);
+    expect(names.size).toBe(0);
+  });
+
+  it('captures hook failure names verbatim', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-a.xml'), buildXml(hookFailure('before all')));
+    const names = await collectFailedTestNames(tmpDir);
+    expect([...names]).toEqual(['suite "before all" hook']);
+  });
+});
+
+describe('computeIntersection', () => {
+  it('returns empty when no overlap', () => {
+    const prev = new Set(['test A', 'test B']);
+    const current = new Set(['test C']);
+    expect(computeIntersection(prev, current)).toEqual([]);
+  });
+
+  it('returns overlapping tests', () => {
+    const prev = new Set(['test A', 'test B']);
+    const current = new Set(['test A', 'test C']);
+    expect(computeIntersection(prev, current)).toEqual(['test A']);
+  });
+
+  it('returns empty when current is empty', () => {
+    const prev = new Set(['test A']);
+    const current = new Set<string>();
+    expect(computeIntersection(prev, current)).toEqual([]);
+  });
+
+  it('returns empty when prev is empty', () => {
+    const prev = new Set<string>();
+    const current = new Set(['test A']);
+    expect(computeIntersection(prev, current)).toEqual([]);
+  });
+
+  it('returns all current failures when all were previously failing', () => {
+    const prev = new Set(['test A', 'test B']);
+    const current = new Set(['test A', 'test B']);
+    expect(computeIntersection(prev, current).sort()).toEqual(['test A', 'test B']);
+  });
+});
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
new file mode 100644
index 0000000000000..c6e309ff4eb66
--- /dev/null
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
@@ -0,0 +1,123 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+import Fs from 'fs';
+import Path from 'path';
+
+import { createFlagError } from '@kbn/dev-cli-errors';
+import { run } from '@kbn/dev-cli-runner';
+import globby from 'globby';
+import normalize from 'normalize-path';
+
+import { makeFailedTestCaseIter, readTestReport } from './test_report';
+
+export async function collectFailedTestNames(junitDir: string): Promise<Set<string>> {
+  const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
+  const names = new Set<string>();
+  for (const xmlPath of xmlPaths) {
+    const report = await readTestReport(xmlPath);
+    for (const testCase of makeFailedTestCaseIter(report)) {
+      names.add(testCase.$.name);
+    }
+  }
+  return names;
+}
+
+export function computeIntersection(prev: Set<string>, current: Set<string>): string[] {
+  return [...current].filter((name) => prev.has(name));
+}
+
+export function runRetryResultCheckerCli() {
+  run(
+    async ({ log, flags }) => {
+      const [command, ...rest] = flags._;
+
+      if (command === 'list-failures') {
+        const [junitDir] = rest;
+        if (!junitDir) {
+          throw createFlagError('Usage: list-failures <junit-dir>');
+        }
+        const names = await collectFailedTestNames(junitDir);
+        if (names.size > 0) {
+          process.stdout.write([...names].join('\n') + '\n');
+        }
+        return;
+      }
+
+      if (command === 'check-intersection') {
+        const junitDir = flags['junit-dir'];
+        const prevFailuresFile = flags['prev-failures-file'];
+
+        if (typeof junitDir !== 'string' || !junitDir) {
+          throw createFlagError('--junit-dir is required');
+        }
+        if (typeof prevFailuresFile !== 'string' || !prevFailuresFile) {
+          throw createFlagError('--prev-failures-file is required');
+        }
+
+        const prevContent = Fs.readFileSync(prevFailuresFile, 'utf8');
+        const prevFailed = new Set(
+          prevContent
+            .split('\n')
+            .map((l) => l.trim())
+            .filter(Boolean)
+        );
+
+        if (prevFailed.size === 0) {
+          log.info('No previously-failing tests found — nothing to intersect');
+          return;
+        }
+
+        const currentFailed = await collectFailedTestNames(junitDir);
+        const intersection = computeIntersection(prevFailed, currentFailed);
+
+        if (intersection.length === 0) {
+          // Known limitation: if a different test fails before reaching the previously-failing test on retry (due to --bail), the intersection will appear empty and the step will be marked green even though the original failing test was never verified.
+          // We could possibly drop the bail flag.
+          log.success(
+            `All ${prevFailed.size} previously-failing test(s) either passed or did not run on retry`
+          );
+          return;
+        }
+
+        log.error(`${intersection.length} test(s) failed in both attempts:`);
+        for (const name of intersection) {
+          log.error(`  ${name}`);
+        }
+        process.exit(1);
+      }
+
+      throw createFlagError(
+        `Unknown command: ${command}. Valid commands: list-failures, check-intersection`
+      );
+    },
+    {
+      description: `
+        Utilities for evaluating FTR retry results.
+
+        Commands:
+          list-failures <junit-dir>
+            Lists all failed test names (one per line) found in *.xml files under
+            the given directory. Used to capture attempt-1 failures before retry.
+
+          check-intersection --junit-dir <dir> --prev-failures-file <file>
+            Compares the failed tests in <dir> against the names in <file>.
+            Exits 0 if the intersection is empty (previously-failing tests recovered).
+            Exits 1 if any previously-failing test still fails.
+      `,
+      flags: {
+        string: ['junit-dir', 'prev-failures-file'],
+        help: `
+          --junit-dir            Directory containing JUnit XML files for the current attempt
+          --prev-failures-file   File with newline-separated test names that failed in attempt 1
+        `,
+      },
+    }
+  );
+}
diff --git a/packages/kbn-failed-test-reporter-cli/index.ts b/packages/kbn-failed-test-reporter-cli/index.ts
index fd33d523318a9..c10d3adb34c1e 100644
--- a/packages/kbn-failed-test-reporter-cli/index.ts
+++ b/packages/kbn-failed-test-reporter-cli/index.ts
@@ -7,4 +7,5 @@
  * License v3.0 only", or the "Server Side Public License, v 1".
  */
 
-import './failed_tests_reporter/failed_tests_reporter_cli';
+export { runFailedTestsReporterCli } from './failed_tests_reporter/failed_tests_reporter_cli';
+export { runRetryResultCheckerCli } from './failed_tests_reporter/retry_result_checker';
diff --git a/scripts/ftr_check_retry_result.js b/scripts/ftr_check_retry_result.js
new file mode 100644
index 0000000000000..ac377c7013b01
--- /dev/null
+++ b/scripts/ftr_check_retry_result.js
@@ -0,0 +1,11 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+require('@kbn/setup-node-env');
+require('@kbn/failed-test-reporter-cli').runRetryResultCheckerCli();
diff --git a/scripts/report_failed_tests.js b/scripts/report_failed_tests.js
index 2b5eb9ffc4821..14226105dda10 100644
--- a/scripts/report_failed_tests.js
+++ b/scripts/report_failed_tests.js
@@ -8,4 +8,4 @@
  */
 
 require('@kbn/setup-node-env');
-require('@kbn/failed-test-reporter-cli');
+require('@kbn/failed-test-reporter-cli').runFailedTestsReporterCli();

From f89d58a458f599a1ab90a422c3d875b7fec274a6 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Sat, 16 May 2026 15:11:37 +0200
Subject: [PATCH 03/30] feat(ci): mark FTR retry green when previously-failing
 tests recover
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On the first automatic retry of a failing FTR step, the step is now
marked green if every test that failed in attempt 1 passes in attempt 2
— even if a different (previously-passing) test happens to fail on
retry, which would indicate a separate flake unrelated to the original
failure.

How it works:
- End of attempt 1: JUnit XML is parsed and the failing test names are
  stored in Buildkite metadata.
- End of attempt 2: the stored names are retrieved and intersected with
  the current attempt's failures. An empty intersection overrides the
  exit code to 0.

Skipped for flaky-test-runner runs (KIBANA_FLAKY_TEST_RUNNER_CONFIG).

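Known limitation: if --bail causes attempt 2 to stop on a different
test before reaching the originally-failing test, the intersection will
appear empty and the step will be marked green even though the original
failing test was never verified. The same happens if attempt 2 fails
without recording any test failure in its JUnit XML (for example, no
report was written): the set of current failures is empty, so the
intersection is empty too.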
---
 .buildkite/scripts/steps/test/ftr_configs.sh | 44 ++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index a14283ef5c7df..5d2505ec4d665 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -18,6 +18,8 @@ export JOB="$FTR_CONFIG_GROUP_KEY"
 
 FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
 
+FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
+
 # a FTR failure will result in the script returning an exit code of 10
 exitCode=0
 
@@ -141,6 +143,48 @@ if [[ "$failedConfigs" ]]; then
   buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "$failedConfigs"
 fi
 
+# --- retry-only-failed feature ---
+# Attempt 1: record the names of failing tests so the retry can evaluate whether they recovered.
+# On the first retry, the step is marked green if every previously-failing test passes — even if
+# a different (previously-passing) test happens to fail on retry.
+if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
+      "${BUILDKITE_RETRY_COUNT:-0}" == "0" && "$exitCode" != "0" ]]; then
+  junitDir="target/junit/$JOB"
+  if [ -d "$junitDir" ]; then
+    failedTestNames=$(node scripts/ftr_check_retry_result list-failures "$junitDir" 2>/dev/null || true)
+    if [[ "$failedTestNames" ]]; then
+      buildkite-agent meta-data set "$FAILED_TESTS_KEY" "$failedTestNames"
+      echo "Stored $(echo "$failedTestNames" | wc -l | tr -d ' ') previously-failing test name(s) for retry evaluation"
+    fi
+  fi
+fi
+
+# Attempt 2: check whether the failures from attempt 1 are still failing.
+# If every previously-failing test now passes, mark the step green.
+if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
+      "${BUILDKITE_RETRY_COUNT:-0}" == "1" && "$exitCode" != "0" ]]; then
+  prevFailedTests=$(buildkite-agent meta-data get "$FAILED_TESTS_KEY" --default '' 2>/dev/null || true)
+  if [[ "$prevFailedTests" ]]; then
+    junitDir="target/junit/$JOB"
+    tmpPrevFile=$(mktemp)
+    printf '%s' "$prevFailedTests" > "$tmpPrevFile"
+    set +e
+    node scripts/ftr_check_retry_result check-intersection \
+      --junit-dir "$junitDir" \
+      --prev-failures-file "$tmpPrevFile"
+    intersectionCode=$?
+    set -e
+    rm -f "$tmpPrevFile"
+    if [[ "$intersectionCode" == "0" ]]; then
+      echo "--- [retry-only-failed] All previously-failing tests recovered on retry — marking step green"
+      exitCode=0
+      failedConfigs=""
+      buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "" 2>/dev/null || true
+    fi
+  fi
+fi
+# --- end retry-only-failed feature ---
+
 echo "--- FTR configs complete"
 printf "%s\n" "${results[@]}"
 echo ""

From 54726fbc2ddbc1085996b108e80c4bf90ae18ad0 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Sat, 16 May 2026 16:42:57 +0200
Subject: [PATCH 04/30] =?UTF-8?q?test(ci):=20TEMP=20add=20retry-validation?=
 =?UTF-8?q?=20fixture=20=E2=80=94=20DELETE=20BEFORE=20MERGE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Plants a deliberately-flaky test pair inside the unused_urls_task FTR
config to validate the retry intersection logic introduced in this PR.

Setup (relies on FTR's --bail and BUILDKITE_RETRY_COUNT):
- Attempt 1: TEST_A fails, --bail stops the run. JUnit records TEST_A.
- Attempt 2: TEST_A passes (recovered). TEST_B now fails, --bail stops.
  JUnit records TEST_B.

Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.
Intersection is empty → ftr_configs.sh overrides exit code to 0 and
the step turns green.

Expected outcome in CI: red attempt 1, red attempt 2 internally, but
the step ends green because no previously-failing test failed again.

DELETE before merging.
---
 .../apis/unused_urls_task/index.ts            |  2 +
 .../retry_validation_delete_before_merge.ts   | 41 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts

diff --git a/src/platform/test/api_integration/apis/unused_urls_task/index.ts b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
index c6210ed44023d..eae3bf1227ce4 100644
--- a/src/platform/test/api_integration/apis/unused_urls_task/index.ts
+++ b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
@@ -11,6 +11,8 @@ import type { FtrProviderContext } from '../../ftr_provider_context';
 
 export default function ({ loadTestFile }: FtrProviderContext) {
   describe('unused_urls_task', () => {
+    // TEMPORARY: validates FTR retry intersection logic. Delete before merging this PR.
+    loadTestFile(require.resolve('./retry_validation_delete_before_merge'));
     loadTestFile(require.resolve('./run'));
   });
 }
diff --git a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
new file mode 100644
index 0000000000000..6a7efea7dcbf4
--- /dev/null
+++ b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
@@ -0,0 +1,41 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+// TEMPORARY: validates the FTR retry intersection logic from this PR. Delete before merge.
+//
+// Scenario:
+//   - Attempt 1 (BUILDKITE_RETRY_COUNT unset / '0'): TEST_A fails. --bail stops the run.
+//     JUnit XML records TEST_A as failed; TEST_B is never reached.
+//   - Attempt 2 (BUILDKITE_RETRY_COUNT == '1'): TEST_A passes (recovered).
+//     TEST_B now fails (simulates an unrelated flake on retry). --bail stops the run.
+//     JUnit XML records TEST_B as failed.
+//
+// Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.  Intersection: ∅.
+// Expected: ftr_configs.sh overrides exit code to 0 and the step turns green.
+
+import type { FtrProviderContext } from '../../ftr_provider_context';
+
+const isFirstAttempt =
+  !process.env.BUILDKITE_RETRY_COUNT || process.env.BUILDKITE_RETRY_COUNT === '0';
+
+export default function ({}: FtrProviderContext) {
+  describe('retry-validation', () => {
+    it('TEST_A: intentionally fails on attempt 1, passes on attempt 2', () => {
+      if (isFirstAttempt) {
+        throw new Error('Intentional first-attempt failure (retry validation)');
+      }
+    });
+
+    it('TEST_B: passes on attempt 1, intentionally fails on attempt 2', () => {
+      if (!isFirstAttempt) {
+        throw new Error('Intentional second-attempt failure (retry validation)');
+      }
+    });
+  });
+}

From 1be5cd8a7e5db8ff3ae0f64b1878d17a3e1d5544 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Sat, 16 May 2026 18:33:23 +0200
Subject: [PATCH 05/30] =?UTF-8?q?Revert=20"test(ci):=20TEMP=20add=20retry-?=
 =?UTF-8?q?validation=20fixture=20=E2=80=94=20DELETE=20BEFORE=20MERGE"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 54726fbc2ddbc1085996b108e80c4bf90ae18ad0.
---
 .../apis/unused_urls_task/index.ts            |  2 -
 .../retry_validation_delete_before_merge.ts   | 41 -------------------
 2 files changed, 43 deletions(-)
 delete mode 100644 src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts

diff --git a/src/platform/test/api_integration/apis/unused_urls_task/index.ts b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
index eae3bf1227ce4..c6210ed44023d 100644
--- a/src/platform/test/api_integration/apis/unused_urls_task/index.ts
+++ b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
@@ -11,8 +11,6 @@ import type { FtrProviderContext } from '../../ftr_provider_context';
 
 export default function ({ loadTestFile }: FtrProviderContext) {
   describe('unused_urls_task', () => {
-    // TEMPORARY: validates FTR retry intersection logic. Delete before merging this PR.
-    loadTestFile(require.resolve('./retry_validation_delete_before_merge'));
     loadTestFile(require.resolve('./run'));
   });
 }
diff --git a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
deleted file mode 100644
index 6a7efea7dcbf4..0000000000000
--- a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the "Elastic License
- * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
- * Public License v 1"; you may not use this file except in compliance with, at
- * your election, the "Elastic License 2.0", the "GNU Affero General Public
- * License v3.0 only", or the "Server Side Public License, v 1".
- */
-
-// TEMPORARY: validates the FTR retry intersection logic from this PR. Delete before merge.
-//
-// Scenario:
-//   - Attempt 1 (BUILDKITE_RETRY_COUNT unset / '0'): TEST_A fails. --bail stops the run.
-//     JUnit XML records TEST_A as failed; TEST_B is never reached.
-//   - Attempt 2 (BUILDKITE_RETRY_COUNT == '1'): TEST_A passes (recovered).
-//     TEST_B now fails (simulates an unrelated flake on retry). --bail stops the run.
-//     JUnit XML records TEST_B as failed.
-//
-// Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.  Intersection: ∅.
-// Expected: ftr_configs.sh overrides exit code to 0 and the step turns green.
-
-import type { FtrProviderContext } from '../../ftr_provider_context';
-
-const isFirstAttempt =
-  !process.env.BUILDKITE_RETRY_COUNT || process.env.BUILDKITE_RETRY_COUNT === '0';
-
-export default function ({}: FtrProviderContext) {
-  describe('retry-validation', () => {
-    it('TEST_A: intentionally fails on attempt 1, passes on attempt 2', () => {
-      if (isFirstAttempt) {
-        throw new Error('Intentional first-attempt failure (retry validation)');
-      }
-    });
-
-    it('TEST_B: passes on attempt 1, intentionally fails on attempt 2', () => {
-      if (!isFirstAttempt) {
-        throw new Error('Intentional second-attempt failure (retry validation)');
-      }
-    });
-  });
-}

From 27b352450d7f27a1cb1d1352eb5da0fb8a76941d Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Mon, 18 May 2026 10:00:33 +0200
Subject: [PATCH 06/30] [CI] Add job annotation to FTR configs summary

Surface per-config status, failures, and retry recovery on the job
detail page via a job-scoped Buildkite annotation, so that
retry/pass/fail outcomes aren't buried in the step logs.
---
 .buildkite/scripts/steps/test/ftr_configs.sh | 45 ++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 5d2505ec4d665..5c22b90d1e5af 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -23,6 +23,11 @@ FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
 # a FTR failure will result in the script returning an exit code of 10
 exitCode=0
 
+# Per-config rows for the job annotation summary, plus a flag set when
+# the retry-only-failed logic marks an otherwise-red step green.
+annotation_rows=()
+retry_recovered=false
+
 configs="${FTR_CONFIG:-}"
 
 # The first retry should only run the configs that failed in the previous attempt
@@ -64,6 +69,7 @@ while read -r config; do
 
   if [[ "$IS_CONFIG_EXECUTION" == "true" && "$IS_FLAKY_TEST_RUN" == "false" ]]; then
     echo "--- [ already-tested ] $FULL_COMMAND"
+    annotation_rows+=("| \`${config}\` | — | skipped (already-tested) |")
     continue
   else
     echo "--- $ $FULL_COMMAND"
@@ -126,6 +132,7 @@ while read -r config; do
   if [ $lastCode -eq 0 ]; then
     # Test was successful, so mark it as executed
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
+    annotation_rows+=("| \`${config}\` | ${duration} | passed |")
   else
     exitCode=10
     echo "FTR exited with code $lastCode"
@@ -136,6 +143,7 @@ while read -r config; do
     else
       failedConfigs="$config"
     fi
+    annotation_rows+=("| \`${config}\` | ${duration} | **failed** |")
   fi
 done <<< "$configs"
 
@@ -179,6 +187,7 @@ if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
       echo "--- [retry-only-failed] All previously-failing tests recovered on retry — marking step green"
       exitCode=0
       failedConfigs=""
+      retry_recovered=true
       buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "" 2>/dev/null || true
     fi
   fi
@@ -189,4 +198,40 @@ echo "--- FTR configs complete"
 printf "%s\n" "${results[@]}"
 echo ""
 
+write_job_annotation() {
+  local style attempt_num
+  attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
+
+  if [[ "$exitCode" == "0" ]]; then
+    style="success"
+  else
+    style="error"
+  fi
+
+  {
+    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
+    echo ""
+    if [[ "$retry_recovered" == "true" ]]; then
+      echo "**Recovered on retry** — all previously-failing tests passed; step marked green."
+      echo ""
+    elif [[ -n "$failedConfigs" ]]; then
+      echo "**Failed configs:**"
+      while IFS= read -r f; do
+        [[ -n "$f" ]] && echo "- \`$f\`"
+      done <<< "$failedConfigs"
+      echo ""
+    fi
+    if [[ ${#annotation_rows[@]} -gt 0 ]]; then
+      echo "| Config | Duration | Status |"
+      echo "| --- | --- | --- |"
+      printf "%s\n" "${annotation_rows[@]}"
+    fi
+  } | buildkite-agent annotate \
+        --scope job \
+        --context "ftr-summary" \
+        --style "${style}" || true
+}
+
+write_job_annotation
+
 exit $exitCode

From 443e1cf9c135efb67e4737d1b478772de923a0f0 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Mon, 18 May 2026 10:03:54 +0200
Subject: [PATCH 07/30] =?UTF-8?q?Reapply=20"test(ci):=20TEMP=20add=20retry?=
 =?UTF-8?q?-validation=20fixture=20=E2=80=94=20DELETE=20BEFORE=20MERGE"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 1be5cd8a7e5db8ff3ae0f64b1878d17a3e1d5544.
---
 .../apis/unused_urls_task/index.ts            |  2 +
 .../retry_validation_delete_before_merge.ts   | 41 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts

diff --git a/src/platform/test/api_integration/apis/unused_urls_task/index.ts b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
index c6210ed44023d..eae3bf1227ce4 100644
--- a/src/platform/test/api_integration/apis/unused_urls_task/index.ts
+++ b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
@@ -11,6 +11,8 @@ import type { FtrProviderContext } from '../../ftr_provider_context';
 
 export default function ({ loadTestFile }: FtrProviderContext) {
   describe('unused_urls_task', () => {
+    // TEMPORARY: validates FTR retry intersection logic. Delete before merging this PR.
+    loadTestFile(require.resolve('./retry_validation_delete_before_merge'));
     loadTestFile(require.resolve('./run'));
   });
 }
diff --git a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
new file mode 100644
index 0000000000000..6a7efea7dcbf4
--- /dev/null
+++ b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
@@ -0,0 +1,41 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+// TEMPORARY: validates the FTR retry intersection logic from this PR. Delete before merge.
+//
+// Scenario:
+//   - Attempt 1 (BUILDKITE_RETRY_COUNT unset / '0'): TEST_A fails. --bail stops the run.
+//     JUnit XML records TEST_A as failed; TEST_B is never reached.
+//   - Attempt 2 (BUILDKITE_RETRY_COUNT == '1'): TEST_A passes (recovered).
+//     TEST_B now fails (simulates an unrelated flake on retry). --bail stops the run.
+//     JUnit XML records TEST_B as failed.
+//
+// Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.  Intersection: ∅.
+// Expected: ftr_configs.sh overrides exit code to 0 and the step turns green.
+
+import type { FtrProviderContext } from '../../ftr_provider_context';
+
+const isFirstAttempt =
+  !process.env.BUILDKITE_RETRY_COUNT || process.env.BUILDKITE_RETRY_COUNT === '0';
+
+export default function ({}: FtrProviderContext) {
+  describe('retry-validation', () => {
+    it('TEST_A: intentionally fails on attempt 1, passes on attempt 2', () => {
+      if (isFirstAttempt) {
+        throw new Error('Intentional first-attempt failure (retry validation)');
+      }
+    });
+
+    it('TEST_B: passes on attempt 1, intentionally fails on attempt 2', () => {
+      if (!isFirstAttempt) {
+        throw new Error('Intentional second-attempt failure (retry validation)');
+      }
+    });
+  });
+}

From 11841f74e8984cf5831385145807dd0aec76c98f Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Tue, 26 May 2026 15:24:29 +0200
Subject: [PATCH 08/30] improve job annotation

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 62 ++++++++++++++++++--
 1 file changed, 57 insertions(+), 5 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 5c22b90d1e5af..14050853e4e88 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -54,6 +54,13 @@ fi
 failedConfigs=""
 results=()
 
+# Capture which configs failed in the previous attempt before the meta-data key is overwritten below.
+# Used in the annotation to distinguish "new failure", "still failing", and "recovered" per config.
+prevRunFailedConfigs=""
+if [[ "${BUILDKITE_RETRY_COUNT:-0}" -ge "1" ]]; then
+  prevRunFailedConfigs=$(buildkite-agent meta-data get "$FAILED_CONFIGS_KEY" --default '' 2>/dev/null || true)
+fi
+
 while read -r config; do
   if [[ ! "$config" ]]; then
     continue;
@@ -69,7 +76,7 @@ while read -r config; do
 
   if [[ "$IS_CONFIG_EXECUTION" == "true" && "$IS_FLAKY_TEST_RUN" == "false" ]]; then
     echo "--- [ already-tested ] $FULL_COMMAND"
-    annotation_rows+=("| \`${config}\` | — | skipped (already-tested) |")
+    annotation_rows+=("| [\`${config}\`](https://github.com/elastic/kibana/blob/${BUILDKITE_COMMIT:-main}/${config}) | — | skipped (already-tested) |")
     continue
   else
     echo "--- $ $FULL_COMMAND"
@@ -129,10 +136,15 @@ while read -r config; do
     duration: ${duration}
     result: ${lastCode}")
 
+  config_link="[\`${config}\`](https://github.com/elastic/kibana/blob/${BUILDKITE_COMMIT:-main}/${config})"
   if [ $lastCode -eq 0 ]; then
     # Test was successful, so mark it as executed
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
-    annotation_rows+=("| \`${config}\` | ${duration} | passed |")
+    if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
+      annotation_rows+=("| ${config_link} | ${duration} | recovered |")
+    else
+      annotation_rows+=("| ${config_link} | ${duration} | passed |")
+    fi
   else
     exitCode=10
     echo "FTR exited with code $lastCode"
@@ -143,7 +155,13 @@ while read -r config; do
     else
       failedConfigs="$config"
     fi
-    annotation_rows+=("| \`${config}\` | ${duration} | **failed** |")
+    if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
+      annotation_rows+=("| ${config_link} | ${duration} | **still failing** |")
+    elif [[ -n "$prevRunFailedConfigs" ]]; then
+      annotation_rows+=("| ${config_link} | ${duration} | **new failure** (was passing) |")
+    else
+      annotation_rows+=("| ${config_link} | ${duration} | **failed** |")
+    fi
   fi
 done <<< "$configs"
 
@@ -199,8 +217,9 @@ printf "%s\n" "${results[@]}"
 echo ""
 
 write_job_annotation() {
-  local style attempt_num
+  local style attempt_num prev_attempt_num
   attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
+  prev_attempt_num=$((attempt_num - 1))
 
   if [[ "$exitCode" == "0" ]]; then
     style="success"
@@ -208,12 +227,44 @@ write_job_annotation() {
     style="error"
   fi
 
+  local job_log_link=""
+  if [[ -n "${BUILDKITE_BUILD_URL:-}" && -n "${BUILDKITE_JOB_ID:-}" ]]; then
+    job_log_link=" — [view logs](${BUILDKITE_BUILD_URL}#${BUILDKITE_JOB_ID})"
+  fi
+
   {
-    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
+    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})${job_log_link}"
     echo ""
+
     if [[ "$retry_recovered" == "true" ]]; then
       echo "**Recovered on retry** — all previously-failing tests passed; step marked green."
       echo ""
+    elif [[ -n "$failedConfigs" && -n "$prevRunFailedConfigs" ]]; then
+      # On a retry, split failures into persistent (seen before) vs new (regression)
+      local persistentFailures="" newFailures=""
+      while IFS= read -r f; do
+        [[ -z "$f" ]] && continue
+        if grep -qxF "$f" <<< "$prevRunFailedConfigs"; then
+          persistentFailures="${persistentFailures:+${persistentFailures}$'\n'}$f"
+        else
+          newFailures="${newFailures:+${newFailures}$'\n'}$f"
+        fi
+      done <<< "$failedConfigs"
+
+      if [[ -n "$persistentFailures" ]]; then
+        echo "**Still failing** (attempt ${prev_attempt_num} → attempt ${attempt_num}):"
+        while IFS= read -r f; do
+          [[ -n "$f" ]] && echo "- \`$f\`"
+        done <<< "$persistentFailures"
+        echo ""
+      fi
+      if [[ -n "$newFailures" ]]; then
+        echo "**New failures** (passed attempt ${prev_attempt_num}, failed attempt ${attempt_num}):"
+        while IFS= read -r f; do
+          [[ -n "$f" ]] && echo "- \`$f\`"
+        done <<< "$newFailures"
+        echo ""
+      fi
     elif [[ -n "$failedConfigs" ]]; then
       echo "**Failed configs:**"
       while IFS= read -r f; do
@@ -221,6 +272,7 @@ write_job_annotation() {
       done <<< "$failedConfigs"
       echo ""
     fi
+
     if [[ ${#annotation_rows[@]} -gt 0 ]]; then
       echo "| Config | Duration | Status |"
       echo "| --- | --- | --- |"

From 6931aeb87156700677ab761d3ee93ef727404d99 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Tue, 26 May 2026 15:33:20 +0200
Subject: [PATCH 09/30] remove bail

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 14050853e4e88..d8f23b1a0bcf2 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -66,7 +66,7 @@ while read -r config; do
     continue;
   fi
 
-  FULL_COMMAND="node scripts/functional_tests --bail --config $config $EXTRA_ARGS"
+  FULL_COMMAND="node scripts/functional_tests --config $config $EXTRA_ARGS"
 
   # see if this config has already been executed successfully
   CONFIG_EXECUTION_KEY="${config}_executed"
@@ -105,7 +105,6 @@ while read -r config; do
   # prevent non-zero exit code from breaking the loop
   set +e;
   node ./scripts/functional_tests \
-    --bail \
     --kibana-install-dir "$KIBANA_BUILD_LOCATION" \
     --config="$config" \
     "$EXTRA_ARGS"

From b137ca216bfa03c7bf438f277b7f5f356943f773 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Tue, 26 May 2026 16:29:43 +0200
Subject: [PATCH 10/30] remove view logs link from job annotation

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index d8f23b1a0bcf2..b8072f3580d5d 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -226,13 +226,8 @@ write_job_annotation() {
     style="error"
   fi
 
-  local job_log_link=""
-  if [[ -n "${BUILDKITE_BUILD_URL:-}" && -n "${BUILDKITE_JOB_ID:-}" ]]; then
-    job_log_link=" — [view logs](${BUILDKITE_BUILD_URL}#${BUILDKITE_JOB_ID})"
-  fi
-
   {
-    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})${job_log_link}"
+    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
     echo ""
 
     if [[ "$retry_recovered" == "true" ]]; then

From ad1ae9a53c8f1f627ae99dc9b2ee29c9f3f2c864 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Tue, 26 May 2026 16:42:28 +0200
Subject: [PATCH 11/30] show failing test names per config in job annotation

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 33 ++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index b8072f3580d5d..2d5d0eda4d72c 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -53,6 +53,7 @@ fi
 
 failedConfigs=""
 results=()
+failure_detail_lines=()
 
 # Capture which configs failed in the previous attempt before the meta-data key is overwritten below.
 # Used in the annotation to distinguish "new failure", "still failing", and "recovered" per config.
@@ -102,6 +103,10 @@ while read -r config; do
   """
   fi
 
+  # Snapshot existing JUnit XML files so we can identify which ones this config produces
+  tmp_xml_before=$(mktemp)
+  find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_before" || true
+
   # prevent non-zero exit code from breaking the loop
   set +e;
   node ./scripts/functional_tests \
@@ -137,6 +142,7 @@ while read -r config; do
 
   config_link="[\`${config}\`](https://github.com/elastic/kibana/blob/${BUILDKITE_COMMIT:-main}/${config})"
   if [ $lastCode -eq 0 ]; then
+    rm -f "$tmp_xml_before"
     # Test was successful, so mark it as executed
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
     if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
@@ -161,6 +167,28 @@ while read -r config; do
     else
       annotation_rows+=("| ${config_link} | ${duration} | **failed** |")
     fi
+
+    # Find JUnit XML files produced by this config and extract individual failing test names
+    tmp_xml_after=$(mktemp)
+    find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_after" || true
+    new_config_xmls=$(comm -13 "$tmp_xml_before" "$tmp_xml_after" 2>/dev/null | grep -v '^[[:space:]]*$' || true)
+    rm -f "$tmp_xml_before" "$tmp_xml_after"
+    if [[ -n "$new_config_xmls" ]]; then
+      tmp_junit=$(mktemp -d)
+      while IFS= read -r f; do
+        [[ -n "$f" ]] && cp "$f" "$tmp_junit/" 2>/dev/null || true
+      done <<< "$new_config_xmls"
+      config_test_failures=$(node scripts/ftr_check_retry_result list-failures "$tmp_junit" 2>/dev/null || true)
+      rm -rf "$tmp_junit"
+      if [[ -n "$config_test_failures" ]]; then
+        failure_detail_lines+=("**Failing tests — \`${config}\`:**")
+        failure_detail_lines+=("")
+        while IFS= read -r t; do
+          [[ -n "$t" ]] && failure_detail_lines+=("- ${t}")
+        done <<< "$config_test_failures"
+        failure_detail_lines+=("")
+      fi
+    fi
   fi
 done <<< "$configs"
 
@@ -272,6 +300,11 @@ write_job_annotation() {
       echo "| --- | --- | --- |"
       printf "%s\n" "${annotation_rows[@]}"
     fi
+
+    if [[ ${#failure_detail_lines[@]} -gt 0 ]]; then
+      echo ""
+      printf "%s\n" "${failure_detail_lines[@]}"
+    fi
   } | buildkite-agent annotate \
         --scope job \
         --context "ftr-summary" \

From 7f2c559a8bcbd1975c4eb9d480a6c4453f04bfae Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Tue, 26 May 2026 17:37:42 +0200
Subject: [PATCH 12/30] =?UTF-8?q?Revert=20"Reapply=20"test(ci):=20TEMP=20a?=
 =?UTF-8?q?dd=20retry-validation=20fixture=20=E2=80=94=20DELETE=20BEFORE?=
 =?UTF-8?q?=20MERGE""?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 443e1cf9c135efb67e4737d1b478772de923a0f0.
---
 .../apis/unused_urls_task/index.ts            |  2 -
 .../retry_validation_delete_before_merge.ts   | 41 -------------------
 2 files changed, 43 deletions(-)
 delete mode 100644 src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts

diff --git a/src/platform/test/api_integration/apis/unused_urls_task/index.ts b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
index eae3bf1227ce4..c6210ed44023d 100644
--- a/src/platform/test/api_integration/apis/unused_urls_task/index.ts
+++ b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
@@ -11,8 +11,6 @@ import type { FtrProviderContext } from '../../ftr_provider_context';
 
 export default function ({ loadTestFile }: FtrProviderContext) {
   describe('unused_urls_task', () => {
-    // TEMPORARY: validates FTR retry intersection logic. Delete before merging this PR.
-    loadTestFile(require.resolve('./retry_validation_delete_before_merge'));
     loadTestFile(require.resolve('./run'));
   });
 }
diff --git a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
deleted file mode 100644
index 6a7efea7dcbf4..0000000000000
--- a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the "Elastic License
- * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
- * Public License v 1"; you may not use this file except in compliance with, at
- * your election, the "Elastic License 2.0", the "GNU Affero General Public
- * License v3.0 only", or the "Server Side Public License, v 1".
- */
-
-// TEMPORARY: validates the FTR retry intersection logic from this PR. Delete before merge.
-//
-// Scenario:
-//   - Attempt 1 (BUILDKITE_RETRY_COUNT unset / '0'): TEST_A fails. --bail stops the run.
-//     JUnit XML records TEST_A as failed; TEST_B is never reached.
-//   - Attempt 2 (BUILDKITE_RETRY_COUNT == '1'): TEST_A passes (recovered).
-//     TEST_B now fails (simulates an unrelated flake on retry). --bail stops the run.
-//     JUnit XML records TEST_B as failed.
-//
-// Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.  Intersection: ∅.
-// Expected: ftr_configs.sh overrides exit code to 0 and the step turns green.
-
-import type { FtrProviderContext } from '../../ftr_provider_context';
-
-const isFirstAttempt =
-  !process.env.BUILDKITE_RETRY_COUNT || process.env.BUILDKITE_RETRY_COUNT === '0';
-
-export default function ({}: FtrProviderContext) {
-  describe('retry-validation', () => {
-    it('TEST_A: intentionally fails on attempt 1, passes on attempt 2', () => {
-      if (isFirstAttempt) {
-        throw new Error('Intentional first-attempt failure (retry validation)');
-      }
-    });
-
-    it('TEST_B: passes on attempt 1, intentionally fails on attempt 2', () => {
-      if (!isFirstAttempt) {
-        throw new Error('Intentional second-attempt failure (retry validation)');
-      }
-    });
-  });
-}

From b9aa8a4a1f5c4fed562e2a5360319b87959fb1b3 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Wed, 27 May 2026 10:46:38 +0200
Subject: [PATCH 13/30] refactor(ci): simplify ftr_configs.sh annotation and
 failure extraction

Extract inline XML diff logic into collect_config_failures(), remove the
redundant failed-configs summary from write_job_annotation() (the table
rows already show still-failing/new-failure/recovered per config), and
simplify the failedConfigs concatenation.
---
 .buildkite/scripts/steps/test/ftr_configs.sh | 105 ++++++-------------
 1 file changed, 30 insertions(+), 75 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 2d5d0eda4d72c..306302ac6a0d0 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -17,15 +17,11 @@ test -z "$EXTRA_ARGS" || buildkite-agent meta-data set "ftr-extra-args" "$EXTRA_
 export JOB="$FTR_CONFIG_GROUP_KEY"
 
 FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
-
 FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
 
-# a FTR failure will result in the script returning an exit code of 10
 exitCode=0
-
-# Per-config rows for the job annotation summary, plus a flag set when
-# the retry-only-failed logic marks an otherwise-red step green.
 annotation_rows=()
+failure_detail_lines=()
 retry_recovered=false
 
 configs="${FTR_CONFIG:-}"
@@ -53,15 +49,30 @@ fi
 
 failedConfigs=""
 results=()
-failure_detail_lines=()
 
 # Capture which configs failed in the previous attempt before the meta-data key is overwritten below.
-# Used in the annotation to distinguish "new failure", "still failing", and "recovered" per config.
 prevRunFailedConfigs=""
 if [[ "${BUILDKITE_RETRY_COUNT:-0}" -ge "1" ]]; then
   prevRunFailedConfigs=$(buildkite-agent meta-data get "$FAILED_CONFIGS_KEY" --default '' 2>/dev/null || true)
 fi
 
+# Diffs the JUnit XML directory against a pre-run snapshot and returns failing test names.
+collect_config_failures() {
+  local xml_before="$1"
+  local tmp_xml_after new_xmls tmp_junit
+  tmp_xml_after=$(mktemp)
+  find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_after" || true
+  new_xmls=$(comm -13 "$xml_before" "$tmp_xml_after" 2>/dev/null | grep -v '^[[:space:]]*$' || true)
+  rm -f "$tmp_xml_after"
+  [[ -z "$new_xmls" ]] && return
+  tmp_junit=$(mktemp -d)
+  while IFS= read -r f; do
+    [[ -n "$f" ]] && cp "$f" "$tmp_junit/" 2>/dev/null || true
+  done <<< "$new_xmls"
+  node scripts/ftr_check_retry_result list-failures "$tmp_junit" 2>/dev/null || true
+  rm -rf "$tmp_junit"
+}
+
 while read -r config; do
   if [[ ! "$config" ]]; then
     continue;
@@ -143,7 +154,6 @@ while read -r config; do
   config_link="[\`${config}\`](https://github.com/elastic/kibana/blob/${BUILDKITE_COMMIT:-main}/${config})"
   if [ $lastCode -eq 0 ]; then
     rm -f "$tmp_xml_before"
-    # Test was successful, so mark it as executed
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
     if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
       annotation_rows+=("| ${config_link} | ${duration} | recovered |")
@@ -155,11 +165,8 @@ while read -r config; do
     echo "FTR exited with code $lastCode"
     echo "^^^ +++"
 
-    if [[ "$failedConfigs" ]]; then
-      failedConfigs="${failedConfigs}"$'\n'"$config"
-    else
-      failedConfigs="$config"
-    fi
+    failedConfigs="${failedConfigs:+${failedConfigs}$'\n'}$config"
+
     if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
       annotation_rows+=("| ${config_link} | ${duration} | **still failing** |")
     elif [[ -n "$prevRunFailedConfigs" ]]; then
@@ -168,26 +175,14 @@ while read -r config; do
       annotation_rows+=("| ${config_link} | ${duration} | **failed** |")
     fi
 
-    # Find JUnit XML files produced by this config and extract individual failing test names
-    tmp_xml_after=$(mktemp)
-    find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_after" || true
-    new_config_xmls=$(comm -13 "$tmp_xml_before" "$tmp_xml_after" 2>/dev/null | grep -v '^[[:space:]]*$' || true)
-    rm -f "$tmp_xml_before" "$tmp_xml_after"
-    if [[ -n "$new_config_xmls" ]]; then
-      tmp_junit=$(mktemp -d)
-      while IFS= read -r f; do
-        [[ -n "$f" ]] && cp "$f" "$tmp_junit/" 2>/dev/null || true
-      done <<< "$new_config_xmls"
-      config_test_failures=$(node scripts/ftr_check_retry_result list-failures "$tmp_junit" 2>/dev/null || true)
-      rm -rf "$tmp_junit"
-      if [[ -n "$config_test_failures" ]]; then
-        failure_detail_lines+=("**Failing tests — \`${config}\`:**")
-        failure_detail_lines+=("")
-        while IFS= read -r t; do
-          [[ -n "$t" ]] && failure_detail_lines+=("- ${t}")
-        done <<< "$config_test_failures"
-        failure_detail_lines+=("")
-      fi
+    config_failures=$(collect_config_failures "$tmp_xml_before")
+    rm -f "$tmp_xml_before"
+    if [[ -n "$config_failures" ]]; then
+      failure_detail_lines+=("**Failing tests — \`${config}\`:**" "")
+      while IFS= read -r t; do
+        [[ -n "$t" ]] && failure_detail_lines+=("- ${t}")
+      done <<< "$config_failures"
+      failure_detail_lines+=("")
     fi
   fi
 done <<< "$configs"
@@ -196,7 +191,6 @@ if [[ "$failedConfigs" ]]; then
   buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "$failedConfigs"
 fi
 
-# --- retry-only-failed feature ---
 # Attempt 1: record the names of failing tests so the retry can evaluate whether they recovered.
 # On the first retry, the step is marked green if every previously-failing test passes — even if
 # a different (previously-passing) test happens to fail on retry.
@@ -237,22 +231,15 @@ if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
     fi
   fi
 fi
-# --- end retry-only-failed feature ---
 
 echo "--- FTR configs complete"
 printf "%s\n" "${results[@]}"
 echo ""
 
 write_job_annotation() {
-  local style attempt_num prev_attempt_num
+  local attempt_num style
   attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
-  prev_attempt_num=$((attempt_num - 1))
-
-  if [[ "$exitCode" == "0" ]]; then
-    style="success"
-  else
-    style="error"
-  fi
+  style=$([[ "$exitCode" == "0" ]] && echo "success" || echo "error")
 
   {
     echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
@@ -261,38 +248,6 @@ write_job_annotation() {
     if [[ "$retry_recovered" == "true" ]]; then
       echo "**Recovered on retry** — all previously-failing tests passed; step marked green."
       echo ""
-    elif [[ -n "$failedConfigs" && -n "$prevRunFailedConfigs" ]]; then
-      # On a retry, split failures into persistent (seen before) vs new (regression)
-      local persistentFailures="" newFailures=""
-      while IFS= read -r f; do
-        [[ -z "$f" ]] && continue
-        if grep -qxF "$f" <<< "$prevRunFailedConfigs"; then
-          persistentFailures="${persistentFailures:+${persistentFailures}$'\n'}$f"
-        else
-          newFailures="${newFailures:+${newFailures}$'\n'}$f"
-        fi
-      done <<< "$failedConfigs"
-
-      if [[ -n "$persistentFailures" ]]; then
-        echo "**Still failing** (attempt ${prev_attempt_num} → attempt ${attempt_num}):"
-        while IFS= read -r f; do
-          [[ -n "$f" ]] && echo "- \`$f\`"
-        done <<< "$persistentFailures"
-        echo ""
-      fi
-      if [[ -n "$newFailures" ]]; then
-        echo "**New failures** (passed attempt ${prev_attempt_num}, failed attempt ${attempt_num}):"
-        while IFS= read -r f; do
-          [[ -n "$f" ]] && echo "- \`$f\`"
-        done <<< "$newFailures"
-        echo ""
-      fi
-    elif [[ -n "$failedConfigs" ]]; then
-      echo "**Failed configs:**"
-      while IFS= read -r f; do
-        [[ -n "$f" ]] && echo "- \`$f\`"
-      done <<< "$failedConfigs"
-      echo ""
     fi
 
     if [[ ${#annotation_rows[@]} -gt 0 ]]; then

From 8ab9a58ab215c8896752f1aa19df95a4e055ea93 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Wed, 27 May 2026 11:17:24 +0200
Subject: [PATCH 14/30] feat(ci): verify explicit passes on retry instead of
 absence of failure

Replace the intersection-of-failures check with collectPassedTestNames,
which requires each previously-failing test to appear as an explicit pass
in the retry JUnit output. This closes three false-green gaps: runner
crash (empty JUnit dir), beforeAll hook failure (tests reported as skipped
rather than failed), and stale XML files from attempt 1 persisting on
persistent-workspace agents.
---
 .../retry_result_checker.test.ts              | 81 ++++++++++++++++++-
 .../retry_result_checker.ts                   | 54 +++++++++----
 2 files changed, 118 insertions(+), 17 deletions(-)

diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
index 20beb274d9c76..f0c262864e9cc 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
@@ -11,7 +11,11 @@ import Fs from 'fs';
 import Os from 'os';
 import Path from 'path';
 
-import { collectFailedTestNames, computeIntersection } from './retry_result_checker';
+import {
+  collectFailedTestNames,
+  collectPassedTestNames,
+  computeIntersection,
+} from './retry_result_checker';
 
 // Minimal JUnit XML helpers
 const buildXml = (testcases: string) => `<?xml version="1.0" encoding="utf-8"?>
@@ -25,6 +29,9 @@ const failedCase = (name: string) =>
 const passedCase = (name: string) =>
   `<testcase name="${name}" classname="suite.file" time="1"></testcase>`;
 
+const skippedCase = (name: string) =>
+  `<testcase name="${name}" classname="suite.file" time="1"><skipped/></testcase>`;
+
 const hookFailure = (hookName: string) =>
   `<testcase name='suite "${hookName}" hook' classname="suite.file" time="0"><failure>error</failure></testcase>`;
 
@@ -73,6 +80,78 @@ describe('collectFailedTestNames', () => {
   });
 });
 
+describe('collectPassedTestNames', () => {
+  let tmpDir: string;
+
+  beforeEach(() => {
+    tmpDir = Fs.mkdtempSync(Path.join(Os.tmpdir(), 'retry-checker-test-'));
+  });
+
+  afterEach(() => {
+    Fs.rmSync(tmpDir, { recursive: true, force: true });
+  });
+
+  it('returns only tests that passed (no failure, no skipped)', async () => {
+    Fs.writeFileSync(
+      Path.join(tmpDir, 'TEST-report.xml'),
+      buildXml(passedCase('test A') + failedCase('test B') + skippedCase('test C'))
+    );
+    const names = await collectPassedTestNames(tmpDir);
+    expect([...names]).toEqual(['test A']);
+  });
+
+  it('does not count skipped tests as passed (beforeAll hook scenario)', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-a.xml'), buildXml(skippedCase('test A')));
+    const names = await collectPassedTestNames(tmpDir);
+    expect(names.size).toBe(0);
+  });
+
+  it('returns empty set when no XML files exist (runner crash scenario)', async () => {
+    const names = await collectPassedTestNames(tmpDir);
+    expect(names.size).toBe(0);
+  });
+
+  it('aggregates passed tests across multiple XML files', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-a.xml'), buildXml(passedCase('test A')));
+    Fs.writeFileSync(
+      Path.join(tmpDir, 'TEST-b.xml'),
+      buildXml(passedCase('test B') + failedCase('test C'))
+    );
+    const names = await collectPassedTestNames(tmpDir);
+    expect([...names].sort()).toEqual(['test A', 'test B']);
+  });
+
+  it('finds a recovered test even when a stale attempt-1 XML is present', async () => {
+    // Stale file from attempt 1 where the test failed
+    Fs.writeFileSync(
+      Path.join(tmpDir, 'TEST-attempt1-bk__OLD.xml'),
+      buildXml(failedCase('test A'))
+    );
+    // New file from attempt 2 where the test passes
+    Fs.writeFileSync(
+      Path.join(tmpDir, 'TEST-attempt2-bk__NEW.xml'),
+      buildXml(passedCase('test A'))
+    );
+    const names = await collectPassedTestNames(tmpDir);
+    expect(names.has('test A')).toBe(true);
+  });
+
+  it('does not count a test as passed when it fails in both attempts (stale XMLs present)', async () => {
+    // Stale file from attempt 1: test A failed
+    Fs.writeFileSync(
+      Path.join(tmpDir, 'TEST-attempt1-bk__OLD.xml'),
+      buildXml(failedCase('test A'))
+    );
+    // New file from attempt 2: test A still fails
+    Fs.writeFileSync(
+      Path.join(tmpDir, 'TEST-attempt2-bk__NEW.xml'),
+      buildXml(failedCase('test A'))
+    );
+    const names = await collectPassedTestNames(tmpDir);
+    expect(names.has('test A')).toBe(false);
+  });
+});
+
 describe('computeIntersection', () => {
   it('returns empty when no overlap', () => {
     const prev = new Set(['test A', 'test B']);
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
index c6e309ff4eb66..0aced4292757f 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
@@ -15,7 +15,7 @@ import { run } from '@kbn/dev-cli-runner';
 import globby from 'globby';
 import normalize from 'normalize-path';
 
-import { makeFailedTestCaseIter, readTestReport } from './test_report';
+import { makeFailedTestCaseIter, makeTestCaseIter, readTestReport } from './test_report';
 
 export async function collectFailedTestNames(junitDir: string): Promise<Set<string>> {
   const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
@@ -23,7 +23,27 @@ export async function collectFailedTestNames(junitDir: string): Promise<Set<stri
   for (const xmlPath of xmlPaths) {
     const report = await readTestReport(xmlPath);
     for (const testCase of makeFailedTestCaseIter(report)) {
-      names.add(testCase.$.name);
+      names.add(testCase.$.name.trim());
+    }
+  }
+  return names;
+}
+
+/**
+ * Returns the names of test cases that completed without failure and without being skipped.
+ * Used on retry to verify previously-failing tests explicitly passed, not merely that they
+ * were absent from results (e.g. runner crash, beforeAll hook failure, or stale XML files
+ * from the previous attempt coexisting on a persistent-workspace agent).
+ */
+export async function collectPassedTestNames(junitDir: string): Promise<Set<string>> {
+  const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
+  const names = new Set<string>();
+  for (const xmlPath of xmlPaths) {
+    const report = await readTestReport(xmlPath);
+    for (const testCase of makeTestCaseIter(report)) {
+      if (!testCase.failure && !testCase.skipped) {
+        names.add(testCase.$.name.trim());
+      }
     }
   }
   return names;
@@ -74,20 +94,22 @@ export function runRetryResultCheckerCli() {
           return;
         }
 
-        const currentFailed = await collectFailedTestNames(junitDir);
-        const intersection = computeIntersection(prevFailed, currentFailed);
-
-        if (intersection.length === 0) {
-          // Known limitation: if a different test fails before reaching the previously-failing test on retry (due to --bail), the intersection will appear empty and the step will be marked green even though the original failing test was never verified.
-          // We could possibly drop the bail flag.
-          log.success(
-            `All ${prevFailed.size} previously-failing test(s) either passed or did not run on retry`
-          );
+        // Require every previously-failing test to appear as an explicit pass on retry.
+        // Checking for explicit passes (rather than absence of failure) guards against
+        // three false-green scenarios: (a) the runner crashes before reaching the test
+        // leaving an empty JUnit directory, (b) a beforeAll hook failure causes the test
+        // to be reported as skipped rather than failed, and (c) stale XML files from the
+        // previous attempt persist in the directory on a persistent-workspace agent.
+        const currentPassed = await collectPassedTestNames(junitDir);
+        const notRecovered = [...prevFailed].filter((name) => !currentPassed.has(name));
+
+        if (notRecovered.length === 0) {
+          log.success(`All ${prevFailed.size} previously-failing test(s) passed on retry`);
           return;
         }
 
-        log.error(`${intersection.length} test(s) failed in both attempts:`);
-        for (const name of intersection) {
+        log.error(`${notRecovered.length} test(s) did not pass on retry:`);
+        for (const name of notRecovered) {
           log.error(`  ${name}`);
         }
         process.exit(1);
@@ -107,9 +129,9 @@ export function runRetryResultCheckerCli() {
             the given directory. Used to capture attempt-1 failures before retry.
 
           check-intersection --junit-dir <dir> --prev-failures-file <file>
-            Compares the failed tests in <dir> against the names in <file>.
-            Exits 0 if the intersection is empty (previously-failing tests recovered).
-            Exits 1 if any previously-failing test still fails.
+            Checks whether every test named in <file> appears as an explicit pass in <dir>.
+            Exits 0 if all previously-failing tests passed (step can be marked green).
+            Exits 1 if any previously-failing test did not pass (still failing, skipped, or absent).
       `,
       flags: {
         string: ['junit-dir', 'prev-failures-file'],

From 924635cb5ed4fde3aba3f7db3eb34123cc3ffd68 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Wed, 27 May 2026 11:17:34 +0200
Subject: [PATCH 15/30] fix(ci): guard scout reporter error, log smart-retry
 inactivity, clarify recovered message

- Wrap scout upload in `set +e` / `set -e` so a non-zero exit code does not
  abort the config loop; log the exit code and continue rather than silently
  swallowing it
- Log a clear message when smart-retry is inactive on a third-or-later manual retry
- Update the "recovered on retry" annotation to note that new failures on retry
  are not counted against recovery
---
 .buildkite/scripts/steps/test/ftr_configs.sh | 25 ++++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 306302ac6a0d0..47b7c7627f707 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -127,13 +127,19 @@ while read -r config; do
   lastCode=$?
   set -e;
 
-  # Scout reporter
+  # Scout reporter — run under set+e so a failure here does not abort the config loop
   if [[ "${SCOUT_REPORTER_ENABLED:-}" =~ ^(1|true)$ ]]; then
-    # Upload events after running each config
     echo "Upload Scout reporter events to AppEx QA's team cluster for config $config"
+    set +e
     node scripts/scout upload-events --dontFailOnError
-    echo "Upload successful, removing local events at .scout/reports"
-    rm -rf .scout/reports
+    scout_upload_code=$?
+    set -e
+    if [[ $scout_upload_code -ne 0 ]]; then
+      echo "Scout reporter upload exited $scout_upload_code (continuing)"
+    else
+      echo "Upload successful, removing local events at .scout/reports"
+      rm -rf .scout/reports
+    fi
   else
     echo "SCOUT_REPORTER_ENABLED=$SCOUT_REPORTER_ENABLED, skipping event upload."
   fi
@@ -191,6 +197,13 @@ if [[ "$failedConfigs" ]]; then
   buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "$failedConfigs"
 fi
 
+# smart-retry is only active for attempt 1 (store) and attempt 2 (check).
+# On a manual third-or-later retry it is silently inactive; log that so CI debugging is easier.
+if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
+      "${BUILDKITE_RETRY_COUNT:-0}" -ge "2" && "$exitCode" != "0" ]]; then
+  echo "--- [smart-retry] inactive on attempt $((${BUILDKITE_RETRY_COUNT:-0} + 1)) — only applies to the first automatic retry"
+fi
+
 # Attempt 1: record the names of failing tests so the retry can evaluate whether they recovered.
 # On the first retry, the step is marked green if every previously-failing test passes — even if
 # a different (previously-passing) test happens to fail on retry.
@@ -246,7 +259,9 @@ write_job_annotation() {
     echo ""
 
     if [[ "$retry_recovered" == "true" ]]; then
-      echo "**Recovered on retry** — all previously-failing tests passed; step marked green."
+      echo "**Recovered on retry** — all originally-failing tests passed; step marked green."
+      echo ""
+      echo "> Configs shown as 'still failing' below introduced *new* failures on retry that were not part of the original failure set and are not counted against recovery."
       echo ""
     fi
 

From 989bd710b17a36699da2d948cc28a5e9de18b882 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Wed, 27 May 2026 11:24:38 +0200
Subject: [PATCH 16/30] refactor(ci): split ftr_configs.sh into focused helper
 files

Extract smart-retry logic into ftr_smart_retry.sh (store_failing_tests /
apply_smart_retry) and annotation helpers into ftr_job_annotation.sh
(collect_config_failures / write_job_annotation). The main script is now
a thin orchestrator that sources both and reads as a linear narrative.
---
 .buildkite/scripts/steps/test/ftr_configs.sh  | 104 +-----------------
 .../scripts/steps/test/ftr_job_annotation.sh  |  52 +++++++++
 .../scripts/steps/test/ftr_smart_retry.sh     |  62 +++++++++++
 3 files changed, 120 insertions(+), 98 deletions(-)
 create mode 100644 .buildkite/scripts/steps/test/ftr_job_annotation.sh
 create mode 100644 .buildkite/scripts/steps/test/ftr_smart_retry.sh

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 47b7c7627f707..05bcd2c5c789b 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -3,6 +3,8 @@
 set -euo pipefail
 
 source .buildkite/scripts/steps/functional/common.sh
+source .buildkite/scripts/steps/test/ftr_smart_retry.sh
+source .buildkite/scripts/steps/test/ftr_job_annotation.sh
 
 BUILDKITE_PARALLEL_JOB=${BUILDKITE_PARALLEL_JOB:-}
 FTR_CONFIG_GROUP_KEY=${FTR_CONFIG_GROUP_KEY:-}
@@ -26,8 +28,8 @@ retry_recovered=false
 
 configs="${FTR_CONFIG:-}"
 
-# The first retry should only run the configs that failed in the previous attempt
-# Any subsequent retries, which would generally only happen by someone clicking the button in the UI, will run everything
+# The first retry should only run the configs that failed in the previous attempt.
+# Any subsequent retries (generally triggered manually) will run everything.
 if [[ ! "$configs" && "${BUILDKITE_RETRY_COUNT:-0}" == "1" ]]; then
   configs=$(buildkite-agent meta-data get "$FAILED_CONFIGS_KEY" --default '')
   if [[ "$configs" ]]; then
@@ -56,23 +58,6 @@ if [[ "${BUILDKITE_RETRY_COUNT:-0}" -ge "1" ]]; then
   prevRunFailedConfigs=$(buildkite-agent meta-data get "$FAILED_CONFIGS_KEY" --default '' 2>/dev/null || true)
 fi
 
-# Diffs the JUnit XML directory against a pre-run snapshot and returns failing test names.
-collect_config_failures() {
-  local xml_before="$1"
-  local tmp_xml_after new_xmls tmp_junit
-  tmp_xml_after=$(mktemp)
-  find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_after" || true
-  new_xmls=$(comm -13 "$xml_before" "$tmp_xml_after" 2>/dev/null | grep -v '^[[:space:]]*$' || true)
-  rm -f "$tmp_xml_after"
-  [[ -z "$new_xmls" ]] && return
-  tmp_junit=$(mktemp -d)
-  while IFS= read -r f; do
-    [[ -n "$f" ]] && cp "$f" "$tmp_junit/" 2>/dev/null || true
-  done <<< "$new_xmls"
-  node scripts/ftr_check_retry_result list-failures "$tmp_junit" 2>/dev/null || true
-  rm -rf "$tmp_junit"
-}
-
 while read -r config; do
   if [[ ! "$config" ]]; then
     continue;
@@ -197,90 +182,13 @@ if [[ "$failedConfigs" ]]; then
   buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "$failedConfigs"
 fi
 
-# smart-retry is only active for attempt 1 (store) and attempt 2 (check).
-# On a manual third-or-later retry it is silently inactive; log that so CI debugging is easier.
-if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
-      "${BUILDKITE_RETRY_COUNT:-0}" -ge "2" && "$exitCode" != "0" ]]; then
-  echo "--- [smart-retry] inactive on attempt $((${BUILDKITE_RETRY_COUNT:-0} + 1)) — only applies to the first automatic retry"
-fi
-
-# Attempt 1: record the names of failing tests so the retry can evaluate whether they recovered.
-# On the first retry, the step is marked green if every previously-failing test passes — even if
-# a different (previously-passing) test happens to fail on retry.
-if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
-      "${BUILDKITE_RETRY_COUNT:-0}" == "0" && "$exitCode" != "0" ]]; then
-  junitDir="target/junit/$JOB"
-  if [ -d "$junitDir" ]; then
-    failedTestNames=$(node scripts/ftr_check_retry_result list-failures "$junitDir" 2>/dev/null || true)
-    if [[ "$failedTestNames" ]]; then
-      buildkite-agent meta-data set "$FAILED_TESTS_KEY" "$failedTestNames"
-      echo "Stored $(echo "$failedTestNames" | wc -l | tr -d ' ') previously-failing test name(s) for retry evaluation"
-    fi
-  fi
-fi
-
-# Attempt 2: check whether the failures from attempt 1 are still failing.
-# If every previously-failing test now passes, mark the step green.
-if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
-      "${BUILDKITE_RETRY_COUNT:-0}" == "1" && "$exitCode" != "0" ]]; then
-  prevFailedTests=$(buildkite-agent meta-data get "$FAILED_TESTS_KEY" --default '' 2>/dev/null || true)
-  if [[ "$prevFailedTests" ]]; then
-    junitDir="target/junit/$JOB"
-    tmpPrevFile=$(mktemp)
-    printf '%s' "$prevFailedTests" > "$tmpPrevFile"
-    set +e
-    node scripts/ftr_check_retry_result check-intersection \
-      --junit-dir "$junitDir" \
-      --prev-failures-file "$tmpPrevFile"
-    intersectionCode=$?
-    set -e
-    rm -f "$tmpPrevFile"
-    if [[ "$intersectionCode" == "0" ]]; then
-      echo "--- [retry-only-failed] All previously-failing tests recovered on retry — marking step green"
-      exitCode=0
-      failedConfigs=""
-      retry_recovered=true
-      buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "" 2>/dev/null || true
-    fi
-  fi
-fi
+store_failing_tests  # attempt 1: record what failed so the retry can verify recovery
+apply_smart_retry    # attempt 2: mark green if all previously-failing tests explicitly passed
 
 echo "--- FTR configs complete"
 printf "%s\n" "${results[@]}"
 echo ""
 
-write_job_annotation() {
-  local attempt_num style
-  attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
-  style=$([[ "$exitCode" == "0" ]] && echo "success" || echo "error")
-
-  {
-    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
-    echo ""
-
-    if [[ "$retry_recovered" == "true" ]]; then
-      echo "**Recovered on retry** — all originally-failing tests passed; step marked green."
-      echo ""
-      echo "> Configs shown as 'still failing' below introduced *new* failures on retry that were not part of the original failure set and are not counted against recovery."
-      echo ""
-    fi
-
-    if [[ ${#annotation_rows[@]} -gt 0 ]]; then
-      echo "| Config | Duration | Status |"
-      echo "| --- | --- | --- |"
-      printf "%s\n" "${annotation_rows[@]}"
-    fi
-
-    if [[ ${#failure_detail_lines[@]} -gt 0 ]]; then
-      echo ""
-      printf "%s\n" "${failure_detail_lines[@]}"
-    fi
-  } | buildkite-agent annotate \
-        --scope job \
-        --context "ftr-summary" \
-        --style "${style}" || true
-}
-
 write_job_annotation
 
 exit $exitCode
diff --git a/.buildkite/scripts/steps/test/ftr_job_annotation.sh b/.buildkite/scripts/steps/test/ftr_job_annotation.sh
new file mode 100644
index 0000000000000..4bea31b11b5c5
--- /dev/null
+++ b/.buildkite/scripts/steps/test/ftr_job_annotation.sh
@@ -0,0 +1,52 @@
+# Sourced by ftr_configs.sh — do not execute directly.
+# Reads globals: exitCode, retry_recovered, annotation_rows, failure_detail_lines,
+# JOB, BUILDKITE_RETRY_COUNT, BUILDKITE_COMMIT.
+
+# Diffs the JUnit XML directory against a pre-run snapshot and prints failing test names.
+collect_config_failures() {
+  local xml_before="$1"
+  local tmp_xml_after new_xmls tmp_junit
+  tmp_xml_after=$(mktemp)
+  find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_after" || true
+  new_xmls=$(comm -13 "$xml_before" "$tmp_xml_after" 2>/dev/null | grep -v '^[[:space:]]*$' || true)
+  rm -f "$tmp_xml_after"
+  [[ -z "$new_xmls" ]] && return
+  tmp_junit=$(mktemp -d)
+  while IFS= read -r f; do
+    [[ -n "$f" ]] && cp "$f" "$tmp_junit/" 2>/dev/null || true
+  done <<< "$new_xmls"
+  node scripts/ftr_check_retry_result list-failures "$tmp_junit" 2>/dev/null || true
+  rm -rf "$tmp_junit"
+}
+
+write_job_annotation() {
+  local attempt_num style
+  attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
+  style=$([[ "$exitCode" == "0" ]] && echo "success" || echo "error")
+
+  {
+    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
+    echo ""
+
+    if [[ "$retry_recovered" == "true" ]]; then
+      echo "**Recovered on retry** — all originally-failing tests passed; step marked green."
+      echo ""
+      echo "> Configs shown as 'still failing' below introduced *new* failures on retry that were not part of the original failure set and are not counted against recovery."
+      echo ""
+    fi
+
+    if [[ ${#annotation_rows[@]} -gt 0 ]]; then
+      echo "| Config | Duration | Status |"
+      echo "| --- | --- | --- |"
+      printf "%s\n" "${annotation_rows[@]}"
+    fi
+
+    if [[ ${#failure_detail_lines[@]} -gt 0 ]]; then
+      echo ""
+      printf "%s\n" "${failure_detail_lines[@]}"
+    fi
+  } | buildkite-agent annotate \
+        --scope job \
+        --context "ftr-summary" \
+        --style "${style}" || true
+}
diff --git a/.buildkite/scripts/steps/test/ftr_smart_retry.sh b/.buildkite/scripts/steps/test/ftr_smart_retry.sh
new file mode 100644
index 0000000000000..8368fd595f7c0
--- /dev/null
+++ b/.buildkite/scripts/steps/test/ftr_smart_retry.sh
@@ -0,0 +1,62 @@
+# Sourced by ftr_configs.sh — do not execute directly.
+# Reads/writes globals: exitCode, failedConfigs, retry_recovered,
+# FAILED_TESTS_KEY, FAILED_CONFIGS_KEY, JOB, BUILDKITE_RETRY_COUNT.
+
+# Called after attempt 1: stores failing test names so the retry can verify recovery.
+store_failing_tests() {
+  [[ -n "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" ]] && return
+  [[ "${BUILDKITE_RETRY_COUNT:-0}" != "0" ]] && return
+  [[ "$exitCode" == "0" ]] && return
+
+  local junitDir="target/junit/$JOB"
+  [[ -d "$junitDir" ]] || return
+
+  local failedTestNames
+  failedTestNames=$(node scripts/ftr_check_retry_result list-failures "$junitDir" 2>/dev/null || true)
+  if [[ "$failedTestNames" ]]; then
+    buildkite-agent meta-data set "$FAILED_TESTS_KEY" "$failedTestNames"
+    echo "Stored $(echo "$failedTestNames" | wc -l | tr -d ' ') previously-failing test name(s) for retry evaluation"
+  fi
+}
+
+# Called after attempt 2: marks the step green if all previously-failing tests explicitly passed.
+# On a third-or-later manual retry, logs that smart-retry is inactive.
+apply_smart_retry() {
+  [[ -n "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" ]] && return
+  [[ "$exitCode" == "0" ]] && return
+
+  local retryCount="${BUILDKITE_RETRY_COUNT:-0}"
+
+  if [[ "$retryCount" -ge "2" ]]; then
+    echo "--- [smart-retry] inactive on attempt $((retryCount + 1)) — only applies to the first automatic retry"
+    return
+  fi
+
+  [[ "$retryCount" != "1" ]] && return
+
+  local prevFailedTests
+  prevFailedTests=$(buildkite-agent meta-data get "$FAILED_TESTS_KEY" --default '' 2>/dev/null || true)
+  [[ "$prevFailedTests" ]] || return
+
+  local junitDir="target/junit/$JOB"
+  local tmpPrevFile
+  tmpPrevFile=$(mktemp)
+  printf '%s' "$prevFailedTests" > "$tmpPrevFile"
+
+  local intersectionCode
+  set +e
+  node scripts/ftr_check_retry_result check-intersection \
+    --junit-dir "$junitDir" \
+    --prev-failures-file "$tmpPrevFile"
+  intersectionCode=$?
+  set -e
+  rm -f "$tmpPrevFile"
+
+  if [[ "$intersectionCode" == "0" ]]; then
+    echo "--- [smart-retry] All previously-failing tests recovered on retry — marking step green"
+    exitCode=0
+    failedConfigs=""
+    retry_recovered=true
+    buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "" 2>/dev/null || true
+  fi
+}

From 8238bd3a0b8e40149f8650737426bc4f009b8aa8 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Wed, 27 May 2026 14:34:48 +0200
Subject: [PATCH 17/30] refactor(ci): move XML diff dance and temp-file
 plumbing into the Node CLI

Add snapshot/list-new-failures subcommands to ftr_check_retry_result so
the per-config JUnit attribution diff lives in TypeScript rather than bash
(find/sort/comm/cp plumbing). Add --prev-failures-stdin to check-intersection
so the temp-file handshake in apply_smart_retry becomes a plain pipe.

The bash loop now reads as two CLI calls per config instead of managing
temp files and directory diffs inline.
---
 .buildkite/scripts/steps/test/ftr_configs.sh  |   8 +-
 .../scripts/steps/test/ftr_job_annotation.sh  |  17 ---
 .../scripts/steps/test/ftr_smart_retry.sh     |   8 +-
 .../retry_result_checker.test.ts              |  45 ++++++++
 .../retry_result_checker.ts                   | 105 ++++++++++++++++--
 5 files changed, 144 insertions(+), 39 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 05bcd2c5c789b..5885ea97ddc87 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -99,9 +99,7 @@ while read -r config; do
   """
   fi
 
-  # Snapshot existing JUnit XML files so we can identify which ones this config produces
-  tmp_xml_before=$(mktemp)
-  find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_before" || true
+  node scripts/ftr_check_retry_result snapshot "target/junit/$JOB" 2>/dev/null || true
 
   # prevent non-zero exit code from breaking the loop
   set +e;
@@ -144,7 +142,6 @@ while read -r config; do
 
   config_link="[\`${config}\`](https://github.com/elastic/kibana/blob/${BUILDKITE_COMMIT:-main}/${config})"
   if [ $lastCode -eq 0 ]; then
-    rm -f "$tmp_xml_before"
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
     if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
       annotation_rows+=("| ${config_link} | ${duration} | recovered |")
@@ -166,8 +163,7 @@ while read -r config; do
       annotation_rows+=("| ${config_link} | ${duration} | **failed** |")
     fi
 
-    config_failures=$(collect_config_failures "$tmp_xml_before")
-    rm -f "$tmp_xml_before"
+    config_failures=$(node scripts/ftr_check_retry_result list-new-failures "target/junit/$JOB" 2>/dev/null || true)
     if [[ -n "$config_failures" ]]; then
       failure_detail_lines+=("**Failing tests — \`${config}\`:**" "")
       while IFS= read -r t; do
diff --git a/.buildkite/scripts/steps/test/ftr_job_annotation.sh b/.buildkite/scripts/steps/test/ftr_job_annotation.sh
index 4bea31b11b5c5..5c45652302f2d 100644
--- a/.buildkite/scripts/steps/test/ftr_job_annotation.sh
+++ b/.buildkite/scripts/steps/test/ftr_job_annotation.sh
@@ -2,23 +2,6 @@
 # Reads globals: exitCode, retry_recovered, annotation_rows, failure_detail_lines,
 # JOB, BUILDKITE_RETRY_COUNT, BUILDKITE_COMMIT.
 
-# Diffs the JUnit XML directory against a pre-run snapshot and prints failing test names.
-collect_config_failures() {
-  local xml_before="$1"
-  local tmp_xml_after new_xmls tmp_junit
-  tmp_xml_after=$(mktemp)
-  find "target/junit/${JOB}" -maxdepth 1 -name "*.xml" 2>/dev/null | sort > "$tmp_xml_after" || true
-  new_xmls=$(comm -13 "$xml_before" "$tmp_xml_after" 2>/dev/null | grep -v '^[[:space:]]*$' || true)
-  rm -f "$tmp_xml_after"
-  [[ -z "$new_xmls" ]] && return
-  tmp_junit=$(mktemp -d)
-  while IFS= read -r f; do
-    [[ -n "$f" ]] && cp "$f" "$tmp_junit/" 2>/dev/null || true
-  done <<< "$new_xmls"
-  node scripts/ftr_check_retry_result list-failures "$tmp_junit" 2>/dev/null || true
-  rm -rf "$tmp_junit"
-}
-
 write_job_annotation() {
   local attempt_num style
   attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
diff --git a/.buildkite/scripts/steps/test/ftr_smart_retry.sh b/.buildkite/scripts/steps/test/ftr_smart_retry.sh
index 8368fd595f7c0..86e172f298f98 100644
--- a/.buildkite/scripts/steps/test/ftr_smart_retry.sh
+++ b/.buildkite/scripts/steps/test/ftr_smart_retry.sh
@@ -39,18 +39,14 @@ apply_smart_retry() {
   [[ "$prevFailedTests" ]] || return
 
   local junitDir="target/junit/$JOB"
-  local tmpPrevFile
-  tmpPrevFile=$(mktemp)
-  printf '%s' "$prevFailedTests" > "$tmpPrevFile"
 
   local intersectionCode
   set +e
-  node scripts/ftr_check_retry_result check-intersection \
+  printf '%s' "$prevFailedTests" | node scripts/ftr_check_retry_result check-intersection \
     --junit-dir "$junitDir" \
-    --prev-failures-file "$tmpPrevFile"
+    --prev-failures-stdin
   intersectionCode=$?
   set -e
-  rm -f "$tmpPrevFile"
 
   if [[ "$intersectionCode" == "0" ]]; then
     echo "--- [smart-retry] All previously-failing tests recovered on retry — marking step green"
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
index f0c262864e9cc..a8c0928ee261c 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
@@ -13,8 +13,10 @@ import Path from 'path';
 
 import {
   collectFailedTestNames,
+  collectNewFailedTestNames,
   collectPassedTestNames,
   computeIntersection,
+  snapshotJunitDir,
 } from './retry_result_checker';
 
 // Minimal JUnit XML helpers
@@ -152,6 +154,49 @@ describe('collectPassedTestNames', () => {
   });
 });
 
+describe('snapshotJunitDir + collectNewFailedTestNames', () => {
+  let tmpDir: string;
+
+  beforeEach(() => {
+    tmpDir = Fs.mkdtempSync(Path.join(Os.tmpdir(), 'retry-checker-test-'));
+  });
+
+  afterEach(() => {
+    Fs.rmSync(tmpDir, { recursive: true, force: true });
+  });
+
+  it('returns only failures from XMLs written after the snapshot', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-existing.xml'), buildXml(failedCase('old failure')));
+    await snapshotJunitDir(tmpDir);
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-new.xml'), buildXml(failedCase('new failure')));
+
+    const names = await collectNewFailedTestNames(tmpDir);
+    expect([...names]).toEqual(['new failure']);
+  });
+
+  it('deletes the snapshot file after reading', async () => {
+    await snapshotJunitDir(tmpDir);
+    await collectNewFailedTestNames(tmpDir);
+
+    expect(Fs.existsSync(Path.join(tmpDir, '.smart_retry_snapshot'))).toBe(false);
+  });
+
+  it('treats all XMLs as new when no snapshot exists', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-a.xml'), buildXml(failedCase('test A')));
+
+    const names = await collectNewFailedTestNames(tmpDir);
+    expect([...names]).toEqual(['test A']);
+  });
+
+  it('returns empty set when no new XMLs were written after the snapshot', async () => {
+    Fs.writeFileSync(Path.join(tmpDir, 'TEST-existing.xml'), buildXml(failedCase('old failure')));
+    await snapshotJunitDir(tmpDir);
+
+    const names = await collectNewFailedTestNames(tmpDir);
+    expect(names.size).toBe(0);
+  });
+});
+
 describe('computeIntersection', () => {
   it('returns empty when no overlap', () => {
     const prev = new Set(['test A', 'test B']);
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
index 0aced4292757f..0614a75f2f0e3 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
@@ -17,6 +17,8 @@ import normalize from 'normalize-path';
 
 import { makeFailedTestCaseIter, makeTestCaseIter, readTestReport } from './test_report';
 
+const SNAPSHOT_FILE = '.smart_retry_snapshot';
+
 export async function collectFailedTestNames(junitDir: string): Promise<Set<string>> {
   const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
   const names = new Set<string>();
@@ -49,15 +51,80 @@ export async function collectPassedTestNames(junitDir: string): Promise<Set<stri
   return names;
 }
 
+/**
+ * Writes the current XML file list in junitDir to a snapshot file.
+ * Call this before running a config so list-new-failures can diff against it.
+ */
+export async function snapshotJunitDir(junitDir: string): Promise<void> {
+  const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
+  Fs.mkdirSync(junitDir, { recursive: true });
+  Fs.writeFileSync(Path.join(junitDir, SNAPSHOT_FILE), JSON.stringify(xmlPaths.sort()));
+}
+
+/**
+ * Reads the snapshot written by snapshotJunitDir, diffs it against the current XML files,
+ * and returns the failing test names from XMLs that were produced after the snapshot.
+ * Deletes the snapshot file after reading.
+ */
+export async function collectNewFailedTestNames(junitDir: string): Promise<Set<string>> {
+  const snapshotPath = Path.join(junitDir, SNAPSHOT_FILE);
+  let before = new Set<string>();
+  if (Fs.existsSync(snapshotPath)) {
+    before = new Set<string>(JSON.parse(Fs.readFileSync(snapshotPath, 'utf8')));
+    Fs.unlinkSync(snapshotPath);
+  }
+  const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
+  const newPaths = xmlPaths.filter((p) => !before.has(p));
+  const names = new Set<string>();
+  for (const xmlPath of newPaths) {
+    const report = await readTestReport(xmlPath);
+    for (const tc of makeFailedTestCaseIter(report)) {
+      names.add(tc.$.name.trim());
+    }
+  }
+  return names;
+}
+
 export function computeIntersection(prev: Set<string>, current: Set<string>): string[] {
   return [...current].filter((name) => prev.has(name));
 }
 
+const readStdin = (): Promise<string> =>
+  new Promise((resolve, reject) => {
+    const chunks: Buffer[] = [];
+    process.stdin.on('data', (chunk) =>
+      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
+    );
+    process.stdin.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
+    process.stdin.on('error', reject);
+  });
+
 export function runRetryResultCheckerCli() {
   run(
     async ({ log, flags }) => {
       const [command, ...rest] = flags._;
 
+      if (command === 'snapshot') {
+        const [junitDir] = rest;
+        if (!junitDir) {
+          throw createFlagError('Usage: snapshot <junit-dir>');
+        }
+        await snapshotJunitDir(junitDir);
+        return;
+      }
+
+      if (command === 'list-new-failures') {
+        const [junitDir] = rest;
+        if (!junitDir) {
+          throw createFlagError('Usage: list-new-failures <junit-dir>');
+        }
+        const names = await collectNewFailedTestNames(junitDir);
+        if (names.size > 0) {
+          process.stdout.write([...names].join('\n') + '\n');
+        }
+        return;
+      }
+
       if (command === 'list-failures') {
         const [junitDir] = rest;
         if (!junitDir) {
@@ -73,15 +140,21 @@ export function runRetryResultCheckerCli() {
       if (command === 'check-intersection') {
         const junitDir = flags['junit-dir'];
         const prevFailuresFile = flags['prev-failures-file'];
+        const prevFailuresStdin = flags['prev-failures-stdin'];
 
         if (typeof junitDir !== 'string' || !junitDir) {
           throw createFlagError('--junit-dir is required');
         }
-        if (typeof prevFailuresFile !== 'string' || !prevFailuresFile) {
-          throw createFlagError('--prev-failures-file is required');
+
+        let prevContent: string;
+        if (prevFailuresStdin) {
+          prevContent = await readStdin();
+        } else if (typeof prevFailuresFile === 'string' && prevFailuresFile) {
+          prevContent = Fs.readFileSync(prevFailuresFile, 'utf8');
+        } else {
+          throw createFlagError('Either --prev-failures-file or --prev-failures-stdin is required');
         }
 
-        const prevContent = Fs.readFileSync(prevFailuresFile, 'utf8');
         const prevFailed = new Set(
           prevContent
             .split('\n')
@@ -116,7 +189,7 @@ export function runRetryResultCheckerCli() {
       }
 
       throw createFlagError(
-        `Unknown command: ${command}. Valid commands: list-failures, check-intersection`
+        `Unknown command: ${command}. Valid commands: snapshot, list-new-failures, list-failures, check-intersection`
       );
     },
     {
@@ -124,20 +197,32 @@ export function runRetryResultCheckerCli() {
         Utilities for evaluating FTR retry results.
 
         Commands:
+          snapshot <junit-dir>
+            Writes the current list of *.xml files in <junit-dir> to a snapshot file.
+            Call this before running a config so list-new-failures can diff against it.
+
+          list-new-failures <junit-dir>
+            Reads the snapshot written by the snapshot command, diffs it against the
+            current *.xml files, and prints the failing test names from new XMLs only.
+            Deletes the snapshot file after reading.
+
           list-failures <junit-dir>
             Lists all failed test names (one per line) found in *.xml files under
             the given directory. Used to capture attempt-1 failures before retry.
 
-          check-intersection --junit-dir <dir> --prev-failures-file <file>
-            Checks whether every test named in <file> appears as an explicit pass in <dir>.
-            Exits 0 if all previously-failing tests passed (step can be marked green).
-            Exits 1 if any previously-failing test did not pass (still failing, skipped, or absent).
+          check-intersection --junit-dir <dir> --prev-failures-file <file>|--prev-failures-stdin
+            Checks whether every test named in <file> (or stdin) appears as an explicit
+            pass in <dir>. Exits 0 if all previously-failing tests passed (step can be
+            marked green). Exits 1 if any previously-failing test did not pass
+            (still failing, skipped, or absent).
       `,
       flags: {
         string: ['junit-dir', 'prev-failures-file'],
+        boolean: ['prev-failures-stdin'],
         help: `
-          --junit-dir            Directory containing JUnit XML files for the current attempt
-          --prev-failures-file   File with newline-separated test names that failed in attempt 1
+          --junit-dir              Directory containing JUnit XML files for the current attempt
+          --prev-failures-file     File with newline-separated test names that failed in attempt 1
+          --prev-failures-stdin    Read prev-failures from stdin instead of a file
         `,
       },
     }

From 84f0b907c2db6973bc18fab02b3fa7eed1794db4 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Wed, 27 May 2026 16:15:24 +0200
Subject: [PATCH 18/30] chore(ci): remove job annotation from smart-retry PR

Move the annotation logic (write_job_annotation, the per-config status
table, the per-config failing test names, and the snapshot/list-new-failures
CLI subcommands) to a separate PR so that this branch focuses purely on
the retry mechanism.
---
 .buildkite/scripts/steps/test/ftr_configs.sh  | 39 -----------
 .../scripts/steps/test/ftr_job_annotation.sh  | 35 ----------
 .../retry_result_checker.test.ts              | 45 ------------
 .../retry_result_checker.ts                   | 68 +------------------
 4 files changed, 1 insertion(+), 186 deletions(-)
 delete mode 100644 .buildkite/scripts/steps/test/ftr_job_annotation.sh

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 5885ea97ddc87..e60df5362fc0d 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -4,7 +4,6 @@ set -euo pipefail
 
 source .buildkite/scripts/steps/functional/common.sh
 source .buildkite/scripts/steps/test/ftr_smart_retry.sh
-source .buildkite/scripts/steps/test/ftr_job_annotation.sh
 
 BUILDKITE_PARALLEL_JOB=${BUILDKITE_PARALLEL_JOB:-}
 FTR_CONFIG_GROUP_KEY=${FTR_CONFIG_GROUP_KEY:-}
@@ -22,9 +21,6 @@ FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
 FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
 
 exitCode=0
-annotation_rows=()
-failure_detail_lines=()
-retry_recovered=false
 
 configs="${FTR_CONFIG:-}"
 
@@ -52,12 +48,6 @@ fi
 failedConfigs=""
 results=()
 
-# Capture which configs failed in the previous attempt before the meta-data key is overwritten below.
-prevRunFailedConfigs=""
-if [[ "${BUILDKITE_RETRY_COUNT:-0}" -ge "1" ]]; then
-  prevRunFailedConfigs=$(buildkite-agent meta-data get "$FAILED_CONFIGS_KEY" --default '' 2>/dev/null || true)
-fi
-
 while read -r config; do
   if [[ ! "$config" ]]; then
     continue;
@@ -73,7 +63,6 @@ while read -r config; do
 
   if [[ "$IS_CONFIG_EXECUTION" == "true" && "$IS_FLAKY_TEST_RUN" == "false" ]]; then
     echo "--- [ already-tested ] $FULL_COMMAND"
-    annotation_rows+=("| [\`${config}\`](https://github.com/elastic/kibana/blob/${BUILDKITE_COMMIT:-main}/${config}) | — | skipped (already-tested) |")
     continue
   else
     echo "--- $ $FULL_COMMAND"
@@ -99,8 +88,6 @@ while read -r config; do
   """
   fi
 
-  node scripts/ftr_check_retry_result snapshot "target/junit/$JOB" 2>/dev/null || true
-
   # prevent non-zero exit code from breaking the loop
   set +e;
   node ./scripts/functional_tests \
@@ -140,37 +127,13 @@ while read -r config; do
     duration: ${duration}
     result: ${lastCode}")
 
-  config_link="[\`${config}\`](https://github.com/elastic/kibana/blob/${BUILDKITE_COMMIT:-main}/${config})"
   if [ $lastCode -eq 0 ]; then
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
-    if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
-      annotation_rows+=("| ${config_link} | ${duration} | recovered |")
-    else
-      annotation_rows+=("| ${config_link} | ${duration} | passed |")
-    fi
   else
     exitCode=10
     echo "FTR exited with code $lastCode"
     echo "^^^ +++"
-
     failedConfigs="${failedConfigs:+${failedConfigs}$'\n'}$config"
-
-    if [[ -n "$prevRunFailedConfigs" ]] && grep -qxF "$config" <<< "$prevRunFailedConfigs"; then
-      annotation_rows+=("| ${config_link} | ${duration} | **still failing** |")
-    elif [[ -n "$prevRunFailedConfigs" ]]; then
-      annotation_rows+=("| ${config_link} | ${duration} | **new failure** (was passing) |")
-    else
-      annotation_rows+=("| ${config_link} | ${duration} | **failed** |")
-    fi
-
-    config_failures=$(node scripts/ftr_check_retry_result list-new-failures "target/junit/$JOB" 2>/dev/null || true)
-    if [[ -n "$config_failures" ]]; then
-      failure_detail_lines+=("**Failing tests — \`${config}\`:**" "")
-      while IFS= read -r t; do
-        [[ -n "$t" ]] && failure_detail_lines+=("- ${t}")
-      done <<< "$config_failures"
-      failure_detail_lines+=("")
-    fi
   fi
 done <<< "$configs"
 
@@ -185,6 +148,4 @@ echo "--- FTR configs complete"
 printf "%s\n" "${results[@]}"
 echo ""
 
-write_job_annotation
-
 exit $exitCode
diff --git a/.buildkite/scripts/steps/test/ftr_job_annotation.sh b/.buildkite/scripts/steps/test/ftr_job_annotation.sh
deleted file mode 100644
index 5c45652302f2d..0000000000000
--- a/.buildkite/scripts/steps/test/ftr_job_annotation.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-# Sourced by ftr_configs.sh — do not execute directly.
-# Reads globals: exitCode, retry_recovered, annotation_rows, failure_detail_lines,
-# JOB, BUILDKITE_RETRY_COUNT, BUILDKITE_COMMIT.
-
-write_job_annotation() {
-  local attempt_num style
-  attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
-  style=$([[ "$exitCode" == "0" ]] && echo "success" || echo "error")
-
-  {
-    echo "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
-    echo ""
-
-    if [[ "$retry_recovered" == "true" ]]; then
-      echo "**Recovered on retry** — all originally-failing tests passed; step marked green."
-      echo ""
-      echo "> Configs shown as 'still failing' below introduced *new* failures on retry that were not part of the original failure set and are not counted against recovery."
-      echo ""
-    fi
-
-    if [[ ${#annotation_rows[@]} -gt 0 ]]; then
-      echo "| Config | Duration | Status |"
-      echo "| --- | --- | --- |"
-      printf "%s\n" "${annotation_rows[@]}"
-    fi
-
-    if [[ ${#failure_detail_lines[@]} -gt 0 ]]; then
-      echo ""
-      printf "%s\n" "${failure_detail_lines[@]}"
-    fi
-  } | buildkite-agent annotate \
-        --scope job \
-        --context "ftr-summary" \
-        --style "${style}" || true
-}
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
index a8c0928ee261c..f0c262864e9cc 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
@@ -13,10 +13,8 @@ import Path from 'path';
 
 import {
   collectFailedTestNames,
-  collectNewFailedTestNames,
   collectPassedTestNames,
   computeIntersection,
-  snapshotJunitDir,
 } from './retry_result_checker';
 
 // Minimal JUnit XML helpers
@@ -154,49 +152,6 @@ describe('collectPassedTestNames', () => {
   });
 });
 
-describe('snapshotJunitDir + collectNewFailedTestNames', () => {
-  let tmpDir: string;
-
-  beforeEach(() => {
-    tmpDir = Fs.mkdtempSync(Path.join(Os.tmpdir(), 'retry-checker-test-'));
-  });
-
-  afterEach(() => {
-    Fs.rmSync(tmpDir, { recursive: true, force: true });
-  });
-
-  it('returns only failures from XMLs written after the snapshot', async () => {
-    Fs.writeFileSync(Path.join(tmpDir, 'TEST-existing.xml'), buildXml(failedCase('old failure')));
-    await snapshotJunitDir(tmpDir);
-    Fs.writeFileSync(Path.join(tmpDir, 'TEST-new.xml'), buildXml(failedCase('new failure')));
-
-    const names = await collectNewFailedTestNames(tmpDir);
-    expect([...names]).toEqual(['new failure']);
-  });
-
-  it('deletes the snapshot file after reading', async () => {
-    await snapshotJunitDir(tmpDir);
-    await collectNewFailedTestNames(tmpDir);
-
-    expect(Fs.existsSync(Path.join(tmpDir, '.smart_retry_snapshot'))).toBe(false);
-  });
-
-  it('treats all XMLs as new when no snapshot exists', async () => {
-    Fs.writeFileSync(Path.join(tmpDir, 'TEST-a.xml'), buildXml(failedCase('test A')));
-
-    const names = await collectNewFailedTestNames(tmpDir);
-    expect([...names]).toEqual(['test A']);
-  });
-
-  it('returns empty set when no new XMLs were written after the snapshot', async () => {
-    Fs.writeFileSync(Path.join(tmpDir, 'TEST-existing.xml'), buildXml(failedCase('old failure')));
-    await snapshotJunitDir(tmpDir);
-
-    const names = await collectNewFailedTestNames(tmpDir);
-    expect(names.size).toBe(0);
-  });
-});
-
 describe('computeIntersection', () => {
   it('returns empty when no overlap', () => {
     const prev = new Set(['test A', 'test B']);
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
index 0614a75f2f0e3..2ee2916eeb2ec 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
@@ -17,8 +17,6 @@ import normalize from 'normalize-path';
 
 import { makeFailedTestCaseIter, makeTestCaseIter, readTestReport } from './test_report';
 
-const SNAPSHOT_FILE = '.smart_retry_snapshot';
-
 export async function collectFailedTestNames(junitDir: string): Promise<Set<string>> {
   const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
   const names = new Set<string>();
@@ -51,40 +49,6 @@ export async function collectPassedTestNames(junitDir: string): Promise<Set<stri
   return names;
 }
 
-/**
- * Writes the current XML file list in junitDir to a snapshot file.
- * Call this before running a config so list-new-failures can diff against it.
- */
-export async function snapshotJunitDir(junitDir: string): Promise<void> {
-  const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
-  Fs.mkdirSync(junitDir, { recursive: true });
-  Fs.writeFileSync(Path.join(junitDir, SNAPSHOT_FILE), JSON.stringify(xmlPaths.sort()));
-}
-
-/**
- * Reads the snapshot written by snapshotJunitDir, diffs it against the current XML files,
- * and returns the failing test names from XMLs that were produced after the snapshot.
- * Deletes the snapshot file after reading.
- */
-export async function collectNewFailedTestNames(junitDir: string): Promise<Set<string>> {
-  const snapshotPath = Path.join(junitDir, SNAPSHOT_FILE);
-  let before = new Set<string>();
-  if (Fs.existsSync(snapshotPath)) {
-    before = new Set<string>(JSON.parse(Fs.readFileSync(snapshotPath, 'utf8')));
-    Fs.unlinkSync(snapshotPath);
-  }
-  const xmlPaths = await globby(normalize(Path.resolve(junitDir, '*.xml')), { absolute: true });
-  const newPaths = xmlPaths.filter((p) => !before.has(p));
-  const names = new Set<string>();
-  for (const xmlPath of newPaths) {
-    const report = await readTestReport(xmlPath);
-    for (const tc of makeFailedTestCaseIter(report)) {
-      names.add(tc.$.name.trim());
-    }
-  }
-  return names;
-}
-
 export function computeIntersection(prev: Set<string>, current: Set<string>): string[] {
   return [...current].filter((name) => prev.has(name));
 }
@@ -104,27 +68,6 @@ export function runRetryResultCheckerCli() {
     async ({ log, flags }) => {
       const [command, ...rest] = flags._;
 
-      if (command === 'snapshot') {
-        const [junitDir] = rest;
-        if (!junitDir) {
-          throw createFlagError('Usage: snapshot <junit-dir>');
-        }
-        await snapshotJunitDir(junitDir);
-        return;
-      }
-
-      if (command === 'list-new-failures') {
-        const [junitDir] = rest;
-        if (!junitDir) {
-          throw createFlagError('Usage: list-new-failures <junit-dir>');
-        }
-        const names = await collectNewFailedTestNames(junitDir);
-        if (names.size > 0) {
-          process.stdout.write([...names].join('\n') + '\n');
-        }
-        return;
-      }
-
       if (command === 'list-failures') {
         const [junitDir] = rest;
         if (!junitDir) {
@@ -189,7 +132,7 @@ export function runRetryResultCheckerCli() {
       }
 
       throw createFlagError(
-        `Unknown command: ${command}. Valid commands: snapshot, list-new-failures, list-failures, check-intersection`
+        `Unknown command: ${command}. Valid commands: list-failures, check-intersection`
       );
     },
     {
@@ -197,15 +140,6 @@ export function runRetryResultCheckerCli() {
         Utilities for evaluating FTR retry results.
 
         Commands:
-          snapshot <junit-dir>
-            Writes the current list of *.xml files in <junit-dir> to a snapshot file.
-            Call this before running a config so list-new-failures can diff against it.
-
-          list-new-failures <junit-dir>
-            Reads the snapshot written by the snapshot command, diffs it against the
-            current *.xml files, and prints the failing test names from new XMLs only.
-            Deletes the snapshot file after reading.
-
           list-failures <junit-dir>
             Lists all failed test names (one per line) found in *.xml files under
             the given directory. Used to capture attempt-1 failures before retry.

From c3712f999d9bced58a7d51eb875fcd7354d8001b Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Wed, 27 May 2026 16:46:51 +0200
Subject: [PATCH 19/30] refactor(ci): remove dead computeIntersection export
 and initialize retry_recovered
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Drop computeIntersection function and its tests — unused in production code
- Initialize retry_recovered=false in ftr_configs.sh so the variable is already
  bound under set -u on runs where ftr_smart_retry.sh has not set it (it sets
  it on recovery)
- Revert Scout reporter error handling to original form
---
 .buildkite/scripts/steps/test/ftr_configs.sh  | 15 +++-----
 .../retry_result_checker.test.ts              | 38 +------------------
 .../retry_result_checker.ts                   |  4 --
 3 files changed, 6 insertions(+), 51 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index e60df5362fc0d..39414ddb0ca26 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -21,6 +21,7 @@ FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
 FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
 
 exitCode=0
+retry_recovered=false
 
 configs="${FTR_CONFIG:-}"
 
@@ -97,19 +98,13 @@ while read -r config; do
   lastCode=$?
   set -e;
 
-  # Scout reporter — run under set+e so a failure here does not abort the config loop
+  # Scout reporter
   if [[ "${SCOUT_REPORTER_ENABLED:-}" =~ ^(1|true)$ ]]; then
+    # Upload events after running each config
     echo "Upload Scout reporter events to AppEx QA's team cluster for config $config"
-    set +e
     node scripts/scout upload-events --dontFailOnError
-    scout_upload_code=$?
-    set -e
-    if [[ $scout_upload_code -ne 0 ]]; then
-      echo "Scout reporter upload exited $scout_upload_code (continuing)"
-    else
-      echo "Upload successful, removing local events at .scout/reports"
-      rm -rf .scout/reports
-    fi
+    echo "Upload successful, removing local events at .scout/reports"
+    rm -rf .scout/reports
   else
     echo "SCOUT_REPORTER_ENABLED=$SCOUT_REPORTER_ENABLED, skipping event upload."
   fi
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
index f0c262864e9cc..15c0e642f2cfa 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.test.ts
@@ -11,11 +11,7 @@ import Fs from 'fs';
 import Os from 'os';
 import Path from 'path';
 
-import {
-  collectFailedTestNames,
-  collectPassedTestNames,
-  computeIntersection,
-} from './retry_result_checker';
+import { collectFailedTestNames, collectPassedTestNames } from './retry_result_checker';
 
 // Minimal JUnit XML helpers
 const buildXml = (testcases: string) => `<?xml version="1.0" encoding="utf-8"?>
@@ -151,35 +147,3 @@ describe('collectPassedTestNames', () => {
     expect(names.has('test A')).toBe(false);
   });
 });
-
-describe('computeIntersection', () => {
-  it('returns empty when no overlap', () => {
-    const prev = new Set(['test A', 'test B']);
-    const current = new Set(['test C']);
-    expect(computeIntersection(prev, current)).toEqual([]);
-  });
-
-  it('returns overlapping tests', () => {
-    const prev = new Set(['test A', 'test B']);
-    const current = new Set(['test A', 'test C']);
-    expect(computeIntersection(prev, current)).toEqual(['test A']);
-  });
-
-  it('returns empty when current is empty', () => {
-    const prev = new Set(['test A']);
-    const current = new Set<string>();
-    expect(computeIntersection(prev, current)).toEqual([]);
-  });
-
-  it('returns empty when prev is empty', () => {
-    const prev = new Set<string>();
-    const current = new Set(['test A']);
-    expect(computeIntersection(prev, current)).toEqual([]);
-  });
-
-  it('returns all current failures when all were previously failing', () => {
-    const prev = new Set(['test A', 'test B']);
-    const current = new Set(['test A', 'test B']);
-    expect(computeIntersection(prev, current).sort()).toEqual(['test A', 'test B']);
-  });
-});
diff --git a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
index 2ee2916eeb2ec..c40322b82f9a9 100644
--- a/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
+++ b/packages/kbn-failed-test-reporter-cli/failed_tests_reporter/retry_result_checker.ts
@@ -49,10 +49,6 @@ export async function collectPassedTestNames(junitDir: string): Promise<Set<stri
   return names;
 }
 
-export function computeIntersection(prev: Set<string>, current: Set<string>): string[] {
-  return [...current].filter((name) => prev.has(name));
-}
-
 const readStdin = (): Promise<string> =>
   new Promise((resolve, reject) => {
     const chunks: Buffer[] = [];

From 69c832667a82ccade4a5983059390e112d2ad890 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Mon, 18 May 2026 10:03:54 +0200
Subject: [PATCH 20/30] =?UTF-8?q?Reapply=20"test(ci):=20TEMP=20add=20retry?=
 =?UTF-8?q?-validation=20fixture=20=E2=80=94=20DELETE=20BEFORE=20MERGE"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 1be5cd8a7e5db8ff3ae0f64b1878d17a3e1d5544.
---
 .../apis/unused_urls_task/index.ts            |  2 +
 .../retry_validation_delete_before_merge.ts   | 41 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts

diff --git a/src/platform/test/api_integration/apis/unused_urls_task/index.ts b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
index c6210ed44023d..eae3bf1227ce4 100644
--- a/src/platform/test/api_integration/apis/unused_urls_task/index.ts
+++ b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
@@ -11,6 +11,8 @@ import type { FtrProviderContext } from '../../ftr_provider_context';
 
 export default function ({ loadTestFile }: FtrProviderContext) {
   describe('unused_urls_task', () => {
+    // TEMPORARY: validates FTR retry intersection logic. Delete before merging this PR.
+    loadTestFile(require.resolve('./retry_validation_delete_before_merge'));
     loadTestFile(require.resolve('./run'));
   });
 }
diff --git a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
new file mode 100644
index 0000000000000..6a7efea7dcbf4
--- /dev/null
+++ b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
@@ -0,0 +1,41 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+// TEMPORARY: validates the FTR retry intersection logic from this PR. Delete before merge.
+//
+// Scenario:
+//   - Attempt 1 (BUILDKITE_RETRY_COUNT unset / '0'): TEST_A fails. --bail stops the run.
+//     JUnit XML records TEST_A as failed; TEST_B is never reached.
+//   - Attempt 2 (BUILDKITE_RETRY_COUNT == '1'): TEST_A passes (recovered).
+//     TEST_B now fails (simulates an unrelated flake on retry). --bail stops the run.
+//     JUnit XML records TEST_B as failed.
+//
+// Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.  Intersection: ∅.
+// Expected: ftr_configs.sh overrides exit code to 0 and the step turns green.
+
+import type { FtrProviderContext } from '../../ftr_provider_context';
+
+const isFirstAttempt =
+  !process.env.BUILDKITE_RETRY_COUNT || process.env.BUILDKITE_RETRY_COUNT === '0';
+
+export default function ({}: FtrProviderContext) {
+  describe('retry-validation', () => {
+    it('TEST_A: intentionally fails on attempt 1, passes on attempt 2', () => {
+      if (isFirstAttempt) {
+        throw new Error('Intentional first-attempt failure (retry validation)');
+      }
+    });
+
+    it('TEST_B: passes on attempt 1, intentionally fails on attempt 2', () => {
+      if (!isFirstAttempt) {
+        throw new Error('Intentional second-attempt failure (retry validation)');
+      }
+    });
+  });
+}

From c8a62ec25792d7ca0b022ff85b47af9641034727 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Thu, 28 May 2026 11:08:25 +0200
Subject: [PATCH 21/30] Revert "chore: remove old comments that references file
 that doesn't exist anymore"

This reverts commit 5201524573d9371987c353ef4d41d3dfc64c303a.
---
 .buildkite/scripts/steps/functional/common.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.buildkite/scripts/steps/functional/common.sh b/.buildkite/scripts/steps/functional/common.sh
index 4be748513d299..7ae0f78bdcddf 100755
--- a/.buildkite/scripts/steps/functional/common.sh
+++ b/.buildkite/scripts/steps/functional/common.sh
@@ -2,6 +2,8 @@
 
 set -euo pipefail
 
+# Note, changes here might also need to be made in other scripts, e.g. uptime.sh
+
 source .buildkite/scripts/common/util.sh
 
 # All functional/integration test steps run Kibana from the distributable,

From 2193bfbdfcd315acbf7c2e1ed83abea5c3dae47c Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Thu, 28 May 2026 11:09:00 +0200
Subject: [PATCH 22/30] =?UTF-8?q?Revert=20"Reapply=20"test(ci):=20TEMP=20a?=
 =?UTF-8?q?dd=20retry-validation=20fixture=20=E2=80=94=20DELETE=20BEFORE?=
 =?UTF-8?q?=20MERGE""?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 69c832667a82ccade4a5983059390e112d2ad890.
---
 .../apis/unused_urls_task/index.ts            |  2 -
 .../retry_validation_delete_before_merge.ts   | 41 -------------------
 2 files changed, 43 deletions(-)
 delete mode 100644 src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts

diff --git a/src/platform/test/api_integration/apis/unused_urls_task/index.ts b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
index eae3bf1227ce4..c6210ed44023d 100644
--- a/src/platform/test/api_integration/apis/unused_urls_task/index.ts
+++ b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
@@ -11,8 +11,6 @@ import type { FtrProviderContext } from '../../ftr_provider_context';
 
 export default function ({ loadTestFile }: FtrProviderContext) {
   describe('unused_urls_task', () => {
-    // TEMPORARY: validates FTR retry intersection logic. Delete before merging this PR.
-    loadTestFile(require.resolve('./retry_validation_delete_before_merge'));
     loadTestFile(require.resolve('./run'));
   });
 }
diff --git a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
deleted file mode 100644
index 6a7efea7dcbf4..0000000000000
--- a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the "Elastic License
- * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
- * Public License v 1"; you may not use this file except in compliance with, at
- * your election, the "Elastic License 2.0", the "GNU Affero General Public
- * License v3.0 only", or the "Server Side Public License, v 1".
- */
-
-// TEMPORARY: validates the FTR retry intersection logic from this PR. Delete before merge.
-//
-// Scenario:
-//   - Attempt 1 (BUILDKITE_RETRY_COUNT unset / '0'): TEST_A fails. --bail stops the run.
-//     JUnit XML records TEST_A as failed; TEST_B is never reached.
-//   - Attempt 2 (BUILDKITE_RETRY_COUNT == '1'): TEST_A passes (recovered).
-//     TEST_B now fails (simulates an unrelated flake on retry). --bail stops the run.
-//     JUnit XML records TEST_B as failed.
-//
-// Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.  Intersection: ∅.
-// Expected: ftr_configs.sh overrides exit code to 0 and the step turns green.
-
-import type { FtrProviderContext } from '../../ftr_provider_context';
-
-const isFirstAttempt =
-  !process.env.BUILDKITE_RETRY_COUNT || process.env.BUILDKITE_RETRY_COUNT === '0';
-
-export default function ({}: FtrProviderContext) {
-  describe('retry-validation', () => {
-    it('TEST_A: intentionally fails on attempt 1, passes on attempt 2', () => {
-      if (isFirstAttempt) {
-        throw new Error('Intentional first-attempt failure (retry validation)');
-      }
-    });
-
-    it('TEST_B: passes on attempt 1, intentionally fails on attempt 2', () => {
-      if (!isFirstAttempt) {
-        throw new Error('Intentional second-attempt failure (retry validation)');
-      }
-    });
-  });
-}

From a1c15655fe1dc72c78a4018af7ad5fa19a89bda2 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Thu, 28 May 2026 11:13:15 +0200
Subject: [PATCH 23/30] bring back verbose version

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 39414ddb0ca26..d8d863c0ee1d6 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -123,12 +123,17 @@ while read -r config; do
     result: ${lastCode}")
 
   if [ $lastCode -eq 0 ]; then
+    # Test was successful, so mark it as executed
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
   else
     exitCode=10
     echo "FTR exited with code $lastCode"
     echo "^^^ +++"
-    failedConfigs="${failedConfigs:+${failedConfigs}$'\n'}$config"
+    if [[ "$failedConfigs" ]]; then
+      failedConfigs="${failedConfigs}"$'\n'"$config"
+    else
+      failedConfigs="$config"
+    fi
   fi
 done <<< "$configs"
 

From d51172cf2f3743134c66c84e4593c1d396018d91 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Thu, 28 May 2026 11:16:24 +0200
Subject: [PATCH 24/30] revert comments

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index d8d863c0ee1d6..b0bea9a4cec1f 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -20,13 +20,14 @@ export JOB="$FTR_CONFIG_GROUP_KEY"
 FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
 FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
 
+# a FTR failure will result in the script returning an exit code of 10
 exitCode=0
 retry_recovered=false
 
 configs="${FTR_CONFIG:-}"
 
-# The first retry should only run the configs that failed in the previous attempt.
-# Any subsequent retries (generally triggered manually) will run everything.
+# The first retry should only run the configs that failed in the previous attempt
+# Any subsequent retries, which would generally only happen by someone clicking the button in the UI, will run everything
 if [[ ! "$configs" && "${BUILDKITE_RETRY_COUNT:-0}" == "1" ]]; then
   configs=$(buildkite-agent meta-data get "$FAILED_CONFIGS_KEY" --default '')
   if [[ "$configs" ]]; then

From 3cfcdfb04e9dd81747e27c4b1088a2e6ec5a33c6 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Thu, 28 May 2026 11:19:46 +0200
Subject: [PATCH 25/30] refactor(ci): move FAILED_TESTS_KEY and retry_recovered
 into ftr_smart_retry.sh

Both variables are only used within ftr_smart_retry.sh, so they belong there.
---
 .buildkite/scripts/steps/test/ftr_configs.sh     | 2 --
 .buildkite/scripts/steps/test/ftr_smart_retry.sh | 7 +++++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index b0bea9a4cec1f..d56bc60b27056 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -18,11 +18,9 @@ test -z "$EXTRA_ARGS" || buildkite-agent meta-data set "ftr-extra-args" "$EXTRA_
 export JOB="$FTR_CONFIG_GROUP_KEY"
 
 FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
-FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
 
 # a FTR failure will result in the script returning an exit code of 10
 exitCode=0
-retry_recovered=false
 
 configs="${FTR_CONFIG:-}"
 
diff --git a/.buildkite/scripts/steps/test/ftr_smart_retry.sh b/.buildkite/scripts/steps/test/ftr_smart_retry.sh
index 86e172f298f98..9342b5679f5d3 100644
--- a/.buildkite/scripts/steps/test/ftr_smart_retry.sh
+++ b/.buildkite/scripts/steps/test/ftr_smart_retry.sh
@@ -1,6 +1,9 @@
 # Sourced by ftr_configs.sh — do not execute directly.
-# Reads/writes globals: exitCode, failedConfigs, retry_recovered,
-# FAILED_TESTS_KEY, FAILED_CONFIGS_KEY, JOB, BUILDKITE_RETRY_COUNT.
+# Reads/writes globals: exitCode, failedConfigs,
+# FAILED_CONFIGS_KEY, JOB, BUILDKITE_RETRY_COUNT.
+
+FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
+retry_recovered=false
 
 # Called after attempt 1: stores failing test names so the retry can verify recovery.
 store_failing_tests() {

From 64837774052e09b073548edf0ca4c429abefe434 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Thu, 28 May 2026 11:21:41 +0200
Subject: [PATCH 26/30] add whitespace

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index d56bc60b27056..758cd90643045 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -128,6 +128,7 @@ while read -r config; do
     exitCode=10
     echo "FTR exited with code $lastCode"
     echo "^^^ +++"
+
     if [[ "$failedConfigs" ]]; then
       failedConfigs="${failedConfigs}"$'\n'"$config"
     else

From fcdfba7a11d6cb01f68885a2417e0ffb190923ac Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Thu, 28 May 2026 11:39:34 +0200
Subject: [PATCH 27/30] =?UTF-8?q?Reapply=20"Reapply=20"test(ci):=20TEMP=20?=
 =?UTF-8?q?add=20retry-validation=20fixture=20=E2=80=94=20DELETE=20BEFORE?=
 =?UTF-8?q?=20MERGE""?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 2193bfbdfcd315acbf7c2e1ed83abea5c3dae47c.
---
 .../apis/unused_urls_task/index.ts            |  2 +
 .../retry_validation_delete_before_merge.ts   | 41 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts

diff --git a/src/platform/test/api_integration/apis/unused_urls_task/index.ts b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
index c6210ed44023d..eae3bf1227ce4 100644
--- a/src/platform/test/api_integration/apis/unused_urls_task/index.ts
+++ b/src/platform/test/api_integration/apis/unused_urls_task/index.ts
@@ -11,6 +11,8 @@ import type { FtrProviderContext } from '../../ftr_provider_context';
 
 export default function ({ loadTestFile }: FtrProviderContext) {
   describe('unused_urls_task', () => {
+    // TEMPORARY: validates FTR retry intersection logic. Delete before merging this PR.
+    loadTestFile(require.resolve('./retry_validation_delete_before_merge'));
     loadTestFile(require.resolve('./run'));
   });
 }
diff --git a/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
new file mode 100644
index 0000000000000..6a7efea7dcbf4
--- /dev/null
+++ b/src/platform/test/api_integration/apis/unused_urls_task/retry_validation_delete_before_merge.ts
@@ -0,0 +1,41 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+// TEMPORARY: validates the FTR retry intersection logic from this PR. Delete before merge.
+//
+// Scenario:
+//   - Attempt 1 (BUILDKITE_RETRY_COUNT unset / '0'): TEST_A fails. --bail stops the run.
+//     JUnit XML records TEST_A as failed; TEST_B is never reached.
+//   - Attempt 2 (BUILDKITE_RETRY_COUNT == '1'): TEST_A passes (recovered).
+//     TEST_B now fails (simulates an unrelated flake on retry). --bail stops the run.
+//     JUnit XML records TEST_B as failed.
+//
+// Stored prev failures: {TEST_A}.  Current failures: {TEST_B}.  Intersection: ∅.
+// Expected: ftr_configs.sh overrides exit code to 0 and the step turns green.
+
+import type { FtrProviderContext } from '../../ftr_provider_context';
+
+const isFirstAttempt =
+  !process.env.BUILDKITE_RETRY_COUNT || process.env.BUILDKITE_RETRY_COUNT === '0';
+
+export default function ({}: FtrProviderContext) {
+  describe('retry-validation', () => {
+    it('TEST_A: intentionally fails on attempt 1, passes on attempt 2', () => {
+      if (isFirstAttempt) {
+        throw new Error('Intentional first-attempt failure (retry validation)');
+      }
+    });
+
+    it('TEST_B: passes on attempt 1, intentionally fails on attempt 2', () => {
+      if (!isFirstAttempt) {
+        throw new Error('Intentional second-attempt failure (retry validation)');
+      }
+    });
+  });
+}

From 0ba5cbb7330c1e34a1b058aafd6b59f7c0ed09c5 Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Sat, 30 May 2026 22:53:31 +0200
Subject: [PATCH 28/30] put this all behind an env flag

---
 .../pipeline-resource-definitions/kibana-on-merge.yml      | 1 +
 .buildkite/pipeline-resource-definitions/kibana-pr.yml     | 1 +
 .buildkite/scripts/steps/test/ftr_configs.sh               | 7 +++++--
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.buildkite/pipeline-resource-definitions/kibana-on-merge.yml b/.buildkite/pipeline-resource-definitions/kibana-on-merge.yml
index 255debc0d98cb..60da014f75b6d 100644
--- a/.buildkite/pipeline-resource-definitions/kibana-on-merge.yml
+++ b/.buildkite/pipeline-resource-definitions/kibana-on-merge.yml
@@ -26,6 +26,7 @@ spec:
         KIBANA_SLACK_NOTIFICATIONS_ENABLED: 'true'
         SLACK_NOTIFICATIONS_SKIP_FOR_RETRIES: 'true'
         SCOUT_REPORTER_ENABLED: 'true'
+        FTR_SMART_RETRY_ENABLED: 'false'
       allow_rebuilds: true
       branch_configuration: main 9.4 9.3 8.19
       default_branch: main
diff --git a/.buildkite/pipeline-resource-definitions/kibana-pr.yml b/.buildkite/pipeline-resource-definitions/kibana-pr.yml
index bf1f884c5c9e6..4326dfa09dd5a 100644
--- a/.buildkite/pipeline-resource-definitions/kibana-pr.yml
+++ b/.buildkite/pipeline-resource-definitions/kibana-pr.yml
@@ -23,6 +23,7 @@ spec:
         KIBANA_GITHUB_BUILD_COMMIT_STATUS_ENABLED: 'true'
         GITHUB_BUILD_COMMIT_STATUS_CONTEXT: kibana-ci
         SCOUT_REPORTER_ENABLED: 'true'
+        FTR_SMART_RETRY_ENABLED: 'false'
       allow_rebuilds: true
       branch_configuration: ''
       cancel_intermediate_builds: true
diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 758cd90643045..17872b1193cda 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -141,8 +141,11 @@ if [[ "$failedConfigs" ]]; then
   buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "$failedConfigs"
 fi
 
-store_failing_tests  # attempt 1: record what failed so the retry can verify recovery
-apply_smart_retry    # attempt 2: mark green if all previously-failing tests explicitly passed
+
+if [[ "${FTR_SMART_RETRY_ENABLED:-}" =~ ^(1|true)$ ]]; then
+  store_failing_tests  # attempt 1: record what failed so the retry can verify recovery
+  apply_smart_retry    # attempt 2: mark green if all previously-failing tests explicitly passed
+fi
 
 echo "--- FTR configs complete"
 printf "%s\n" "${results[@]}"

From bad92a96984b48d039276f358617d6608c5b20ea Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <tamerlan.gudabayev@elastic.co>
Date: Sat, 30 May 2026 22:58:12 +0200
Subject: [PATCH 29/30] put bail behind an env variable too

---
 .buildkite/scripts/steps/test/ftr_configs.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index 17872b1193cda..e3fa074a8d72b 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -12,6 +12,11 @@ if [ "$FTR_CONFIG_GROUP_KEY" == "" ] && [ "$BUILDKITE_PARALLEL_JOB" == "" ]; the
   exit 1
 fi
 
+BAIL_ARG=""
+if [[ "${FTR_SMART_RETRY_ENABLED:-}" =~ ^(1|true)$ ]]; then
+  BAIL_ARG="--bail"
+fi
+
 EXTRA_ARGS=${FTR_EXTRA_ARGS:-}
 test -z "$EXTRA_ARGS" || buildkite-agent meta-data set "ftr-extra-args" "$EXTRA_ARGS"
 
@@ -53,7 +58,7 @@ while read -r config; do
     continue;
   fi
 
-  FULL_COMMAND="node scripts/functional_tests --config $config $EXTRA_ARGS"
+  FULL_COMMAND="node scripts/functional_tests $BAIL_ARG --config $config $EXTRA_ARGS"
 
   # see if this config has already been executed successfully
   CONFIG_EXECUTION_KEY="${config}_executed"
@@ -93,6 +98,7 @@ while read -r config; do
   node ./scripts/functional_tests \
     --kibana-install-dir "$KIBANA_BUILD_LOCATION" \
     --config="$config" \
+    $BAIL_ARG \
     "$EXTRA_ARGS"
   lastCode=$?
   set -e;

From be25fc69f14707adeecfcfbb65ab53da023605aa Mon Sep 17 00:00:00 2001
From: Tamerlan Gudabayev <37669316+TamerlanG@users.noreply.github.com>
Date: Sun, 31 May 2026 20:14:00 +0200
Subject: [PATCH 30/30] Update .buildkite/scripts/steps/test/ftr_configs.sh

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 .buildkite/scripts/steps/test/ftr_configs.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.buildkite/scripts/steps/test/ftr_configs.sh b/.buildkite/scripts/steps/test/ftr_configs.sh
index e3fa074a8d72b..fe5450c0296d3 100755
--- a/.buildkite/scripts/steps/test/ftr_configs.sh
+++ b/.buildkite/scripts/steps/test/ftr_configs.sh
@@ -12,9 +12,9 @@ if [ "$FTR_CONFIG_GROUP_KEY" == "" ] && [ "$BUILDKITE_PARALLEL_JOB" == "" ]; the
   exit 1
 fi
 
-BAIL_ARG=""
+BAIL_ARG="--bail"
 if [[ "${FTR_SMART_RETRY_ENABLED:-}" =~ ^(1|true)$ ]]; then
-  BAIL_ARG="--bail"
+  BAIL_ARG=""
 fi
 
 EXTRA_ARGS=${FTR_EXTRA_ARGS:-}