elastic · TamerlanG · May 16, 2026 · May 16, 2026 · May 16, 2026 · May 16, 2026
diff --git a/.buildkite/scripts/steps/functional/common.sh b/.buildkite/scripts/steps/functional/common.sh
@@ -2,8 +2,6 @@
 
 set -euo pipefail
 
-# Note, changes here might also need to be made in other scripts, e.g. uptime.sh
-
 source .buildkite/scripts/common/util.sh
 
 # All functional/integration test steps run Kibana from the distributable,

@@ -18,9 +18,16 @@ export JOB="$FTR_CONFIG_GROUP_KEY"
 
 FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
 
+FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"
+
 # a FTR failure will result in the script returning an exit code of 10
 exitCode=0
 
+# Per-config rows for the job annotation summary, plus a flag set when
+# the retry-only-failed logic marks an otherwise-red step green.
+annotation_rows=()
+retry_recovered=false
+
 configs="${FTR_CONFIG:-}"
 
 # The first retry should only run the configs that failed in the previous attempt
@@ -62,6 +69,7 @@ while read -r config; do
 
   if [[ "$IS_CONFIG_EXECUTION" == "true" && "$IS_FLAKY_TEST_RUN" == "false" ]]; then
     echo "--- [ already-tested ] $FULL_COMMAND"
+    annotation_rows+=("| \`${config}\` | — | skipped (already-tested) |")
     continue
   else
     echo "--- $ $FULL_COMMAND"
@@ -124,6 +132,7 @@ while read -r config; do
   if [ $lastCode -eq 0 ]; then
     # Test was successful, so mark it as executed
     buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
+    annotation_rows+=("| \`${config}\` | ${duration} | passed |")
   else
     exitCode=10
     echo "FTR exited with code $lastCode"
@@ -134,15 +143,95 @@ while read -r config; do
     else
       failedConfigs="$config"
     fi
+    annotation_rows+=("| \`${config}\` | ${duration} | **failed** |")
   fi
 done <<< "$configs"
 
 if [[ "$failedConfigs" ]]; then
   buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "$failedConfigs"
 fi
 
+# --- retry-only-failed feature ---
+# Attempt 1: record the names of failing tests so the retry can evaluate whether they recovered.
+# On the first retry, the step is marked green if every previously-failing test passes — even if
+# a different (previously-passing) test happens to fail on retry.
+if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
+      "${BUILDKITE_RETRY_COUNT:-0}" == "0" && "$exitCode" != "0" ]]; then
+  junitDir="target/junit/$JOB"
+  if [[ -d "$junitDir" ]]; then
+    # Best effort: if the helper fails, nothing is stored and the retry is evaluated as usual.
+    failedTestNames=$(node scripts/ftr_check_retry_result list-failures "$junitDir" 2>/dev/null || true)
+    if [[ "$failedTestNames" ]]; then
+      buildkite-agent meta-data set "$FAILED_TESTS_KEY" "$failedTestNames"
+      echo "Stored $(echo "$failedTestNames" | wc -l | tr -d ' ') previously-failing test name(s) for retry evaluation"
+    fi
+  fi
+fi
+
+# Attempt 2: check whether the failures from attempt 1 are still failing. If every one of them
+# now passes, mark the step green. Without JUnit reports from this attempt the step stays red.
+if [[ -z "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" && \
+      "${BUILDKITE_RETRY_COUNT:-0}" == "1" && "$exitCode" != "0" ]]; then
+  junitDir="target/junit/$JOB"
+  prevFailedTests=$(buildkite-agent meta-data get "$FAILED_TESTS_KEY" --default '' 2>/dev/null || true)
+  if [[ "$prevFailedTests" && -d "$junitDir" ]]; then
+    tmpPrevFile=$(mktemp)
+    printf '%s' "$prevFailedTests" > "$tmpPrevFile"
+    intersectionCode=0
+    node scripts/ftr_check_retry_result check-intersection \
+      --junit-dir "$junitDir" \
+      --prev-failures-file "$tmpPrevFile" || intersectionCode=$?
+    rm -f "$tmpPrevFile"
+    if [[ "$intersectionCode" == "0" ]]; then
+      echo "--- [retry-only-failed] All previously-failing tests recovered on retry — marking step green"
+      exitCode=0
+      failedConfigs=""
+      retry_recovered=true
+      # NOTE(review): the agent may reject an empty value, leaving the old list stored — confirm.
+      buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "" 2>/dev/null || true
+    fi
+  fi
+fi
+# --- end retry-only-failed feature ---
+
 echo "--- FTR configs complete"
 printf "%s\n" "${results[@]}"
 echo ""
 
+write_job_annotation() {
+  # Publish a job-scoped Buildkite annotation (context "ftr-summary") for this attempt.
+  # Reads exitCode, JOB, retry_recovered, failedConfigs and annotation_rows from script scope.
+  # Any failure while building or sending the annotation is ignored (`|| true`).
+  local attempt_num style="error"
+  # BUILDKITE_RETRY_COUNT defaults to 0 here, so attempts are reported 1-based.
+  attempt_num=$((${BUILDKITE_RETRY_COUNT:-0} + 1))
+  if [[ "$exitCode" == "0" ]]; then style="success"; fi
+
+  {
+    printf '%s\n\n' "### FTR Configs — \`${JOB}\` (attempt ${attempt_num})"
+    if [[ "$retry_recovered" == "true" ]]; then
+      printf '%s\n\n' "**Recovered on retry** — all previously-failing tests passed; step marked green."
+    elif [[ -n "$failedConfigs" ]]; then
+      echo "**Failed configs:**"
+      local failed_config
+      while IFS= read -r failed_config; do
+        if [[ -n "$failed_config" ]]; then
+          printf '%s\n' "- \`${failed_config}\`"
+        fi
+      done <<< "$failedConfigs"
+      echo ""
+    fi
+    if (( ${#annotation_rows[@]} > 0 )); then
+      # Markdown table: header, separator, then one pre-formatted row per config.
+      printf '%s\n' "| Config | Duration | Status |" "| --- | --- | --- |"
+      printf '%s\n' "${annotation_rows[@]}"
+    fi
+  } | buildkite-agent annotate \
+        --scope job \
+        --context "ftr-summary" \
+        --style "${style}" || true
+}
+
+write_job_annotation
+
 exit $exitCode
@@ -27,133 +27,135 @@ const DEFAULT_PATTERNS = [Path.resolve(REPO_ROOT, 'target/junit/**/*.xml')];
 const DISABLE_MISSING_TEST_REPORT_ERRORS =
   process.env.DISABLE_MISSING_TEST_REPORT_ERRORS === 'true';
 
-run(
-  async ({ log, flags }) => {
-    const indexInEs = Boolean(flags['index-errors']);
-    const reportUpdate = Boolean(flags['report-update']);
-
-    let updateGithub = Boolean(flags['github-update']);
-    if (updateGithub && !process.env.GITHUB_TOKEN) {
-      throw createFailError(
-        'GITHUB_TOKEN environment variable must be set, otherwise use --no-github-update flag'
-      );
-    }
-
-    let branch: string = '';
-    let pipeline: string = '';
-    let prependTitle: string = '';
-    if (updateGithub) {
-      branch = process.env.BUILDKITE_BRANCH || '';
-      pipeline = process.env.BUILDKITE_PIPELINE_SLUG || '';
-      updateGithub = process.env.REPORT_FAILED_TESTS_TO_GITHUB === 'true';
-      prependTitle = process.env.PREPEND_FAILURE_TITLE || '';
-
-      if (!branch) {
+export function runFailedTestsReporterCli() {
+  run(
+    async ({ log, flags }) => {
+      const indexInEs = Boolean(flags['index-errors']);
+      const reportUpdate = Boolean(flags['report-update']);
+
+      let updateGithub = Boolean(flags['github-update']);
+      if (updateGithub && !process.env.GITHUB_TOKEN) {
         throw createFailError(
-          'Unable to determine originating branch from job name or other environment variables'
+          'GITHUB_TOKEN environment variable must be set, otherwise use --no-github-update flag'
         );
       }
-    }
 
-    const githubApi = new GithubApi({
-      log,
-      token: process.env.GITHUB_TOKEN,
-      dryRun: !updateGithub,
-    });
-
-    const bkMeta = getBuildkiteMetadata();
-
-    try {
-      const buildUrl = flags['build-url'] || (updateGithub ? '' : 'http://buildUrl');
-      if (typeof buildUrl !== 'string' || !buildUrl) {
-        throw createFlagError('Missing --build-url or process.env.BUILD_URL');
+      let branch: string = '';
+      let pipeline: string = '';
+      let prependTitle: string = '';
+      if (updateGithub) {
+        branch = process.env.BUILDKITE_BRANCH || '';
+        pipeline = process.env.BUILDKITE_PIPELINE_SLUG || '';
+        updateGithub = process.env.REPORT_FAILED_TESTS_TO_GITHUB === 'true';
+        prependTitle = process.env.PREPEND_FAILURE_TITLE || '';
+
+        if (!branch) {
+          throw createFailError(
+            'Unable to determine originating branch from job name or other environment variables'
+          );
+        }
       }
 
-      const patterns = (flags._.length ? flags._ : DEFAULT_PATTERNS).map((p) =>
-        normalize(Path.resolve(p))
-      );
-      log.info('Searching for reports at', patterns);
-      const reportPaths = await globby(patterns, {
-        absolute: true,
+      const githubApi = new GithubApi({
+        log,
+        token: process.env.GITHUB_TOKEN,
+        dryRun: !updateGithub,
       });
 
-      if (!reportPaths.length && DISABLE_MISSING_TEST_REPORT_ERRORS) {
-        // it is fine for code coverage to not have test results
-        return;
-      }
-
-      if (reportPaths.length) {
-        log.info('found', reportPaths.length, 'reports', reportPaths);
+      const bkMeta = getBuildkiteMetadata();
 
-        // Separate JUnit and Scout reports
-        const junitReports = reportPaths.filter((p) => p.endsWith('.xml'));
-        const scoutReports = reportPaths.filter((p) => p.endsWith('.ndjson'));
+      try {
+        const buildUrl = flags['build-url'] || (updateGithub ? '' : 'http://buildUrl');
+        if (typeof buildUrl !== 'string' || !buildUrl) {
+          throw createFlagError('Missing --build-url or process.env.BUILD_URL');
+        }
 
-        log.info(
-          'Processing',
-          junitReports.length,
-          'JUnit reports and',
-          scoutReports.length,
-          'Scout reports'
+        const patterns = (flags._.length ? flags._ : DEFAULT_PATTERNS).map((p) =>
+          normalize(Path.resolve(p))
         );
-
-        const existingIssues = new ExistingFailedTestIssues(log);
-
-        const processParams: ProcessReportsParams = {
-          log,
-          existingIssues,
-          buildUrl,
-          githubApi,
-          branch,
-          pipeline,
-          prependTitle,
-          updateGithub,
-          indexInEs,
-          reportUpdate,
-          bkMeta,
-        };
-
-        // Process FTR JUnit reports
-        await processJUnitReports(junitReports, processParams);
-
-        // Process Scout reports
-        await processScoutReports(scoutReports, processParams);
-
-        // Generate Scout test failure artifacts after reports are updated (GH issue info, html reports, etc.)
-        await generateScoutTestFailureArtifacts({ log, bkMeta });
+        log.info('Searching for reports at', patterns);
+        const reportPaths = await globby(patterns, {
+          absolute: true,
+        });
+
+        if (!reportPaths.length && DISABLE_MISSING_TEST_REPORT_ERRORS) {
+          // it is fine for code coverage to not have test results
+          return;
+        }
+
+        if (reportPaths.length) {
+          log.info('found', reportPaths.length, 'reports', reportPaths);
+
+          // Separate JUnit and Scout reports
+          const junitReports = reportPaths.filter((p) => p.endsWith('.xml'));
+          const scoutReports = reportPaths.filter((p) => p.endsWith('.ndjson'));
+
+          log.info(
+            'Processing',
+            junitReports.length,
+            'JUnit reports and',
+            scoutReports.length,
+            'Scout reports'
+          );
+
+          const existingIssues = new ExistingFailedTestIssues(log);
+
+          const processParams: ProcessReportsParams = {
+            log,
+            existingIssues,
+            buildUrl,
+            githubApi,
+            branch,
+            pipeline,
+            prependTitle,
+            updateGithub,
+            indexInEs,
+            reportUpdate,
+            bkMeta,
+          };
+
+          // Process FTR JUnit reports
+          await processJUnitReports(junitReports, processParams);
+
+          // Process Scout reports
+          await processScoutReports(scoutReports, processParams);
+
+          // Generate Scout test failure artifacts after reports are updated (GH issue info, html reports, etc.)
+          await generateScoutTestFailureArtifacts({ log, bkMeta });
+        }
+      } finally {
+        await CiStatsReporter.fromEnv(log).metrics([
+          {
+            group: 'github api request count',
+            id: `failed test reporter`,
+            value: githubApi.getRequestCount(),
+            meta: Object.fromEntries(
+              Object.entries(bkMeta).map(
+                ([k, v]) => [`buildkite${k[0].toUpperCase()}${k.slice(1)}`, v] as const
+              )
+            ),
+          },
+        ]);
       }
-    } finally {
-      await CiStatsReporter.fromEnv(log).metrics([
-        {
-          group: 'github api request count',
-          id: `failed test reporter`,
-          value: githubApi.getRequestCount(),
-          meta: Object.fromEntries(
-            Object.entries(bkMeta).map(
-              ([k, v]) => [`buildkite${k[0].toUpperCase()}${k.slice(1)}`, v] as const
-            )
-          ),
+    },
+    {
+      description: `a cli that opens issues or updates existing issues based on junit reports`,
+      flags: {
+        boolean: ['github-update', 'report-update'],
+        string: ['build-url'],
+        default: {
+          'github-update': true,
+          'report-update': true,
+          'index-errors': true,
+          'build-url': process.env.BUILD_URL,
         },
-      ]);
-    }
-  },
-  {
-    description: `a cli that opens issues or updates existing issues based on junit reports`,
-    flags: {
-      boolean: ['github-update', 'report-update'],
-      string: ['build-url'],
-      default: {
-        'github-update': true,
-        'report-update': true,
-        'index-errors': true,
-        'build-url': process.env.BUILD_URL,
-      },
-      help: `
+        help: `
         --no-github-update Execute the CLI without writing to Github
         --no-report-update Execute the CLI without writing to the JUnit reports
         --no-index-errors  Execute the CLI without indexing failures into Elasticsearch
         --build-url        URL of the failed build, defaults to process.env.BUILD_URL
       `,
-    },
-  }
-);
+      },
+    }
+  );
+}