Skip to content
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
5201524
chore: remove old comments that references file that doesn't exist an…
TamerlanG May 16, 2026
29689fd
feat(ci): add ftr retry result checker to kbn-failed-test-reporter-cli
TamerlanG May 16, 2026
f89d58a
feat(ci): mark FTR retry green when previously-failing tests recover
TamerlanG May 16, 2026
54726fb
test(ci): TEMP add retry-validation fixture — DELETE BEFORE MERGE
TamerlanG May 16, 2026
8403a73
Merge branch 'main' into ftr/smart-retry
TamerlanG May 16, 2026
1be5cd8
Revert "test(ci): TEMP add retry-validation fixture — DELETE BEFORE M…
TamerlanG May 16, 2026
f64246a
Merge branch 'main' into ftr/smart-retry
TamerlanG May 18, 2026
27b3524
[CI] Add job annotation to FTR configs summary
TamerlanG May 18, 2026
443e1cf
Reapply "test(ci): TEMP add retry-validation fixture — DELETE BEFORE …
TamerlanG May 18, 2026
093dc39
Merge branch 'main' into ftr/smart-retry
TamerlanG May 21, 2026
bb84df5
Merge branch 'main' into ftr/smart-retry
TamerlanG May 21, 2026
221df45
Merge branch 'main' into ftr/smart-retry
TamerlanG May 22, 2026
0cadf4a
Merge branch 'main' into ftr/smart-retry
TamerlanG May 25, 2026
5f661e0
Merge branch 'main' into ftr/smart-retry
TamerlanG May 26, 2026
2522c6c
Merge branch 'main' into ftr/smart-retry
TamerlanG May 26, 2026
11841f7
improve job annotation
TamerlanG May 26, 2026
6931aeb
remove bail
TamerlanG May 26, 2026
b137ca2
remove view logs link from job annotation
TamerlanG May 26, 2026
ad1ae9a
show failing test names per config in job annotation
TamerlanG May 26, 2026
7f2c559
Revert "Reapply "test(ci): TEMP add retry-validation fixture — DELETE…
TamerlanG May 26, 2026
b9aa8a4
refactor(ci): simplify ftr_configs.sh annotation and failure extraction
TamerlanG May 27, 2026
8ab9a58
feat(ci): verify explicit passes on retry instead of absence of failure
TamerlanG May 27, 2026
924635c
fix(ci): guard scout reporter error, log smart-retry inactivity, clar…
TamerlanG May 27, 2026
989bd71
refactor(ci): split ftr_configs.sh into focused helper files
TamerlanG May 27, 2026
8238bd3
refactor(ci): move XML diff dance and temp-file plumbing into the Nod…
TamerlanG May 27, 2026
65bc3fd
Merge branch 'main' into ftr/smart-retry
TamerlanG May 27, 2026
84f0b90
chore(ci): remove job annotation from smart-retry PR
TamerlanG May 27, 2026
c3712f9
refactor(ci): remove dead computeIntersection export and initialize r…
TamerlanG May 27, 2026
69c8326
Reapply "test(ci): TEMP add retry-validation fixture — DELETE BEFORE …
TamerlanG May 18, 2026
c8a62ec
Revert "chore: remove old comments that references file that doesn't …
TamerlanG May 28, 2026
2193bfb
Revert "Reapply "test(ci): TEMP add retry-validation fixture — DELETE…
TamerlanG May 28, 2026
a1c1565
bring back verbose version
TamerlanG May 28, 2026
d51172c
revert comments
TamerlanG May 28, 2026
3cfcdfb
refactor(ci): move FAILED_TESTS_KEY and retry_recovered into ftr_smar…
TamerlanG May 28, 2026
6483777
add whitesapce
TamerlanG May 28, 2026
fcdfba7
Reapply "Reapply "test(ci): TEMP add retry-validation fixture — DELET…
TamerlanG May 28, 2026
ac19c1a
Merge branch 'main' into ftr/smart-retry
TamerlanG May 29, 2026
0ba5cbb
put this all behind an env flag
TamerlanG May 30, 2026
bad92a9
put bail behind a env variable too
TamerlanG May 30, 2026
be25fc6
Update .buildkite/scripts/steps/test/ftr_configs.sh
TamerlanG May 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .buildkite/scripts/steps/functional/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

set -euo pipefail

# Note, changes here might also need to be made in other scripts, e.g. uptime.sh

source .buildkite/scripts/common/util.sh

# All functional/integration test steps run Kibana from the distributable,
Expand Down
35 changes: 19 additions & 16 deletions .buildkite/scripts/steps/test/ftr_configs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
set -euo pipefail

source .buildkite/scripts/steps/functional/common.sh
source .buildkite/scripts/steps/test/ftr_smart_retry.sh

BUILDKITE_PARALLEL_JOB=${BUILDKITE_PARALLEL_JOB:-}
FTR_CONFIG_GROUP_KEY=${FTR_CONFIG_GROUP_KEY:-}
Expand All @@ -17,14 +18,14 @@ test -z "$EXTRA_ARGS" || buildkite-agent meta-data set "ftr-extra-args" "$EXTRA_
export JOB="$FTR_CONFIG_GROUP_KEY"

FAILED_CONFIGS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}"
FAILED_TESTS_KEY="${BUILDKITE_STEP_ID}${FTR_CONFIG_GROUP_KEY}_failed_tests"

# a FTR failure will result in the script returning an exit code of 10
exitCode=0

configs="${FTR_CONFIG:-}"

# The first retry should only run the configs that failed in the previous attempt
# Any subsequent retries, which would generally only happen by someone clicking the button in the UI, will run everything
# The first retry should only run the configs that failed in the previous attempt.
# Any subsequent retries (generally triggered manually) will run everything.
if [[ ! "$configs" && "${BUILDKITE_RETRY_COUNT:-0}" == "1" ]]; then
configs=$(buildkite-agent meta-data get "$FAILED_CONFIGS_KEY" --default '')
if [[ "$configs" ]]; then
Expand Down Expand Up @@ -52,7 +53,7 @@ while read -r config; do
continue;
fi

FULL_COMMAND="node scripts/functional_tests --bail --config $config $EXTRA_ARGS"
FULL_COMMAND="node scripts/functional_tests --config $config $EXTRA_ARGS"

# see if this config has already been executed successfully
CONFIG_EXECUTION_KEY="${config}_executed"
Expand Down Expand Up @@ -90,20 +91,25 @@ while read -r config; do
# prevent non-zero exit code from breaking the loop
set +e;
node ./scripts/functional_tests \
--bail \
--kibana-install-dir "$KIBANA_BUILD_LOCATION" \
--config="$config" \
"$EXTRA_ARGS"
lastCode=$?
set -e;

# Scout reporter
# Scout reporter — run under set+e so a failure here does not abort the config loop
if [[ "${SCOUT_REPORTER_ENABLED:-}" =~ ^(1|true)$ ]]; then
# Upload events after running each config
echo "Upload Scout reporter events to AppEx QA's team cluster for config $config"
set +e
node scripts/scout upload-events --dontFailOnError
echo "Upload successful, removing local events at .scout/reports"
rm -rf .scout/reports
scout_upload_code=$?
set -e
if [[ $scout_upload_code -ne 0 ]]; then
echo "Scout reporter upload exited $scout_upload_code (continuing)"
else
echo "Upload successful, removing local events at .scout/reports"
rm -rf .scout/reports
fi
else
echo "SCOUT_REPORTER_ENABLED=$SCOUT_REPORTER_ENABLED, skipping event upload."
fi
Expand All @@ -122,25 +128,22 @@ while read -r config; do
result: ${lastCode}")

if [ $lastCode -eq 0 ]; then
# Test was successful, so mark it as executed
buildkite-agent meta-data set "$CONFIG_EXECUTION_KEY" "true"
else
exitCode=10
echo "FTR exited with code $lastCode"
echo "^^^ +++"

if [[ "$failedConfigs" ]]; then
failedConfigs="${failedConfigs}"$'\n'"$config"
else
failedConfigs="$config"
fi
failedConfigs="${failedConfigs:+${failedConfigs}$'\n'}$config"
fi
done <<< "$configs"

if [[ "$failedConfigs" ]]; then
buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "$failedConfigs"
fi

store_failing_tests # attempt 1: record what failed so the retry can verify recovery
apply_smart_retry # attempt 2: mark green if all previously-failing tests explicitly passed

echo "--- FTR configs complete"
printf "%s\n" "${results[@]}"
echo ""
Expand Down
58 changes: 58 additions & 0 deletions .buildkite/scripts/steps/test/ftr_smart_retry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Sourced by ftr_configs.sh — do not execute directly.
# Reads/writes globals: exitCode, failedConfigs, retry_recovered,
# FAILED_TESTS_KEY, FAILED_CONFIGS_KEY, JOB, BUILDKITE_RETRY_COUNT.

#######################################
# Called after attempt 1: stores the names of the failing tests in Buildkite
# meta-data so the retry can verify recovery.
# Globals:
#   KIBANA_FLAKY_TEST_RUNNER_CONFIG (read) - when non-empty, nothing is stored
#   BUILDKITE_RETRY_COUNT (read)           - only acts when unset or "0"
#   exitCode (read)                        - only acts when not "0"
#   JOB (read)                             - selects target/junit/$JOB
#   FAILED_TESTS_KEY (read)                - meta-data key that receives the names
# Outputs:
#   One summary line on stdout when test names were stored.
# Returns:
#   0 on every "nothing to do" path. This file is sourced by ftr_configs.sh,
#   which runs under `set -e`, so a skipped step must not surface as a
#   non-zero status and abort the caller.
#######################################
store_failing_tests() {
  [[ -n "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" ]] && return 0
  [[ "${BUILDKITE_RETRY_COUNT:-0}" != "0" ]] && return 0
  [[ "$exitCode" == "0" ]] && return 0

  local junitDir="target/junit/$JOB"
  # The explicit 0 matters: a bare `return` would hand back the status of the
  # failed directory test (1).
  [[ -d "$junitDir" ]] || return 0

  local failedTestNames
  # Best effort: stderr is discarded and a failing checker yields an empty
  # list, in which case nothing is stored.
  failedTestNames=$(node scripts/ftr_check_retry_result list-failures "$junitDir" 2>/dev/null || true)
  if [[ "$failedTestNames" ]]; then
    buildkite-agent meta-data set "$FAILED_TESTS_KEY" "$failedTestNames"
    echo "Stored $(echo "$failedTestNames" | wc -l | tr -d ' ') previously-failing test name(s) for retry evaluation"
  fi
}

#######################################
# Called after attempt 2: marks the step green if all previously-failing tests
# explicitly passed. On a third-or-later retry it only logs that smart-retry
# is inactive.
# Globals:
#   KIBANA_FLAKY_TEST_RUNNER_CONFIG (read) - when non-empty, nothing happens
#   BUILDKITE_RETRY_COUNT (read)           - only "1" triggers the check
#   exitCode (read/written)                - reset to 0 on recovery
#   failedConfigs (written)                - cleared on recovery
#   retry_recovered (written)              - set to true on recovery
#   FAILED_TESTS_KEY (read)                - meta-data key holding attempt-1 failures
#   FAILED_CONFIGS_KEY (read)              - meta-data key cleared on recovery
#   JOB (read)                             - selects target/junit/$JOB
# Outputs:
#   Status lines on stdout, plus whatever the checker script prints.
# Returns:
#   0 on every path. This file is sourced by ftr_configs.sh, which runs under
#   `set -e`, so a skipped evaluation must not abort the caller.
#######################################
apply_smart_retry() {
  [[ -n "${KIBANA_FLAKY_TEST_RUNNER_CONFIG:-}" ]] && return 0
  [[ "$exitCode" == "0" ]] && return 0

  local retryCount="${BUILDKITE_RETRY_COUNT:-0}"

  if [[ "$retryCount" -ge "2" ]]; then
    echo "--- [smart-retry] inactive on attempt $((retryCount + 1)) — only applies to the first automatic retry"
    return 0
  fi

  [[ "$retryCount" != "1" ]] && return 0

  local prevFailedTests
  prevFailedTests=$(buildkite-agent meta-data get "$FAILED_TESTS_KEY" --default '' 2>/dev/null || true)
  # The explicit 0 matters: a bare `return` would hand back the status of the
  # failed emptiness test (1).
  [[ "$prevFailedTests" ]] || return 0

  local junitDir="target/junit/$JOB"

  # Capture the checker's status with `||` so errexit never fires on it and
  # the caller's shell options are left untouched.
  local intersectionCode=0
  printf '%s' "$prevFailedTests" | node scripts/ftr_check_retry_result check-intersection \
    --junit-dir "$junitDir" \
    --prev-failures-stdin || intersectionCode=$?

  # Any non-zero status leaves the failure state as it was.
  if [[ "$intersectionCode" == "0" ]]; then
    echo "--- [smart-retry] All previously-failing tests recovered on retry — marking step green"
    exitCode=0
    failedConfigs=""
    retry_recovered=true
    # Best effort: clearing the stored failed-config list must not fail the step.
    buildkite-agent meta-data set "$FAILED_CONFIGS_KEY" "" 2>/dev/null || true
  fi
}
Loading
Loading