Skip to content

Commit 5be1070

Browse files
[8.17] (backport #17028) Smoke test os retry event assertions (#17065)
* Smoke test os retry event assertions (#17028)

  * smoke: Add retries and wait to data stream event assertions

    This will improve the reliability of the smoke tests, which intermittently fail due to data not yet being available in ES.

  * smoke: decrease wait time after sending events since assertion now performs retries

  * smoke: decrease elasticsearch flush_interval to 100ms from the 1s default

    This will reduce any latency indexing events to help stabilize the smoke test and reduce the number of retries when asserting events.

  * smoke: decreased retries for data stream event assertions

  (cherry picked from commit b3381c2)

  # Conflicts:
  #   testing/smoke/lib.sh

* fix backport merge conflicts

---------

Co-authored-by: Isaac Flores <[email protected]>
Co-authored-by: Isaac Flores <[email protected]>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent 44299cd commit 5be1070

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

testing/infra/terraform/modules/standalone_apm_server/apm-server.yml.tftpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ output:
1616
hosts: [ ${elasticsearch_url} ]
1717
username: ${elasticsearch_username}
1818
password: ${elasticsearch_password}
19+
flush_interval: 100ms
1920
logging.level: debug
2021
logging.to_files: true
2122
logging.files:

testing/smoke/lib.sh

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ assert_entry() {
125125
if [[ ${HITS} -ne ${ENTRIES} ]]; then
126126
echo "Didn't find ${ENTRIES} indexed documents ${MSG}, total hits ${HITS}"
127127
echo ${RESULT}
128-
exit 2
128+
return 2
129129
else
130130
echo "-> Asserted ${ENTRIES} ${MSG} exists"
131131
fi
@@ -141,7 +141,7 @@ send_events() {
141141
curl_fail --data-binary @${INTAKE_DATA} -H "${APM_AUTH_HEADER}" -H "${INTAKE_HEADER}" ${APM_SERVER_INTAKE}
142142

143143
# TODO(marclop). It would be best to query Elasticsearch until at least X documents have been ingested.
144-
sleep 10
144+
sleep 5
145145
}
146146

147147
delete_all() {
@@ -169,10 +169,10 @@ data_stream_assert_events() {
169169
local METRICS_INDEX="metrics-apm.internal-*"
170170
local VERSION=${1}
171171
local ENTRIES=${2}
172-
assert_document ${ERRORS_INDEX} "error.id" "9876543210abcdeffedcba0123456789" ${VERSION} ${ENTRIES}
173-
assert_document ${TRACES_INDEX} "span.id" "1234567890aaaade" ${VERSION} ${ENTRIES}
174-
assert_document ${TRACES_INDEX} "transaction.id" "4340a8e0df1906ecbfa9" ${VERSION} ${ENTRIES}
175-
assert_document ${METRICS_INDEX} "transaction.type" "request" ${VERSION} ${ENTRIES}
172+
retry 6 assert_document ${ERRORS_INDEX} "error.id" "9876543210abcdeffedcba0123456789" ${VERSION} ${ENTRIES}
173+
retry 6 assert_document ${TRACES_INDEX} "span.id" "1234567890aaaade" ${VERSION} ${ENTRIES}
174+
retry 6 assert_document ${TRACES_INDEX} "transaction.id" "4340a8e0df1906ecbfa9" ${VERSION} ${ENTRIES}
175+
retry 6 assert_document ${METRICS_INDEX} "transaction.type" "request" ${VERSION} ${ENTRIES}
176176
}
177177

178178
healthcheck() {
@@ -418,3 +418,23 @@ is_curl_fail_with_body() {
418418
fi
419419
return $HAS_FAIL_WITH_BODY
420420
}
421+
422+
#######################################
# Run a command until it succeeds, backing off exponentially between attempts.
# Arguments:
#   $1   - maximum number of attempts
#   $2.. - command to run, followed by its arguments
# Outputs:
#   Writes one progress line to stdout after every failed attempt.
# Returns:
#   0 as soon as the command succeeds; otherwise the exit status of the
#   last failed attempt once the attempts are exhausted.
#######################################
retry() {
    local retries=$1
    shift

    local count=0
    # Keep the bookkeeping variables local so they do not leak into (or
    # clobber) the caller's scope.
    local status delay
    # The command runs as the loop condition, so a failing attempt does not
    # trip `set -e` in the calling script.
    until "$@"; do
        status=$?
        # Delay doubles on every failure: 1, 2, 4, 8, ... seconds.
        delay=$((2 ** count))
        count=$((count + 1))
        if [ "$count" -lt "$retries" ]; then
            echo "-> Retry cmd: '$*'; $count/$retries exited $status, retrying in $delay seconds..."
            sleep "$delay"
        else
            echo "-> Retry cmd: '$*'; $count/$retries exited $status, no more retries left."
            return "$status"
        fi
    done
    return 0
}

0 commit comments

Comments
 (0)