Skip to content

Commit 4783ffb

Browse files
chore(ci): add test to verify Loki is able to flush to S3 (#2673)
## Description

Adds a test to the EKS workflow to verify that Loki can flush to S3.

## Related Issue

Relates to CORE-495

## Type of change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [X] Other (security config, docs update, etc)

## Steps to Validate

- Happy Path
  - Successful workflow completion
- Sad Path
  - Break Loki write S3 permissions. Run the workflow. Validate that the test fails as expected.
  - Example failing run, created by changing the loki-write service account in commit [756131d](756131d):
    - https://github.com/defenseunicorns/uds-core/actions/runs/26062032260/job/76624324060?pr=2673#step:16:102

## Checklist before merging

- [X] Test, docs, adr added or updated as needed
- [X] [Contributor Guide](https://github.com/defenseunicorns/uds-core/blob/main/CONTRIBUTING.md) followed
1 parent 1bfbfaf commit 4783ffb

3 files changed

Lines changed: 213 additions & 2 deletions

File tree

.github/workflows/test-eks.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ jobs:
105105
- name: Test UDS Core
106106
run: uds run -f tasks/test.yaml uds-core-non-k3d
107107

108+
- name: Validate Loki chunks S3 writes
109+
run: uds run -f src/loki/tasks.yaml validate-object-storage-writes --no-progress --with provider=aws --with iac_dir=.github/test-infra/aws/eks
110+
108111
- name: Debug Output
109112
if: ${{ always() }}
110113
uses: ./.github/actions/debug-output

.github/workflows/test-iac.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2024 Defense Unicorns
1+
# Copyright 2024-2026 Defense Unicorns
22
# SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial
33

44
name: Filter IaC
@@ -77,6 +77,7 @@ jobs:
7777
- ".github/workflows/test-rke2.yaml"
7878
eks:
7979
- "tasks/iac.yaml"
80+
- "src/loki/tasks.yaml"
8081
- ".github/bundles/eks/**"
8182
- ".github/test-infra/aws/eks/**"
8283
- ".github/workflows/test-eks.yaml"

src/loki/tasks.yaml

Lines changed: 208 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
# Copyright 2024 Defense Unicorns
1+
# Copyright 2024-2026 Defense Unicorns
22
# SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial
33

4+
variables:
5+
- name: LOKI_CHUNKS_OBJECT_STORAGE_BUCKET
6+
sensitive: true
7+
48
tasks:
59
- name: validate
610
actions:
@@ -31,3 +35,206 @@ tasks:
3135
cmd: |
3236
npm ci && npx vitest run "loki" --exclude "**/*integration*"
3337
dir: test/vitest
38+
39+
- name: validate-object-storage-writes
40+
description: Validate Loki writes log chunks to configured object storage
41+
inputs:
42+
provider:
43+
description: Object storage provider to validate. (Currently only 'aws' is supported)
44+
required: true
45+
iac_dir:
46+
description: Directory containing IaC outputs for the object storage backend
47+
default: .github/test-infra/aws/eks
48+
poll_timeout:
49+
description: Maximum seconds to wait for Loki objects to appear in object storage
50+
default: "300"
51+
poll_interval:
52+
description: Seconds between object storage polling attempts
53+
default: "10"
54+
actions:
55+
- description: Validate object storage provider configuration
56+
shell:
57+
darwin: bash
58+
linux: bash
59+
cmd: |
60+
set -euo pipefail

# Pre-flight check for the object storage validation task: confirms the
# requested provider is supported, that every CLI the later steps invoke is
# installed, and that the IaC directory exists. All errors go to stderr and the
# step exits 1 on the first category of failure.

# Print an error to stderr and abort the step.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# Record (without aborting) that a required executable is missing, so one run
# reports every missing tool instead of only the first.
#   $1 - executable name looked up on PATH
#   $2 - optional display name used in the message (defaults to $1)
MISSING_TOOLS=0
require_tool() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "ERROR: ${2:-$1} is required" >&2
    MISSING_TOOLS=1
  fi
}

case "${{ .inputs.provider }}" in
  aws)
    require_tool aws "aws CLI"
    require_tool tofu
    require_tool curl
    require_tool jq
    ;;
  *)
    die "Unsupported Loki object storage provider '${{ .inputs.provider }}'. Supported providers: aws."
    ;;
esac

# Abort only after all tools were checked so the log lists each missing one.
if [ "${MISSING_TOOLS}" -ne 0 ]; then
  exit 1
fi

if [ ! -d "${{ .inputs.iac_dir }}" ]; then
  die "IaC directory '${{ .inputs.iac_dir }}' does not exist."
fi
79+
80+
- description: Resolve Loki chunks S3 bucket
81+
cmd: tofu output -raw loki_s3_bucket
82+
dir: ${{ .inputs.iac_dir }}
83+
mute: true
84+
setVariables:
85+
- name: LOKI_CHUNKS_OBJECT_STORAGE_BUCKET
86+
87+
- description: Set object storage validation start time
88+
cmd: date -u +'%Y-%m-%dT%H:%M:%S+00:00'
89+
mute: true
90+
setVariables:
91+
- name: LOKI_OBJECT_STORAGE_TEST_START
92+
93+
- description: Push a test log and flush Loki chunks
94+
shell:
95+
darwin: bash
96+
linux: bash
97+
cmd: |
98+
set -euo pipefail

# Pushes one uniquely labelled log line to the loki-write service through a
# local kubectl port-forward, then calls the /flush endpoint on the same
# forwarded port. Exits 1 with diagnostics if the port-forward never becomes
# ready or if either HTTP call fails.

# Local port for the port-forward; it is mapped to service port 3100.
LOKI_PORT="3100"
PORT_FORWARD_PID=""
PORT_FORWARD_LOG="$(mktemp)"

# Number of readiness probes (2s apart) before giving up on the port-forward.
READY_ATTEMPTS=30

# Stop the background port-forward (if still running) and remove its log file.
# Registered on EXIT so it runs on success and on every failure path.
cleanup() {
  if [ -n "${PORT_FORWARD_PID}" ] && kill -0 "${PORT_FORWARD_PID}" >/dev/null 2>&1; then
    kill "${PORT_FORWARD_PID}" >/dev/null 2>&1 || true
    wait "${PORT_FORWARD_PID}" >/dev/null 2>&1 || true
  fi
  rm -f "${PORT_FORWARD_LOG}"
}

# Print the state of the Loki write pods and whatever the port-forward logged.
diagnostics() {
  local line
  echo "Loki push and flush diagnostics:"
  echo "Loki write pods:"
  uds zarf tools kubectl get pods -n loki -l app.kubernetes.io/component=write || true
  echo "Port-forward output:"
  if [ -s "${PORT_FORWARD_LOG}" ]; then
    # The `|| [ -n ... ]` keeps a final line that has no trailing newline.
    while IFS= read -r line || [ -n "${line}" ]; do
      echo "  ${line}"
    done < "${PORT_FORWARD_LOG}"
  else
    echo "  No port-forward output captured."
  fi
}

# Report an error on stderr, dump diagnostics, and abort the step.
fail() {
  echo "ERROR: $*" >&2
  diagnostics
  exit 1
}

trap cleanup EXIT

uds zarf tools kubectl port-forward -n loki svc/loki-write "${LOKI_PORT}:3100" >"${PORT_FORWARD_LOG}" 2>&1 &
PORT_FORWARD_PID="$!"

for (( attempt = 1; attempt <= READY_ATTEMPTS; attempt++ )); do
  echo "Attempt ${attempt}/${READY_ATTEMPTS}: checking Loki write port-forward readiness..."
  # --max-time bounds each probe so a connection that is accepted but never
  # answered cannot stall the loop past its attempt budget.
  if curl -fsS --max-time 5 "http://127.0.0.1:${LOKI_PORT}/ready" >/dev/null 2>&1; then
    echo "Loki write port-forward is ready."
    break
  fi
  if ! kill -0 "${PORT_FORWARD_PID}" >/dev/null 2>&1; then
    fail "Loki write port-forward exited before becoming ready."
  fi
  if [ "${attempt}" -eq "${READY_ATTEMPTS}" ]; then
    fail "Timed out waiting for Loki write port-forward to become ready."
  fi
  sleep 2
done

# Epoch seconds plus the shell PID make the stream label unique per run.
TEST_ID="$(date +%s)-$$"
LOG_MESSAGE="loki-object-storage-ci-${TEST_ID}"
# Epoch seconds with nine zeros appended, i.e. a nanosecond-resolution value.
TIMESTAMP="$(date +%s)000000000"
PAYLOAD="$(jq -n \
  --arg timestamp "${TIMESTAMP}" \
  --arg log_message "${LOG_MESSAGE}" \
  --arg test_id "${TEST_ID}" \
  '{
    streams: [
      {
        stream: {
          job: "loki-object-storage-ci",
          test_id: $test_id
        },
        values: [
          [$timestamp, $log_message]
        ]
      }
    ]
  }')"

curl -fsS --max-time 30 \
  -H "Content-Type: application/json" \
  -X POST \
  --data-raw "${PAYLOAD}" \
  "http://127.0.0.1:${LOKI_PORT}/loki/api/v1/push" \
  || fail "Failed to push test log to Loki write endpoint."

curl -fsS --max-time 60 -X POST "http://127.0.0.1:${LOKI_PORT}/flush" >/dev/null \
  || fail "Failed to flush Loki write endpoint."

echo "Pushed test log and flushed Loki chunks."
184+
185+
- description: Verify Loki chunks object write
186+
shell:
187+
darwin: bash
188+
linux: bash
189+
cmd: |
190+
set -euo pipefail

# Polls the Loki chunks bucket until at least one object under fake/ has a
# LastModified value at or after the recorded test start time. Exits 0 as soon
# as one is found and exits 1 (printing the last S3 list error, with the bucket
# name redacted) once the poll timeout elapses.

# Both values are set by earlier steps of this task. Abort immediately if either
# is missing or empty instead of polling until the timeout with a request that
# cannot succeed.
: "${LOKI_CHUNKS_OBJECT_STORAGE_BUCKET:?must be set by the bucket resolution step}"
: "${LOKI_OBJECT_STORAGE_TEST_START:?must be set by the start time step}"

# Task inputs, in seconds.
POLL_TIMEOUT_SECONDS="${{ .inputs.poll_timeout }}"
POLL_INTERVAL_SECONDS="${{ .inputs.poll_interval }}"

S3_LIST_ERROR="$(mktemp)"

# Remove the temp file holding the most recent `aws` stderr output.
cleanup() {
  rm -f "${S3_LIST_ERROR}"
}

# Print the stderr of the last S3 list call, if it produced any.
diagnostics() {
  local line
  if [ -s "${S3_LIST_ERROR}" ]; then
    echo "Last S3 list error:"
    while IFS= read -r line || [ -n "${line}" ]; do
      # The bucket variable is declared sensitive, so mask it in the output.
      # Quoting the pattern makes the bucket name match literally, not as a glob.
      echo "  ${line//"${LOKI_CHUNKS_OBJECT_STORAGE_BUCKET}"/<loki-chunks-bucket>}"
    done < "${S3_LIST_ERROR}"
  fi
}

# Report an error on stderr, dump diagnostics, and abort the step.
fail() {
  echo "ERROR: $*" >&2
  diagnostics
  exit 1
}

trap cleanup EXIT

END_TIME=$(( $(date +%s) + POLL_TIMEOUT_SECONDS ))
ATTEMPT=1
while [ "$(date +%s)" -lt "${END_TIME}" ]; do
  REMAINING_SECONDS=$(( END_TIME - $(date +%s) ))
  echo "Attempt ${ATTEMPT}: checking for Loki chunk objects under fake/ written after ${LOKI_OBJECT_STORAGE_TEST_START} (${REMAINING_SECONDS}s remaining)..."

  # auth_enabled is false, so Loki writes tenant chunk objects under fake/.
  # Truncate the error file first so diagnostics only ever show this attempt.
  : > "${S3_LIST_ERROR}"
  # A failing list call is tolerated here (|| true) and simply retried; its
  # stderr is kept in the file for the timeout diagnostics.
  RECENT_CHUNK_OBJECT_COUNT="$(aws s3api list-objects-v2 \
    --bucket "${LOKI_CHUNKS_OBJECT_STORAGE_BUCKET}" \
    --prefix "fake/" \
    --output json 2>"${S3_LIST_ERROR}" \
    | jq --arg start "${LOKI_OBJECT_STORAGE_TEST_START}" '[.Contents[]? | select(.LastModified >= $start)] | length' \
    || true)"

  # 2>/dev/null hides the "integer expression expected" noise when the count
  # is not a number; the test then just evaluates to false.
  if [ -n "${RECENT_CHUNK_OBJECT_COUNT}" ] && [ "${RECENT_CHUNK_OBJECT_COUNT}" -gt 0 ] 2>/dev/null; then
    echo "Validated Loki wrote recent chunk objects to configured object storage."
    exit 0
  fi

  echo "No recent Loki chunk objects found under fake/ yet; waiting ${POLL_INTERVAL_SECONDS}s before retrying."
  ATTEMPT=$(( ATTEMPT + 1 ))
  sleep "${POLL_INTERVAL_SECONDS}"
done

fail "Timed out waiting for recent Loki chunk objects under fake/ in object storage."

0 commit comments

Comments
 (0)