-
Notifications
You must be signed in to change notification settings - Fork 139
Expand file tree
/
Copy pathpull_request_integration_tests.yml
More file actions
359 lines (317 loc) · 12.6 KB
/
Copy pathpull_request_integration_tests.yml
File metadata and controls
359 lines (317 loc) · 12.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# Integration test workflow. Runs for pushes and pull requests that target
# main or a release-* branch, and can also be invoked by another workflow.
name: Integration tests

on:
  push:
    branches:
      - main
      - "release-*"

  pull_request:
    branches:
      - main
      - "release-*"
    # A pull request only triggers a run when it touches this workflow file or
    # one of the listed source/build/config locations.
    paths:
      - ".github/workflows/pull_request_integration_tests.yml"
      - "**.go"
      - "go.mod"
      - "go.sum"
      - "bpf/**"
      - "cmd/**"
      - "configs/**"
      - "internal/**"
      - "pkg/**"
      - "scripts/**"

  # Reusable-workflow entry point. Both the input and the secret are optional.
  workflow_call:
    inputs:
      # Consumed by every checkout step as `inputs.ref || github.sha`.
      ref:
        type: string
        required: false
        description: "Git ref (commit, branch, or tag) to check out"
    secrets:
      DOCKER_TOKEN_EBPF_INSTRUMENTATION:
        required: false
        description: "Docker Hub token for pulling images (optional)"
# At most one active run per pull request: a newer run for the same PR cancels
# the one still in flight.
#
# The group is keyed on the pull request number rather than github.head_ref.
# head_ref is only the bare source-branch name, so pull requests opened from
# different forks that happen to use the same branch name (for example "main")
# would land in the same group and cancel each other's runs.
#
# Events without a pull request (such as push) fall back to the unique run ID,
# which gives every such run its own group, so they are never cancelled.
concurrency:
  group: pr-integration-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

# Default GITHUB_TOKEN scope for the jobs in this workflow: read-only access to
# repository contents.
permissions:
  contents: read
jobs:
# Runs the repository-local generate-bpf action once, ahead of the test matrix
# (the test job lists this job in its `needs`).
generate-bpf:
  name: Generate BPF
  timeout-minutes: 30
  runs-on: ubuntu-latest
  steps:
    # Use the ref passed by a calling workflow when there is one, otherwise the
    # commit that triggered this run. The token is not left in the git config.
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
      with:
        persist-credentials: false
        ref: ${{ inputs.ref || github.sha }}

    # Local composite action; its behaviour is defined under .github/actions.
    # The prefix is the same string the test job uses for its Go build cache key.
    # NOTE(review): presumably this action uploads the bpf-generated-<run_id>
    # artifact that the test job downloads - confirm in the action definition.
    - uses: ./.github/actions/generate-bpf
      with:
        cache-key-prefix: go-build-integration-race
# Computes the JSON matrix that the `test` job fans out over and exposes it as
# the `matrix` job output.
test-matrix:
  name: "Build matrix"
  runs-on: ubuntu-latest
  timeout-minutes: 45
  outputs:
    matrix: ${{ steps.build-matrix.outputs.matrix }}
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
      with:
        ref: ${{ inputs.ref || github.sha }}
        persist-credentials: false

    - name: Build test matrix
      id: build-matrix
      env:
        # Quoted so the value is unambiguously a string, like the other
        # numeric env values in this workflow.
        # NOTE(review): presumably the number of shards the make target splits
        # the tests into - confirm in the Makefile.
        PARTITIONS: "10"
      # Writes a single `matrix=<json>` entry: the key is emitted without a
      # trailing newline and the make target's stdout is appended after it.
      # Assumes the make target prints the JSON on one line.
      # The output path is quoted, matching the other steps in this workflow.
      run: |
        echo -n "matrix=" >> "$GITHUB_OUTPUT"
        make integration-test-matrix-json >> "$GITHUB_OUTPUT"
# Pulls the base images once and stores them as a tarball in the Actions cache.
# The computed cache key is exported so the test shards can restore the same
# entry.
prep-images:
  name: "Pre-pull base images"
  runs-on: ubuntu-latest
  timeout-minutes: 20
  outputs:
    cache-key: ${{ steps.cache-key.outputs.key }}
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
      with:
        ref: ${{ inputs.ref || github.sha }}
        persist-credentials: false

    # Key layout: base-images-<arch>-<ref scope>-<image scope>
    #   ref scope   - "input-ref-<sha256 of inputs.ref>" when a caller passed a
    #                 ref, otherwise the literal "event-ref".
    #   image scope - sha256 of the output of the discover script.
    - name: Compute base-image cache key
      id: cache-key
      env:
        ARCH: ${{ runner.arch }}
        INPUT_REF: ${{ inputs.ref || '' }}
      # pipefail is enabled explicitly: a step without an explicit `shell:` runs
      # under `bash -e`, where a pipeline's status is that of its last command.
      # Without it, a failing discover script would be masked by sha256sum/cut
      # and the key would silently be built from the hash of empty output.
      run: |
        set -euo pipefail
        if [ -n "${INPUT_REF}" ]; then
          REF_SCOPE="input-ref-$(printf '%s' "${INPUT_REF}" | sha256sum | cut -d' ' -f1)"
        else
          REF_SCOPE="event-ref"
        fi
        IMAGE_SCOPE="$(bash scripts/discover-integration-images.sh --base-images | sha256sum | cut -d' ' -f1)"
        echo "key=base-images-${ARCH}-${REF_SCOPE}-${IMAGE_SCOPE}" >> "$GITHUB_OUTPUT"

    - name: Cache base images
      id: base-images-cache
      uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0 # zizmor: ignore[cache-poisoning] key derives from the digest-pinned base-image list; images are content-addressable
      with:
        path: /tmp/base-images.tar
        key: ${{ steps.cache-key.outputs.key }}

    # Only pull when the cache step did not restore an exact match.
    - name: Pull and save base images
      if: steps.base-images-cache.outputs.cache-hit != 'true'
      run: bash scripts/pull-base-images.sh /tmp/base-images.tar
# One job per matrix entry produced by `test-matrix`. Each shard runs its slice
# of the integration tests with the race detector, samples eBPF metrics with
# bpftool while the tests run, and uploads reports, logs and coverage.
test:
  name: ${{ matrix.description }}
  needs: [test-matrix, generate-bpf, prep-images]
  permissions:
    # A job-level permissions block replaces the workflow-level one instead of
    # merging with it: every scope not listed here is set to "none". Restate
    # read access to contents so the checkout below does not depend on the
    # repository being publicly readable (e.g. when called from a private repo).
    contents: read
    # Required for codecov
    checks: write
    pull-requests: write
  runs-on: ubuntu-latest
  timeout-minutes: 45
  strategy:
    # Let the remaining shards finish when one of them fails.
    fail-fast: false
    matrix: ${{ fromJson(needs.test-matrix.outputs.matrix) }}
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
      with:
        ref: ${{ inputs.ref || github.sha }}
        persist-credentials: false

    - uses: ./.github/actions/free-disk

    # The secret is optional, so record whether it is set and let the login
    # step be skipped when it is not.
    - name: Check for Docker Hub token
      id: check-docker-token
      run: |
        if [ -n "${DOCKER_TOKEN}" ]; then
          echo "has_token=true" >> "$GITHUB_OUTPUT"
        else
          echo "has_token=false" >> "$GITHUB_OUTPUT"
        fi
      env:
        DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN_EBPF_INSTRUMENTATION }}

    - name: Log in to Docker Hub
      if: steps.check-docker-token.outputs.has_token == 'true'
      uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
      with:
        username: ${{ vars.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_TOKEN_EBPF_INSTRUMENTATION }}

    - name: Set up Go
      uses: actions/setup-go@924ae3a1cded613372ab5595356fb5720e22ba16 # v6.5.0 # zizmor: ignore[cache-poisoning] go.sum verifies module integrity
      with:
        go-version-file: "go.mod"
        cache: true

    # Exact key is derived from the Go sources and go.sum; the restore key
    # falls back to the most recent cache for the same OS and architecture.
    - name: Go build cache
      uses: actions/cache@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0 # zizmor: ignore[cache-poisoning] go build cache is content-addressable and self-verifying
      with:
        path: ~/.cache/go-build
        key: go-build-integration-race-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/*.go', 'go.sum') }}
        restore-keys: |
          go-build-integration-race-${{ runner.os }}-${{ runner.arch }}-

    - name: Download generated BPF files
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        name: bpf-generated-${{ github.run_id }}

    - name: Build test tools
      run: make prereqs

    # Restore-only: the cache entry itself is written by the prep-images job.
    - name: Restore base images
      uses: actions/cache/restore@2c8a9bd7457de244a408f35966fab2fb45fda9c8 # v6.0.0 # zizmor: ignore[cache-poisoning] key derives from the digest-pinned base-image list; images are content-addressable
      with:
        path: /tmp/base-images.tar
        key: ${{ needs.prep-images.outputs.cache-key }}

    # A missing tarball is reported as a warning, not a failure.
    - name: Load base images
      run: |
        if [ -f /tmp/base-images.tar ]; then
          docker load -i /tmp/base-images.tar
        else
          echo "::warning::base image cache miss; builds may pull from the upstream registry and risk HTTP 429"
        fi

    - name: Check disk usage before tests
      run: df -h

    # Locates the bpftool binary installed under /usr/lib by the linux-tools
    # package and links it to a fixed path. Fails with diagnostics when no
    # binary is found.
    - name: Install bpftool (PoC eBPF metrics)
      run: |
        sudo apt-get update
        sudo apt-get install -y linux-tools-generic jq
        BPFTOOL_BIN=$(find /usr/lib -maxdepth 3 -name bpftool 2>/dev/null \
          | grep linux-tools | head -n1)
        if [ -z "$BPFTOOL_BIN" ]; then
          echo "Could not find bpftool binary. Diagnostics:" >&2
          dpkg -l | grep linux-tools >&2 || true
          find /usr/lib -maxdepth 3 -name bpftool 2>/dev/null >&2 || true
          exit 1
        fi
        echo "Using bpftool: $BPFTOOL_BIN"
        sudo ln -sf "$BPFTOOL_BIN" /usr/local/sbin/bpftool
        sudo /usr/local/sbin/bpftool version

    # Starts the sampler in the background and records its PID so the stop
    # step can signal it after the tests.
    - name: Start bpftool sampler (PoC eBPF metrics)
      env:
        BPF_SAMPLE_DIR: /tmp/bpfsamples
        BPF_SAMPLE_INTERVAL: "2"
      run: |
        mkdir -p "$BPF_SAMPLE_DIR"
        nohup bash ./scripts/bpf-metrics-sampler.sh "$BPF_SAMPLE_DIR" "$BPF_SAMPLE_INTERVAL" \
          >"$BPF_SAMPLE_DIR/sampler.log" 2>&1 &
        echo $! > "$BPF_SAMPLE_DIR/sampler.pid"
        echo "Sampler started, PID $(cat "$BPF_SAMPLE_DIR/sampler.pid")"

    # Runs only the tests matching this shard's pattern. The go test timeout
    # (40m) is below the job timeout (45m). The JSON log written here is
    # picked up by the metrics summary and the report upload further down.
    - name: Run integration tests
      env:
        MATRIX_ID: ${{ matrix.id }}
        MATRIX_JSON: ${{ toJson(matrix) }}
        MATRIX_TEST_PATTERN: ${{ matrix.test_pattern }}
        RUN_NUMBER: ${{ github.run_number }}
      run: |
        echo Partition
        echo "${MATRIX_JSON}"
        if [ -z "${MATRIX_TEST_PATTERN}" ]; then
          echo "Error: Test pattern is empty for shard $MATRIX_ID"
          exit 1
        fi
        mkdir -p /home/runner/reports
        go tool -modfile=./internal/tools/go.mod gotestsum \
          --rerun-fails=2 --rerun-fails-max-failures=2 --rerun-fails-abort-on-data-race \
          --packages="./internal/test/integration" -ftestname \
          --jsonfile=/home/runner/reports/test-run-"${RUN_NUMBER}"-"${MATRIX_ID}".log \
          -- -race -count=1 -timeout 40m \
          -run="^(${MATRIX_TEST_PATTERN})$" ./internal/test/integration

    - name: Stop bpftool sampler (PoC eBPF metrics)
      if: always()
      env:
        BPF_SAMPLE_DIR: /tmp/bpfsamples
      run: |
        if [ -f "$BPF_SAMPLE_DIR/sampler.pid" ]; then
          SAMPLER_PID=$(cat "$BPF_SAMPLE_DIR/sampler.pid")
          kill "$SAMPLER_PID" 2>/dev/null || true
          # Wait up to 5s for SIGTERM handler + EXIT trap to finish so the
          # summary doesn't read snapshots mid-write and the sysctl gets
          # restored. Force kill if it overruns.
          for _ in $(seq 1 50); do
            kill -0 "$SAMPLER_PID" 2>/dev/null || break
            sleep 0.1
          done
          if kill -0 "$SAMPLER_PID" 2>/dev/null; then
            echo "Sampler $SAMPLER_PID did not exit after 5s; sending SIGKILL"
            kill -9 "$SAMPLER_PID" 2>/dev/null || true
          fi
        fi
        shopt -s nullglob
        snaps=("$BPF_SAMPLE_DIR"/snap-*.json)
        echo "Sample count: ${#snaps[@]}"

    # Best effort: a failing summary script never fails the shard (`|| true`).
    # The test log is passed to the script only when it exists.
    - name: Build eBPF metrics summary (PoC)
      if: always()
      env:
        BPF_SAMPLE_DIR: /tmp/bpfsamples
        MATRIX_ID: ${{ matrix.id }}
        RUN_NUMBER: ${{ github.run_number }}
      run: |
        TEST_LOG="/home/runner/reports/test-run-${RUN_NUMBER}-${MATRIX_ID}.log"
        ARGS=(--in "$BPF_SAMPLE_DIR" --shard "$MATRIX_ID" --out-json "$BPF_SAMPLE_DIR/summary.json")
        if [ -f "$TEST_LOG" ]; then
          ARGS+=(--testlog "$TEST_LOG")
        fi
        bash ./scripts/bpf-metrics-summary.sh "${ARGS[@]}" || true

    # Artifact name pattern is relied on by the bpf-metrics-aggregate job.
    - name: Upload eBPF metrics snapshots (PoC)
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: bpf-metrics-${{ matrix.id }}-${{ github.run_number }}
        path: /tmp/bpfsamples/
        retention-days: 5

    # No `if:` on the two coverage steps, so they are skipped when an earlier
    # step has failed.
    - name: Process coverage data
      run: make itest-coverage-data

    - name: Report coverage
      uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
      env:
        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
      with:
        files: ./testoutput/itest-covdata.txt
        flags: integration-test
        name: integration-coverage-${{ matrix.id }}-${{ github.run_number }}

    - name: Upload test reports
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: integration-reports-${{ matrix.id }}-${{ github.run_number }}
        path: /home/runner/reports/*.log
        retention-days: 5

    - name: Upload test logs
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      if: always()
      with:
        name: integration-logs-${{ matrix.id }}-${{ github.run_number }}
        path: |
          testoutput/*.log
          testoutput/kind

    - name: Check final disk usage
      if: always()
      run: df -h
# Collects the per-shard eBPF metrics artifacts uploaded by the `test` job and
# publishes one combined summary artifact.
bpf-metrics-aggregate:
  name: Aggregate eBPF metrics (PoC)
  needs: [test]
  timeout-minutes: 10
  runs-on: ubuntu-latest
  # Aggregate whenever the matrix actually ran, whether its shards passed or
  # failed. A cancelled or skipped matrix leaves nothing to aggregate, so the
  # job does not run in those cases.
  if: ${{ !cancelled() && (needs.test.result == 'success' || needs.test.result == 'failure') }}
  steps:
    - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
      with:
        persist-credentials: false
        ref: ${{ inputs.ref || github.sha }}

    - name: Install jq
      run: |
        sudo apt-get update
        sudo apt-get install -y jq

    # The pattern requires a digit right after "bpf-metrics-", i.e. a numeric
    # shard ID. This keeps out the aggregate artifact this job uploads itself,
    # which a rerun sharing the same run_number would otherwise download again.
    - name: Download per-shard eBPF metrics artifacts
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        path: ./all-shards
        pattern: bpf-metrics-[0-9]*-${{ github.run_number }}

    # Reads the downloaded shard directories and writes a Markdown and a JSON
    # summary into /tmp/bpf-aggregate.
    - name: Aggregate and emit summary
      run: |
        mkdir -p /tmp/bpf-aggregate
        bash ./scripts/bpf-metrics-aggregate.sh \
          --in ./all-shards \
          --out-md /tmp/bpf-aggregate/aggregate.md \
          --out-json /tmp/bpf-aggregate/aggregate.json

    # Uploaded even when the aggregation step failed.
    - name: Upload aggregated summary
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: bpf-metrics-aggregate-${{ github.run_number }}
        path: /tmp/bpf-aggregate/
        retention-days: 30