Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,13 @@ jobs:
echo "================ CLEANUP COMPLETE ================"
- name: Use azure ubuntu archive
uses: ./.github/actions/dns-spoof-ubuntu-archive
- name: Start OOM monitor
run: |
sudo dmesg --clear || true
setsid sh -c 'sudo dmesg --follow 2>/dev/null | \
grep --line-buffered -iE "oom|out of memory|killed process|invoked oom" \
> /tmp/oom-monitor.log 2>&1' &
echo "OOM_MONITOR_PID=$!" >> "$GITHUB_ENV"
- name: Pre-build base images
run: |
set -eu
Expand All @@ -284,6 +291,7 @@ jobs:
env:
TEST_SUITE: ${{ matrix.suite }}
- name: Run integration tests
id: run-tests
run: |
set -ex
if [ -n "${TEST_SUITE}" ] && [ ! "${TEST_SUITE}" = "other" ]; then
Expand All @@ -296,6 +304,22 @@ jobs:
env:
TEST_SUITE: ${{ matrix.suite }}
TEST_SKIP: ${{ matrix.skip }}
- name: Check for OOM kills
if: always()
run: |
kill -- -"${OOM_MONITOR_PID}" 2>/dev/null || true
mkdir -p /tmp/reports

if [ -s /tmp/oom-monitor.log ]; then
echo "::warning::OOM kills detected during test run"
cat /tmp/oom-monitor.log
cp /tmp/oom-monitor.log /tmp/reports/oom-monitor.log
else
echo "No OOM kills detected"
fi

# Capture recent dmesg for context on any failures
sudo dmesg -T 2>/dev/null | tail -200 > /tmp/reports/dmesg-tail.log || true
- name: Get traces
if: always()
run: |
Expand Down Expand Up @@ -327,15 +351,29 @@ jobs:
set -e

dir="$(mktemp -d)"
f="${dir}/dockerd.log"
echo "DOCKERD_LOG_PATH=${f}" >> $GITHUB_OUTPUT
sudo journalctl -u docker > "${f}"
echo "DOCKERD_LOG_PATH=${dir}" >> $GITHUB_OUTPUT

if [ "${{ steps.run-tests.outputs.test_timeout }}" = "true" ]; then
echo "::group::Collecting pprof data from dockerd (test timeout detected)"
curl --unix-socket /var/run/docker.sock \
-o "${dir}/goroutine-stacks.txt" \
"http://localhost/debug/pprof/goroutine?debug=2" || true

curl --unix-socket /var/run/docker.sock \
-o "${dir}/heap-profile.bin" \
"http://localhost/debug/pprof/heap" || true

cp "$(which dockerd)" "${dir}/dockerd" || true
echo "::endgroup::"
fi

sudo journalctl -u docker > "${dir}/dockerd.log"
- name: Upload buildkit logs
if: failure()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: e2e-dockerd-logs-${{ matrix.suite }}
path: ${{ steps.dump-logs.outputs.DOCKERD_LOG_PATH }}
path: ${{ steps.dump-logs.outputs.DOCKERD_LOG_PATH }}/*
retention-days: 1

unit:
Expand Down
26 changes: 26 additions & 0 deletions cmd/test2json2gha/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"flag"
"fmt"
"io"
"iter"
"log/slog"
"os"
"runtime/debug"
Expand Down Expand Up @@ -73,6 +74,7 @@ func do(in io.Reader, out io.Writer, cfg config) (bool, error) {
var wg waitGroup

results.markUnfinishedAsTimeout()
signalTimeout(results.Results())

wg.Go(func() {
var rf ResultsFormatter
Expand Down Expand Up @@ -135,6 +137,30 @@ func do(in io.Reader, out io.Writer, cfg config) (bool, error) {
return bool(anyFailed), nil
}

// signalTimeout appends the line "test_timeout=true" to the file named by the
// GITHUB_OUTPUT environment variable when at least one result is marked as
// timed out. This allows subsequent CI steps to detect that a timeout occurred.
//
// It does nothing when GITHUB_OUTPUT is unset or empty, or when no result in
// results has its timeout flag set. The line is written at most once, and
// iteration stops at the first timed-out result. Failures to open, write, or
// close the file are logged via slog and are not returned to the caller.
func signalTimeout(results iter.Seq[*TestResult]) {
	ghOutput := os.Getenv("GITHUB_OUTPUT")
	if ghOutput == "" {
		return
	}

	timedOut := false
	for r := range results {
		if r.timeout {
			timedOut = true
			break
		}
	}
	if !timedOut {
		return
	}

	// Opened without O_CREATE: a missing file is reported as an error rather
	// than being created here.
	f, err := os.OpenFile(ghOutput, os.O_WRONLY|os.O_APPEND, 0)
	if err != nil {
		slog.Error("Error opening GITHUB_OUTPUT", "error", err)
		return
	}

	if _, err := fmt.Fprintln(f, "test_timeout=true"); err != nil {
		slog.Error("Error writing timeout status to GITHUB_OUTPUT", "error", err)
	}

	// A Close error on a file that was just written can mean the data never
	// made it out, so surface it instead of dropping it.
	if err := f.Close(); err != nil {
		slog.Error("Error closing GITHUB_OUTPUT", "error", err)
	}
}

// waitGroup embeds sync.WaitGroup, so the embedded type's methods are promoted
// onto waitGroup.
// NOTE(review): methods declared on waitGroup itself (if any) lie outside this
// view of the file — confirm before relying on this description.
type waitGroup struct {
sync.WaitGroup
}
Expand Down
Loading