Skip to content

Commit 19ddc45

Browse files
mchmarny and claude committed
refactor: extract E2E tests into reusable composite action
Create .github/actions/e2e/action.yml for shared E2E test logic:
- Kind cluster setup with fake GPU environment
- Tilt CI for local development stack
- Debug artifact collection on failure
- Automatic cleanup

Benefits:
- Consistent E2E testing in both on-push and on-tag workflows
- Single source of truth for E2E test configuration
- Easier maintenance and updates

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 39d3ae1 commit 19ddc45

File tree

3 files changed

+158
-92
lines changed

3 files changed

+158
-92
lines changed

.github/actions/e2e/action.yml

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: 'E2E Tests'
16+
description: 'Run end-to-end tests using Kind cluster with fake GPU environment'
17+
18+
inputs:
19+
go_version:
20+
description: 'Go version to install (e.g., 1.25)'
21+
required: true
22+
upload_artifacts:
23+
description: 'Whether to upload debug artifacts on failure (true/false)'
24+
required: false
25+
default: 'true'
26+
artifact_retention_days:
27+
description: 'Number of days to retain debug artifacts'
28+
required: false
29+
default: '7'
30+
31+
runs:
32+
using: 'composite'
33+
steps:
34+
- name: Set up Go
35+
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
36+
with:
37+
go-version: ${{ inputs.go_version }}
38+
cache: true
39+
40+
- name: Free up disk space
41+
shell: bash
42+
run: |
43+
sudo rm -rf /usr/local/lib/android
44+
sudo rm -rf /usr/share/dotnet
45+
sudo rm -rf /opt/ghc
46+
docker system prune -f
47+
48+
- name: Prep system for Kind cluster
49+
shell: bash
50+
run: |
51+
# Network configuration for Kind
52+
sudo sysctl -w net.ipv4.ip_forward=1
53+
sudo sysctl -w fs.inotify.max_user_watches=524288
54+
sudo sysctl -w fs.inotify.max_user_instances=1024
55+
56+
- name: Install E2E testing tools
57+
uses: ./.github/actions/install-e2e-tools
58+
59+
- name: Create Kind cluster
60+
shell: bash
61+
run: make cluster-create
62+
63+
- name: Run Tilt CI
64+
shell: bash
65+
run: make tilt-ci
66+
67+
- name: Build and push Eidos image to local registry
68+
shell: bash
69+
run: |
70+
# Build and push eidos image for snapshot agent
71+
KO_DOCKER_REPO=localhost:5001/eidos ko build --bare --tags=local ./cmd/eidos
72+
# Verify image is available
73+
curl -sf http://localhost:5001/v2/eidos/tags/list
74+
75+
- name: Set up fake GPU environment
76+
shell: bash
77+
run: |
78+
# Create gpu-operator namespace
79+
kubectl create namespace gpu-operator --dry-run=client -o yaml | kubectl apply -f -
80+
81+
# Inject fake nvidia-smi into Kind worker nodes
82+
for node in $(docker ps --filter "name=-worker" --format "{{.Names}}"); do
83+
echo "Injecting fake nvidia-smi into $node"
84+
docker cp tools/fake-nvidia-smi "${node}:/usr/local/bin/nvidia-smi"
85+
docker exec "$node" chmod +x /usr/local/bin/nvidia-smi
86+
# Verify it works
87+
docker exec "$node" nvidia-smi --version
88+
done
89+
90+
- name: Set up port forwarding
91+
shell: bash
92+
run: |
93+
kubectl port-forward -n eidos svc/eidosd 8080:8080 &
94+
# Poll the health endpoint (up to 15 tries, 2s apart) instead of a fixed sleep, so a slow port-forward does not fail the job
95+
curl -sf --retry 15 --retry-delay 2 --retry-connrefused http://localhost:8080/health || exit 1
96+
97+
- name: Run E2E tests
98+
id: e2e-tests
99+
shell: bash
100+
env:
101+
EIDOS_IMAGE: localhost:5001/eidos:local
102+
FAKE_GPU_ENABLED: "true"
103+
run: ./tests/e2e/run.sh
104+
105+
- name: Collect debug artifacts
106+
if: failure()
107+
shell: bash
108+
run: |
109+
mkdir -p /tmp/debug-artifacts
110+
kubectl get all --all-namespaces > /tmp/debug-artifacts/all-resources.txt || true
111+
kubectl get events --all-namespaces --sort-by='.lastTimestamp' > /tmp/debug-artifacts/events.txt || true
112+
kubectl logs -n eidos -l app.kubernetes.io/name=eidosd --tail=500 > /tmp/debug-artifacts/eidosd-logs.txt || true
113+
docker images > /tmp/debug-artifacts/docker-images.txt || true
114+
115+
- name: Export Kind logs
116+
if: failure()
117+
shell: bash
118+
run: |
119+
mkdir -p /tmp/kind-logs
120+
kind export logs /tmp/kind-logs --name eidos || true
121+
122+
- name: Upload debug artifacts
123+
if: failure() && inputs.upload_artifacts == 'true'
124+
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
125+
with:
126+
name: e2e-debug-artifacts-${{ github.run_id }}
127+
path: |
128+
/tmp/debug-artifacts/
129+
/tmp/kind-logs/
130+
retention-days: ${{ inputs.artifact_retention_days }}
131+
132+
- name: Cleanup
133+
if: always()
134+
shell: bash
135+
run: |
136+
make cluster-delete || true
137+
docker system prune -f || true

.github/workflows/on-push.yaml

Lines changed: 3 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -103,95 +103,7 @@ jobs:
103103
id: versions
104104
uses: ./.github/actions/load-versions
105105

106-
- name: Set up Go
107-
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
106+
- name: Run E2E Tests
107+
uses: ./.github/actions/e2e
108108
with:
109-
go-version: ${{ steps.versions.outputs.go }}
110-
cache: true
111-
112-
- name: Free up disk space
113-
run: |
114-
sudo rm -rf /usr/local/lib/android
115-
sudo rm -rf /usr/share/dotnet
116-
sudo rm -rf /opt/ghc
117-
docker system prune -f
118-
119-
- name: Prep system for Kind cluster
120-
run: |
121-
# Network configuration for Kind
122-
sudo sysctl -w net.ipv4.ip_forward=1
123-
sudo sysctl -w fs.inotify.max_user_watches=524288
124-
sudo sysctl -w fs.inotify.max_user_instances=1024
125-
126-
- name: Install E2E testing tools
127-
uses: ./.github/actions/install-e2e-tools
128-
129-
- name: Create Kind cluster
130-
run: make cluster-create
131-
132-
- name: Run Tilt CI
133-
run: make tilt-ci
134-
135-
- name: Build and push Eidos image to local registry
136-
run: |
137-
# Build and push eidos image for snapshot agent
138-
KO_DOCKER_REPO=localhost:5001/eidos ko build --bare --tags=local ./cmd/eidos
139-
# Verify image is available
140-
curl -sf http://localhost:5001/v2/eidos/tags/list
141-
142-
- name: Set up fake GPU environment
143-
run: |
144-
# Create gpu-operator namespace
145-
kubectl create namespace gpu-operator --dry-run=client -o yaml | kubectl apply -f -
146-
147-
# Inject fake nvidia-smi into Kind worker nodes
148-
for node in $(docker ps --filter "name=-worker" --format "{{.Names}}"); do
149-
echo "Injecting fake nvidia-smi into $node"
150-
docker cp tools/fake-nvidia-smi "${node}:/usr/local/bin/nvidia-smi"
151-
docker exec "$node" chmod +x /usr/local/bin/nvidia-smi
152-
# Verify it works
153-
docker exec "$node" nvidia-smi --version
154-
done
155-
156-
- name: Set up port forwarding
157-
run: |
158-
kubectl port-forward -n eidos svc/eidosd 8080:8080 &
159-
sleep 5
160-
curl -sf http://localhost:8080/health || exit 1
161-
162-
- name: Run E2E tests
163-
env:
164-
EIDOS_IMAGE: localhost:5001/eidos:local
165-
FAKE_GPU_ENABLED: "true"
166-
run: ./tests/e2e/run.sh
167-
168-
- name: Collect debug artifacts
169-
if: failure()
170-
run: |
171-
mkdir -p /tmp/debug-artifacts
172-
kubectl get all --all-namespaces > /tmp/debug-artifacts/all-resources.txt || true
173-
kubectl get events --all-namespaces --sort-by='.lastTimestamp' > /tmp/debug-artifacts/events.txt || true
174-
kubectl logs -n eidos -l app.kubernetes.io/name=eidosd --tail=500 > /tmp/debug-artifacts/eidosd-logs.txt || true
175-
docker images > /tmp/debug-artifacts/docker-images.txt || true
176-
177-
- name: Export Kind logs
178-
if: failure()
179-
run: |
180-
mkdir -p /tmp/kind-logs
181-
kind export logs /tmp/kind-logs --name eidos || true
182-
183-
- name: Upload debug artifacts
184-
if: failure()
185-
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
186-
with:
187-
name: e2e-debug-artifacts-${{ github.run_id }}
188-
path: |
189-
/tmp/debug-artifacts/
190-
/tmp/kind-logs/
191-
retention-days: 7
192-
193-
- name: Cleanup
194-
if: always()
195-
run: |
196-
make cluster-delete || true
197-
docker system prune -f || true
109+
go_version: ${{ steps.versions.outputs.go }}

.github/workflows/on-tag.yaml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,31 @@ jobs:
8181
with:
8282
go_version: ${{ steps.versions.outputs.go }}
8383

84+
e2e:
85+
name: E2E Tests
86+
runs-on: ubuntu-latest
87+
timeout-minutes: 30
88+
steps:
89+
- name: Checkout Code
90+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
91+
92+
- name: Load versions
93+
id: versions
94+
uses: ./.github/actions/load-versions
95+
96+
- name: Run E2E Tests
97+
uses: ./.github/actions/e2e
98+
with:
99+
go_version: ${{ steps.versions.outputs.go }}
100+
84101
# =============================================================================
85102
# Build Job (runs after all tests pass)
86103
# =============================================================================
87104

88105
build:
89106
name: Build and Release
90107
runs-on: ubuntu-latest
91-
needs: [unit, integration] # Wait for tests to pass
108+
needs: [unit, integration, e2e] # Wait for all tests to pass
92109
timeout-minutes: 30
93110
outputs:
94111
release_outcome: ${{ steps.release.outputs.release_outcome }}

0 commit comments

Comments
 (0)