-
Notifications
You must be signed in to change notification settings - Fork 10
198 lines (171 loc) · 7.23 KB
/
e2e-mock-test.yml
File metadata and controls
198 lines (171 loc) · 7.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
name: E2E Mock vLLM Test

# Triggered for pull requests that target `main` or a `release-*` branch,
# when a label is added (`labeled`) or new commits are pushed (`synchronize`).
# Each job narrows this further with its own `if:` condition.
on:
  pull_request_target:
    types: [labeled, synchronize]
    branches: [main, 'release-*']

# No token permissions by default; every job declares exactly what it needs.
permissions: {}

# Workflow-wide values referenced by the job steps.
env:
  # Tag given to the locally built mock image.
  MOCK_IMAGE: localhost/vllm-mock:test
  # Namespace used for the mock model and the conformance tests.
  MOCK_NAMESPACE: mock-vllm-test
  # Name of the KinD cluster created for the run.
  KIND_CLUSTER_NAME: rhaii-e2e
jobs:
  # Remove run-e2e-test label on new pushes to force re-review
  remove-label-on-push:
    name: Remove e2e label on push
    permissions:
      pull-requests: write
    # Only for pushes to a PR whose event payload still lists the label.
    if: >-
      github.event.action == 'synchronize' &&
      contains(github.event.pull_request.labels.*.name, 'run-e2e-test')
    runs-on: ubuntu-latest
    steps:
      - name: Remove run-e2e-test label
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
        with:
          script: |
            try {
              await github.rest.issues.removeLabel({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.payload.pull_request.number,
                name: 'run-e2e-test'
              });
              console.log('Removed run-e2e-test label — maintainer must re-add after reviewing new code');
            } catch (error) {
              // Two pushes in quick succession can start overlapping runs; the
              // later one finds the label already gone (HTTP 404). That is the
              // desired end state, so only other errors fail the job.
              if (error.status !== 404) {
                throw error;
              }
              console.log('run-e2e-test label already absent — nothing to remove');
            }

  e2e-test:
    name: E2E Mock vLLM Test
    permissions:
      contents: read
    # Only runs when a maintainer adds the run-e2e-test label specifically.
    # Does not trigger on other labels or on synchronize (new pushes).
    if: github.event.action == 'labeled' && github.event.label.name == 'run-e2e-test'
    runs-on: ubuntu-latest
    steps:
      # Checks out the head commit recorded in the triggering event payload and
      # does not leave the token in the local git config.
      - name: Checkout PR code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          persist-credentials: false

      # MOCK_IMAGE, MOCK_NAMESPACE and KIND_CLUSTER_NAME come from the
      # workflow-level `env` and are read as shell variables in `run` scripts.
      - name: Build mock vLLM image
        run: docker build -t "$MOCK_IMAGE" test/mock-vllm/

      - name: Install helmfile and helm-diff
        run: |
          HELMFILE_VERSION=0.169.2
          HELMFILE_SHA256="34a5ca9c5fda733f0322f7b12a2959b7de4ab125bcf6531337751e263b027d58"
          curl -fsSL -o /tmp/helmfile.tar.gz \
            "https://github.com/helmfile/helmfile/releases/download/v${HELMFILE_VERSION}/helmfile_${HELMFILE_VERSION}_linux_amd64.tar.gz"
          # Verify the archive before unpacking it into /usr/local/bin.
          echo "${HELMFILE_SHA256}  /tmp/helmfile.tar.gz" | sha256sum -c -
          sudo tar xz -C /usr/local/bin helmfile < /tmp/helmfile.tar.gz
          rm /tmp/helmfile.tar.gz
          helm plugin install https://github.com/databus23/helm-diff --version v3.9.14

      # One control-plane node plus one worker.
      - name: Create KinD config
        run: |
          cat > /tmp/kind-config.yaml <<'EOF'
          kind: Cluster
          apiVersion: kind.x-k8s.io/v1alpha4
          nodes:
            - role: control-plane
            - role: worker
          EOF

      - name: Create KinD cluster
        uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3  # v1.12.0
        with:
          cluster_name: ${{ env.KIND_CLUSTER_NAME }}
          config: /tmp/kind-config.yaml

      - name: Load mock image into KinD
        run: kind load docker-image "$MOCK_IMAGE" --name "$KIND_CLUSTER_NAME"

      # Writes the pull secret to /var/lib/kubelet/config.json on every node
      # and restarts the kubelet on that node.
      # NOTE(review): the secret is spelled RAHII_… while the cluster name uses
      # "rhaii" — confirm the name matches the secret defined in the repository.
      - name: Configure pull credentials on KinD nodes
        env:
          PULL_SECRET: ${{ secrets.RAHII_ON_XKS_PULL_SECRET }}
        run: |
          for node in $(kind get nodes --name "$KIND_CLUSTER_NAME"); do
            docker exec "$node" mkdir -p /var/lib/kubelet
            # The secret is passed on stdin; tee's echo is discarded.
            docker exec -i "$node" tee /var/lib/kubelet/config.json > /dev/null <<< "$PULL_SECRET"
            docker exec "$node" systemctl restart kubelet.service
          done
          kubectl wait --for=condition=Ready nodes --all --timeout=60s

      # Parses the secret as JSON (failing the step if it is malformed) and
      # writes it to ~/.config/containers/auth.json; a later step turns that
      # file into the namespace pull secret.
      - name: Configure pull secret for helmfile
        env:
          PULL_SECRET: ${{ secrets.RAHII_ON_XKS_PULL_SECRET }}
        run: |
          mkdir -p ~/.config/containers
          python3 -c "
          import json, os
          data = json.loads(os.environ['PULL_SECRET'])
          with open(os.path.expanduser('~/.config/containers/auth.json'), 'w') as f:
              json.dump(data, f)
          print('Pull secret configured')
          "

      - name: Deploy infrastructure (make deploy-all)
        timeout-minutes: 15
        run: make deploy-all

      - name: Setup inference gateway
        timeout-minutes: 7
        run: |
          # Run setup-gateway.sh but override the Programmed wait for KinD
          # (KinD has no LoadBalancer, so gateway stays AddressNotAssigned)
          ./scripts/setup-gateway.sh || true
          # Verify gateway is Accepted and pod is running
          kubectl wait --for=condition=Accepted gateway/inference-gateway \
            -n opendatahub --timeout=300s
          echo "[OK] Gateway accepted"
          kubectl wait --for=condition=Ready pod \
            -l gateway.networking.k8s.io/gateway-name=inference-gateway \
            -n opendatahub --timeout=300s
          echo "[OK] Gateway pod ready"

      - name: Verify deployment
        run: |
          make status
          echo ""
          echo "=== Verifying components ==="
          kubectl wait --for=condition=Available deployment/cert-manager-webhook \
            -n cert-manager --timeout=120s
          echo "[OK] cert-manager"
          kubectl wait --for=condition=Available deployment -l app=istiod \
            -n istio-system --timeout=120s
          echo "[OK] Istiod"
          kubectl wait --for=condition=Available \
            deployment/kserve-controller-manager -n opendatahub --timeout=120s
          echo "[OK] KServe controller"
          echo ""
          echo "=== All components verified ==="

      # `create --dry-run=client -o yaml | apply` makes both the namespace and
      # the secret creation safe to repeat.
      - name: Configure mock namespace pull secret
        run: |
          kubectl create namespace "$MOCK_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
          kubectl create secret docker-registry redhat-pull-secret \
            --from-file=.dockerconfigjson="$HOME/.config/containers/auth.json" \
            -n "$MOCK_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
          kubectl patch serviceaccount default -n "$MOCK_NAMESPACE" \
            -p '{"imagePullSecrets": [{"name": "redhat-pull-secret"}]}'

      - name: Deploy mock model
        timeout-minutes: 5
        env:
          MOCK_IMAGE: ${{ env.MOCK_IMAGE }}
          IMAGE_PULL_POLICY: IfNotPresent
        run: make deploy-mock-model

      - name: Run conformance tests
        run: make test NAMESPACE="$MOCK_NAMESPACE"

      # Best-effort diagnostics: every command is allowed to fail so that one
      # missing resource does not hide the remaining output.
      - name: Collect debug info on failure
        if: failure()
        run: |
          echo "=== Debug Info ==="
          make status || true
          echo ""
          echo "=== All pods ==="
          kubectl get pods -A || true
          echo ""
          echo "=== Mock namespace ==="
          kubectl get all -n "$MOCK_NAMESPACE" || true
          echo ""
          echo "=== LLMInferenceService ==="
          kubectl get llmisvc -A -o yaml || true
          echo ""
          echo "=== Events (mock namespace) ==="
          kubectl get events -n "$MOCK_NAMESPACE" --sort-by='.lastTimestamp' || true
          echo ""
          echo "=== Pod logs (mock namespace) ==="
          for pod in $(kubectl get pods -n "$MOCK_NAMESPACE" -o name 2>/dev/null); do
            echo "--- $pod ---"
            kubectl logs "$pod" -n "$MOCK_NAMESPACE" --all-containers --tail=50 || true
          done

      # Runs regardless of earlier failures; cleanup errors are ignored.
      - name: Clean up mock model
        if: always()
        run: make clean-mock-model || true