|
31 | 31 | - '.github/actions/load-versions/**' |
32 | 32 | - 'tests/manifests/**' |
33 | 33 | - 'tests/chainsaw/ai-conformance/**' |
34 | | - - 'docs/conformance/cncf/**' |
35 | 34 | - 'recipes/components/dynamo-platform/**' |
36 | 35 | - 'recipes/components/prometheus-adapter/**' |
37 | 36 | - 'recipes/overlays/kind.yaml' |
@@ -109,38 +108,14 @@ jobs: |
109 | 108 | fi |
110 | 109 | echo "Snapshot correctly detected ${GPU_COUNT}x ${GPU_MODEL}" |
111 | 110 |
|
112 | | - # --- Deploy DRA test pod (prerequisite for secure-accelerator-access check) --- |
113 | | - |
114 | | - - name: Deploy DRA GPU test |
115 | | - run: | |
116 | | - kubectl --context="kind-${KIND_CLUSTER_NAME}" apply \ |
117 | | - -f docs/conformance/cncf/manifests/dra-gpu-test.yaml |
118 | | -
|
119 | | - echo "Waiting for DRA GPU test pod to complete..." |
120 | | - if kubectl --context="kind-${KIND_CLUSTER_NAME}" -n dra-test \ |
121 | | - wait --for=jsonpath='{.status.phase}'=Succeeded pod/dra-gpu-test --timeout=120s; then |
122 | | - echo "DRA GPU allocation test passed." |
123 | | - else |
124 | | - echo "::error::DRA GPU test pod did not succeed" |
125 | | - kubectl --context="kind-${KIND_CLUSTER_NAME}" -n dra-test \ |
126 | | - logs pod/dra-gpu-test 2>/dev/null || true |
127 | | - kubectl --context="kind-${KIND_CLUSTER_NAME}" -n dra-test \ |
128 | | - get pod/dra-gpu-test -o yaml 2>/dev/null || true |
129 | | - exit 1 |
130 | | - fi |
131 | | -
|
132 | | - echo "=== DRA GPU test logs ===" |
133 | | - kubectl --context="kind-${KIND_CLUSTER_NAME}" -n dra-test \ |
134 | | - logs pod/dra-gpu-test |
135 | | -
|
136 | 111 | # --- Install Karpenter before validation so cluster-autoscaling check passes --- |
137 | 112 |
|
138 | 113 | - name: Install Karpenter + KWOK (setup) |
139 | 114 | run: bash kwok/scripts/validate-cluster-autoscaling.sh --setup |
140 | 115 |
|
141 | 116 | # --- Validate cluster (Go conformance checks run inside K8s Jobs) --- |
142 | | - # Replaces previous bash assertion steps for: inference-gateway, |
143 | | - # accelerator-metrics, pod-autoscaling, secure-accelerator-access. |
| 117 | + # Includes self-contained secure-accelerator-access check (creates its own |
| 118 | + # DRA test resources, validates, and cleans up automatically). |
144 | 119 |
|
145 | 120 | - name: Validate cluster |
146 | 121 | run: | |
@@ -258,12 +233,6 @@ jobs: |
258 | 233 | - name: Cluster Autoscaling (Karpenter + KWOK) |
259 | 234 | run: bash kwok/scripts/validate-cluster-autoscaling.sh --exercise |
260 | 235 |
|
261 | | - - name: DRA GPU test cleanup |
262 | | - if: always() |
263 | | - run: | |
264 | | - kubectl --context="kind-${KIND_CLUSTER_NAME}" delete \ |
265 | | - -f docs/conformance/cncf/manifests/dra-gpu-test.yaml --ignore-not-found 2>/dev/null || true |
266 | | -
|
267 | 236 | # --- Evidence collection --- |
268 | 237 |
|
269 | 238 | - name: Collect AI conformance evidence |
@@ -337,9 +306,6 @@ jobs: |
337 | 306 | kubectl --context="kind-${KIND_CLUSTER_NAME}" -n monitoring get pods -o wide 2>/dev/null || true |
338 | 307 | echo "=== DRA ResourceSlices ===" |
339 | 308 | kubectl --context="kind-${KIND_CLUSTER_NAME}" get resourceslices -o wide 2>/dev/null || true |
340 | | - echo "=== DRA test pod spec ===" |
341 | | - kubectl --context="kind-${KIND_CLUSTER_NAME}" -n dra-test \ |
342 | | - get pod/dra-gpu-test -o yaml 2>/dev/null || true |
343 | 309 | echo "=== Node status ===" |
344 | 310 | kubectl --context="kind-${KIND_CLUSTER_NAME}" get nodes -o wide 2>/dev/null || true |
345 | 311 |
|
|
0 commit comments