Skip to content

Commit f1411b6

Browse files
authored
feat(validator): upgrade conformance checks from static to behavioral validation (#185)
1 parent 2acf5d0 commit f1411b6

16 files changed

+1543
-384
lines changed

.github/workflows/gpu-h100-inference-test.yaml

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ on:
3939
- 'recipes/overlays/h100-kind-inference-dynamo.yaml'
4040
- 'kwok/manifests/karpenter/**'
4141
- 'kwok/scripts/install-karpenter-kwok.sh'
42-
- 'kwok/scripts/validate-cluster-autoscaling.sh'
4342
workflow_dispatch: {} # Allow manual runs
4443

4544
permissions:
@@ -111,7 +110,10 @@ jobs:
111110
# --- Install Karpenter before validation so cluster-autoscaling check passes ---
112111

113112
- name: Install Karpenter + KWOK (setup)
114-
run: bash kwok/scripts/validate-cluster-autoscaling.sh --setup
113+
run: |
114+
export KARPENTER_VERSION=$(yq eval '.testing_tools.karpenter' .settings.yaml)
115+
bash kwok/scripts/install-karpenter-kwok.sh
116+
kubectl --context="kind-${KIND_CLUSTER_NAME}" apply -f kwok/manifests/karpenter/nodepool.yaml
115117
116118
# --- Validate cluster (Go conformance checks run inside K8s Jobs) ---
117119
# Includes self-contained secure-accelerator-access check (creates its own
@@ -228,11 +230,6 @@ jobs:
228230
fi
229231
echo "Dynamo vLLM inference smoke test passed."
230232
231-
# --- Cluster Autoscaling validation ---
232-
233-
- name: Cluster Autoscaling (Karpenter + KWOK)
234-
run: bash kwok/scripts/validate-cluster-autoscaling.sh --exercise
235-
236233
# --- Evidence collection ---
237234

238235
- name: Collect AI conformance evidence

.github/workflows/gpu-h100-training-test.yaml

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@ on:
3535
- 'recipes/overlays/h100-kind-training.yaml'
3636
- 'kwok/manifests/karpenter/**'
3737
- 'kwok/scripts/install-karpenter-kwok.sh'
38-
- 'kwok/scripts/validate-cluster-autoscaling.sh'
3938
- 'recipes/components/prometheus-adapter/**'
4039
workflow_dispatch: {} # Allow manual runs
4140

@@ -108,7 +107,10 @@ jobs:
108107
# --- Install Karpenter before validation so cluster-autoscaling check passes ---
109108

110109
- name: Install Karpenter + KWOK (setup)
111-
run: bash kwok/scripts/validate-cluster-autoscaling.sh --setup
110+
run: |
111+
export KARPENTER_VERSION=$(yq eval '.testing_tools.karpenter' .settings.yaml)
112+
bash kwok/scripts/install-karpenter-kwok.sh
113+
kubectl --context="kind-${KIND_CLUSTER_NAME}" apply -f kwok/manifests/karpenter/nodepool.yaml
112114
113115
# --- Health checks (run before conformance to give metrics pipeline time) ---
114116

@@ -142,11 +144,6 @@ jobs:
142144
--require-gpu \
143145
--image=ko.local:smoke-test
144146
145-
# --- Cluster Autoscaling validation ---
146-
147-
- name: Cluster Autoscaling (Karpenter + KWOK)
148-
run: bash kwok/scripts/validate-cluster-autoscaling.sh --exercise
149-
150147
# --- Evidence collection ---
151148

152149
- name: Collect AI conformance evidence

kwok/scripts/validate-cluster-autoscaling.sh

Lines changed: 0 additions & 329 deletions
This file was deleted.

0 commit comments

Comments
 (0)