Skip to content

Commit 50bce74

Browse files
committed
feat: add modelharness helm chart
Signed-off-by: rambohe-ch <rambohe.ch@gmail.com>
1 parent c98d77f commit 50bce74

27 files changed

Lines changed: 524 additions & 692 deletions

.github/workflows/benchmark.yaml

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,20 @@ name: ModelDeployment benchmark
44
# 1. Reuses the E2E base setup composite action to bring up the AKS
55
# cluster + all stack components (Istio gateway, BBR, EPP, KEDA,
66
# llm-gateway-apikey, gpu-node-mocker, KAITO).
7-
# 2. Installs the modeldeployment Helm chart in the `default` namespace,
8-
# reusing the cluster-wide `inference-gateway` Gateway and the
9-
# cluster-wide `default` APIKey (Secret `llm-api-key`).
10-
# 3. Starts a `kubectl port-forward` against the gateway service so the
7+
# 2. Provisions a dedicated workload namespace via the
8+
# `charts/modelharness` Helm chart (per-namespace Istio Gateway
9+
# `<namespace>-gw`, catch-all HTTPRoute + ReferenceGrant pointing
10+
# at the cluster-shared `default/model-not-found` Service, plus
11+
# the per-namespace `AuthorizationPolicy` + `APIKey` CR when
12+
# `auth.enabled=true`).
13+
# 3. Installs the `modeldeployment` Helm chart in the same workload
14+
# namespace, parented to that namespace's Gateway and protected by
15+
# the namespace-local APIKey (Secret `llm-api-key`).
16+
# 4. Starts a `kubectl port-forward` against the gateway service so the
1117
# complete request path Gateway → BBR → EPP → vLLM (mock) pod can be
1218
# exercised from localhost. The endpoint URL and API key are echoed
1319
# to the workflow log.
14-
# 4. Installs guidellm and runs a sweep benchmark against the endpoint,
20+
# 5. Installs guidellm and runs a sweep benchmark against the endpoint,
1521
# printing TTFT / TPOT / TPM (and the rest of the guidellm console
1622
# summary) into the workflow log.
1723
#
@@ -45,8 +51,11 @@ env:
4551
LOCATION: swedencentral
4652
NODE_COUNT: '3'
4753
NODE_VM_SIZE: Standard_D8s_v5
48-
GATEWAY_NAME: inference-gateway
49-
GATEWAY_NAMESPACE: default
54+
# Dedicated workload namespace provisioned by charts/modelharness.
55+
# Naming: per-namespace Gateway becomes "<namespace>-gw" by chart
56+
# default, so GATEWAY_NAME and GATEWAY_HOST below must be kept in sync with WORKLOAD_NAMESPACE; the per-namespace APIKey Secret (llm-api-key) lives here.
57+
WORKLOAD_NAMESPACE: benchmark
58+
GATEWAY_NAME: benchmark-gw
5059
GATEWAY_LOCAL_PORT: '18080'
5160
# The llm-gateway-apikey ext_authz filter resolves the APIKey CR's
5261
# namespace from the request's Host header subdomain (
@@ -55,7 +64,7 @@ env:
5564
# "cannot determine gateway namespace: set context_extensions[gateway-namespace]
5665
# or use subdomain-based host". We map this FQDN to 127.0.0.1 in
5766
# /etc/hosts so the kubectl port-forward target carries the correct Host.
58-
GATEWAY_HOST: default.gw.kaito.sh
67+
GATEWAY_HOST: benchmark.gw.kaito.sh
5968
APIKEY_SECRET_NAME: llm-api-key
6069
APIKEY_SECRET_KEY: apiKey
6170
# ----- Benchmark parameters (previously workflow_dispatch inputs) -----
@@ -153,22 +162,44 @@ jobs:
153162
keda-kaito-scaler-version: ''
154163
llm-gateway-auth-version: ''
155164

165+
- name: Provision workload namespace via modelharness
166+
run: |
167+
set -euo pipefail
168+
echo "── Installing modelharness chart in ${WORKLOAD_NAMESPACE} ──"
169+
# The chart provisions:
170+
# - per-namespace Istio Gateway "<namespace>-gw"
171+
# - catch-all HTTPRoute → cluster-shared `default/model-not-found`
172+
# - ReferenceGrant authorising that cross-ns backendRef
173+
# - AuthorizationPolicy wiring the Gateway pod into the
174+
# cluster-wide `apikey-ext-authz` CUSTOM provider
175+
# - APIKey CR `default` (apikey-operator reconciles it into
176+
# Secret `llm-api-key` in this namespace)
177+
# `--create-namespace` lets helm create the workload namespace
178+
# on first install — no separate `kubectl create namespace`
179+
# step needed.
180+
helm upgrade --install modelharness charts/modelharness \
181+
--namespace "${WORKLOAD_NAMESPACE}" \
182+
--create-namespace \
183+
--set namespace="${WORKLOAD_NAMESPACE}" \
184+
--set auth.enabled=true \
185+
--wait --timeout=5m
186+
156187
- name: Deploy inference workload via ModelDeployment
157188
run: |
158189
set -euo pipefail
159190
echo "── Installing modeldeployment chart ──"
160191
echo " release / name : ${DEPLOYMENT_NAME}"
161-
echo " namespace : ${GATEWAY_NAMESPACE}"
192+
echo " namespace : ${WORKLOAD_NAMESPACE}"
162193
echo " preset model : ${MODEL_PRESET}"
163194
echo " replicas : ${REPLICAS}"
164195
echo " instance type : ${INSTANCE_TYPE}"
165-
echo " gateway : ${GATEWAY_NAME} (namespace ${GATEWAY_NAMESPACE})"
196+
echo " gateway : ${GATEWAY_NAME} (namespace ${WORKLOAD_NAMESPACE})"
166197
167198
helm upgrade --install "${DEPLOYMENT_NAME}" charts/modeldeployment \
168-
--namespace "${GATEWAY_NAMESPACE}" \
199+
--namespace "${WORKLOAD_NAMESPACE}" \
169200
--create-namespace \
170201
--set name="${DEPLOYMENT_NAME}" \
171-
--set namespace="${GATEWAY_NAMESPACE}" \
202+
--set namespace="${WORKLOAD_NAMESPACE}" \
172203
--set model="${MODEL_PRESET}" \
173204
--set replicas="${REPLICAS}" \
174205
--set instanceType="${INSTANCE_TYPE}" \
@@ -184,15 +215,15 @@ jobs:
184215
echo "── Waiting for InferenceSet ${DEPLOYMENT_NAME} pods to be Ready (timeout 10m) ──"
185216
186217
# Wait for the EPP deployment first (ext_proc gRPC endpoint that the Gateway reaches).
187-
kubectl -n "${GATEWAY_NAMESPACE}" rollout status \
218+
kubectl -n "${WORKLOAD_NAMESPACE}" rollout status \
188219
"deployment/${DEPLOYMENT_NAME}-inferencepool-epp" --timeout=5m
189220
190221
# Wait for the inference (shadow) pods. KAITO labels each
191222
# inference pod with `inferenceset.kaito.sh/created-by=<name>`.
192223
deadline=$(( $(date +%s) + 600 ))
193224
ready=0
194225
while [ "$(date +%s)" -lt "${deadline}" ]; do
195-
ready=$(kubectl -n "${GATEWAY_NAMESPACE}" get pods \
226+
ready=$(kubectl -n "${WORKLOAD_NAMESPACE}" get pods \
196227
-l "inferenceset.kaito.sh/created-by=${DEPLOYMENT_NAME}" \
197228
-o jsonpath='{range .items[*]}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
198229
2>/dev/null | grep -c '^True$' || true)
@@ -205,23 +236,23 @@ jobs:
205236
done
206237
if [ "${ready:-0}" -lt "${REPLICAS}" ]; then
207238
echo "✗ inference pods did not all become Ready within 10m"
208-
kubectl -n "${GATEWAY_NAMESPACE}" get pods -l "inferenceset.kaito.sh/created-by=${DEPLOYMENT_NAME}" -o wide
239+
kubectl -n "${WORKLOAD_NAMESPACE}" get pods -l "inferenceset.kaito.sh/created-by=${DEPLOYMENT_NAME}" -o wide
209240
exit 1
210241
fi
211242
212243
# ---------- 2. Wait for the default APIKey Secret ----------
213-
echo "── Waiting for Secret ${GATEWAY_NAMESPACE}/${APIKEY_SECRET_NAME} (created by the apikey-operator from the cluster-wide APIKey CR) ──"
244+
echo "── Waiting for Secret ${WORKLOAD_NAMESPACE}/${APIKEY_SECRET_NAME} (created by the apikey-operator from the namespace-local APIKey CR provisioned by modelharness) ──"
214245
secret_found=0
215246
for _ in $(seq 1 60); do
216-
if kubectl -n "${GATEWAY_NAMESPACE}" get secret "${APIKEY_SECRET_NAME}" >/dev/null 2>&1; then
247+
if kubectl -n "${WORKLOAD_NAMESPACE}" get secret "${APIKEY_SECRET_NAME}" >/dev/null 2>&1; then
217248
echo "✓ Secret found"
218249
secret_found=1
219250
break
220251
fi
221252
sleep 5
222253
done
223254
if [ "${secret_found}" -ne 1 ]; then
224-
echo "✗ Secret ${GATEWAY_NAMESPACE}/${APIKEY_SECRET_NAME} did not appear within 5m"
255+
echo "✗ Secret ${WORKLOAD_NAMESPACE}/${APIKEY_SECRET_NAME} did not appear within 5m"
225256
exit 1
226257
fi
227258
@@ -246,7 +277,7 @@ jobs:
246277
# Start kubectl port-forward in the background. Output goes to a
247278
# log file so the workflow can dump it on failure.
248279
mkdir -p "${PF_DIR}"
249-
nohup kubectl -n "${GATEWAY_NAMESPACE}" port-forward \
280+
nohup kubectl -n "${WORKLOAD_NAMESPACE}" port-forward \
250281
"svc/${SERVICE}" "${GATEWAY_LOCAL_PORT}:80" \
251282
> "${PF_DIR}/port-forward.log" 2>&1 &
252283
PF_PID=$!
@@ -267,7 +298,7 @@ jobs:
267298
exit 1
268299
fi
269300
270-
API_KEY="$(kubectl -n "${GATEWAY_NAMESPACE}" get secret \
301+
API_KEY="$(kubectl -n "${WORKLOAD_NAMESPACE}" get secret \
271302
"${APIKEY_SECRET_NAME}" \
272303
-o jsonpath="{.data.${APIKEY_SECRET_KEY}}" | base64 -d)"
273304
# Use the subdomain-based FQDN (mapped to 127.0.0.1 above) so the
@@ -387,7 +418,7 @@ jobs:
387418
echo "| --- | --- |"
388419
echo "| model preset | \`${MODEL_PRESET}\` |"
389420
echo "| deployment | \`${DEPLOYMENT_NAME}\` |"
390-
echo "| namespace | \`${GATEWAY_NAMESPACE}\` |"
421+
echo "| namespace | \`${WORKLOAD_NAMESPACE}\` |"
391422
echo "| replicas | ${REPLICAS} |"
392423
echo "| instance type | \`${INSTANCE_TYPE}\` |"
393424
echo "| guidellm profile | \`${BENCHMARK_PROFILE}\` |"
@@ -449,14 +480,14 @@ jobs:
449480
# on a real node and labelled `kaito.sh/managed-by=gpu-mocker` /
450481
# `kaito.sh/shadow-pod-for=<ns>.<original-name>`. Resolve each
451482
# original pod to its shadow before scraping /metrics.
452-
ORIGINAL_PODS="$(kubectl -n "${GATEWAY_NAMESPACE}" get pods \
483+
ORIGINAL_PODS="$(kubectl -n "${WORKLOAD_NAMESPACE}" get pods \
453484
-l "inferenceset.kaito.sh/created-by=${DEPLOYMENT_NAME}" \
454485
-o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
455486
456487
PODS=""
457488
for ORIG in ${ORIGINAL_PODS}; do
458-
SHADOW_SELECTOR="kaito.sh/shadow-pod-for=${GATEWAY_NAMESPACE}.${ORIG}"
459-
SHADOW_POD="$(kubectl -n "${GATEWAY_NAMESPACE}" get pods \
489+
SHADOW_SELECTOR="kaito.sh/shadow-pod-for=${WORKLOAD_NAMESPACE}.${ORIG}"
490+
SHADOW_POD="$(kubectl -n "${WORKLOAD_NAMESPACE}" get pods \
460491
-l "${SHADOW_SELECTOR}" \
461492
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
462493
if [ -z "${SHADOW_POD}" ]; then
@@ -500,8 +531,8 @@ jobs:
500531
for POD in ${PODS}; do
501532
RAW="./benchmark-results/vllm-metrics/${POD}.metrics.txt"
502533
echo "── ${POD} ──"
503-
if ! kubectl -n "${GATEWAY_NAMESPACE}" get \
504-
--raw "/api/v1/namespaces/${GATEWAY_NAMESPACE}/pods/${POD}:${MODEL_SERVER_PORT}/proxy/metrics" \
534+
if ! kubectl -n "${WORKLOAD_NAMESPACE}" get \
535+
--raw "/api/v1/namespaces/${WORKLOAD_NAMESPACE}/pods/${POD}:${MODEL_SERVER_PORT}/proxy/metrics" \
505536
> "${RAW}" 2>"${SCRAPE_ERR}"; then
506537
echo " ✗ failed to scrape /metrics:"
507538
sed 's/^/ /' "${SCRAPE_ERR}" || true
@@ -590,14 +621,23 @@ jobs:
590621
591622
# ---------- 2. Uninstall modeldeployment Helm release ----------
592623
helm uninstall "${DEPLOYMENT_NAME}" \
593-
--namespace "${GATEWAY_NAMESPACE}" \
624+
--namespace "${WORKLOAD_NAMESPACE}" \
625+
--ignore-not-found --wait || true
626+
627+
# ---------- 3. Uninstall modelharness Helm release ----------
628+
helm uninstall modelharness \
629+
--namespace "${WORKLOAD_NAMESPACE}" \
594630
--ignore-not-found --wait || true
595631
596-
# ---------- 3. Dump cluster state on failure ----------
632+
# ---------- 4. Delete workload namespace ----------
633+
kubectl delete namespace "${WORKLOAD_NAMESPACE}" \
634+
--ignore-not-found --wait=false || true
635+
636+
# ---------- 5. Dump cluster state on failure ----------
597637
if [ "${JOB_STATUS}" = "failure" ]; then
598638
echo "── Job failed — dumping cluster state ──"
599639
make e2e-dump || true
600640
fi
601641
602-
# ---------- 4. Teardown cluster ----------
642+
# ---------- 6. Teardown cluster ----------
603643
make e2e-teardown || true

0 commit comments

Comments
 (0)