Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 34 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
# The targets below are commands rather than files, so they are declared
# phony; a stray file with the same name can then never mask them.
.PHONY: check-kubeconfig help status sync clear-cache \
        deploy deploy-all undeploy undeploy-kserve \
        deploy-cert-manager deploy-istio deploy-lws deploy-kserve \
        deploy-opendatahub-prerequisites deploy-cert-manager-pki \
        test conformance

# Path of the helmfile cache directory under the user's home.
# NOTE(review): presumably what clear-cache removes - confirm against that rule.
HELMFILE_CACHE := $(HOME)/.cache/helmfile
# Git ref appended as ?ref= to the remote KServe kustomize overlay URLs.
# Overridable from the environment or the command line.
KSERVE_REF ?= release-v0.15
# Namespace that the KServe resources are created in and removed from.
# Overridable from the environment or the command line.
KSERVE_NAMESPACE ?= opendatahub

check-kubeconfig:
Expand All @@ -13,8 +12,8 @@ help:
@echo "rhaii-on-xks - Infrastructure for llm-d on xKS (AKS/CoreWeave)"
@echo ""
@echo "Deploy:"
@echo " make deploy - Deploy cert-manager + istio"
@echo " make deploy-all - Deploy all (cert-manager + istio + lws)"
@echo " make deploy - Deploy cert-manager + istio + lws"
@echo " make deploy-all - Deploy all (cert-manager + istio + lws + kserve)"
@echo " make deploy-kserve - Deploy KServe"
@echo ""
@echo "Undeploy:"
Expand All @@ -40,10 +39,10 @@ sync: clear-cache
# deploy: apply the three operator releases one at a time, selected by helmfile
# release name (cert-manager-operator, sail-operator, lws-operator), then print
# the cluster status through a sub-make. The help text describes this set as
# "cert-manager + istio + lws".
# Runs check-kubeconfig and clear-cache first.
deploy: check-kubeconfig clear-cache
helmfile apply --selector name=cert-manager-operator
helmfile apply --selector name=sail-operator
helmfile apply --selector name=lws-operator
@$(MAKE) status

# deploy-all: install every component (cert-manager, istio, lws, kserve) and
# then print the cluster status.
#
# The components are installed by sequential sub-makes instead of being listed
# as prerequisites. Prerequisite order is only honoured in a serial build, and
# the later steps depend on the earlier ones: deploy-kserve's prerequisite chain
# aborts when the cert-manager CRDs are missing and copies a pull secret out of
# the istio-system namespace. Each recipe line must succeed before the next one
# starts, so the order holds even under `make -j`.
#
# The helmfile cache is cleared by the sub-targets that use helmfile
# (deploy-cert-manager, deploy-istio and deploy-lws all depend on clear-cache).
deploy-all: check-kubeconfig
	@$(MAKE) deploy-cert-manager
	@$(MAKE) deploy-istio
	@$(MAKE) deploy-lws
	@$(MAKE) deploy-kserve
	@$(MAKE) status

deploy-cert-manager: check-kubeconfig clear-cache
Expand All @@ -55,35 +54,44 @@ deploy-istio: check-kubeconfig clear-cache
# deploy-lws: apply only the helmfile release named lws-operator.
# Runs check-kubeconfig and clear-cache first.
deploy-lws: check-kubeconfig clear-cache
helmfile apply --selector name=lws-operator

# deploy-opendatahub-prerequisites: prepare the KServe namespace.
#   1. Create $(KSERVE_NAMESPACE) idempotently (client-side dry run piped into apply).
#   2. Best-effort copy of the redhat-pull-secret from istio-system into that
#      namespace by rewriting the namespace field of the exported YAML.
#   3. Apply the remote odh-test/cert-manager kustomize overlay at $(KSERVE_REF).
deploy-opendatahub-prerequisites: check-kubeconfig
	@echo "=== Deploying OpenDataHub prerequisites ==="
	kubectl create namespace $(KSERVE_NAMESPACE) --dry-run=client -o yaml | kubectl apply -f -
# The pull secret is optional: stderr is discarded and `|| true` keeps the
# recipe going when the secret is missing or cannot be applied.
	kubectl get secret redhat-pull-secret -n istio-system -o yaml 2>/dev/null | \
		sed 's/namespace: istio-system/namespace: $(KSERVE_NAMESPACE)/' | \
		kubectl apply -f - 2>/dev/null || true
	kubectl apply -k "https://github.com/opendatahub-io/kserve/config/overlays/odh-test/cert-manager?ref=$(KSERVE_REF)"

# deploy-cert-manager-pki: set up the cert-manager issuer used by KServe and
# bring up the KServe controller from the remote odh-xks kustomize overlay.
# Runs check-kubeconfig and deploy-opendatahub-prerequisites first.
# NOTE(review): this listing looks like it was taken from a rendered diff, so
# some of the steps below may belong to an earlier revision of the rule -
# confirm against the real Makefile before relying on the exact sequence.
deploy-cert-manager-pki: check-kubeconfig deploy-opendatahub-prerequisites
# Abort with a hint when the cert-manager ClusterIssuer CRD is not installed.
@kubectl get crd clusterissuers.cert-manager.io >/dev/null 2>&1 || \
(echo "ERROR: cert-manager CRDs not found. Run 'make deploy-cert-manager' first." && exit 1)
@echo "Waiting for cert-manager webhook..."
# Best-effort delete of the webhook CA secret, then restart the webhook
# deployment and wait up to 120s for the rollout to finish.
# NOTE(review): presumably this forces the webhook CA to be regenerated - confirm.
-kubectl delete secret cert-manager-webhook-ca -n cert-manager --ignore-not-found 2>/dev/null || true
kubectl rollout restart deployment/cert-manager-webhook -n cert-manager
kubectl rollout status deployment/cert-manager-webhook -n cert-manager --timeout=120s
# Fixed pause after the rollout reports done.
# NOTE(review): looks like settle time for the webhook - confirm it is still needed.
@sleep 5
# Apply the local PKI manifest and wait for the opendatahub-ca-issuer ClusterIssuer to become Ready.
kubectl apply -f ./charts/kserve/pki-prereq.yaml
kubectl wait --for=condition=Ready clusterissuer/opendatahub-ca-issuer --timeout=120s
@echo "Applying CRDs and deployment (CR errors expected, will retry)..."
# First pass over the overlay: failures are ignored and stderr is discarded;
# the same overlay is applied again at the end of the recipe.
-kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=$(KSERVE_REF)" | kubectl apply --server-side --force-conflicts -f - 2>/dev/null || true
@echo "Removing webhooks to allow controller startup..."
-kubectl delete validatingwebhookconfiguration llminferenceservice.serving.kserve.io llminferenceserviceconfig.serving.kserve.io --ignore-not-found 2>/dev/null || true
# Block (up to 300s) until the controller deployment reports Available.
kubectl wait --for=condition=Available deployment/kserve-controller-manager -n $(KSERVE_NAMESPACE) --timeout=300s
@echo "Controller ready, applying CRs..."
# Second pass over the same overlay; this time a failure fails the target.
kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=$(KSERVE_REF)" | kubectl apply --server-side --force-conflicts -f -

# deploy-kserve: install the kserve-rhaii-xks helmfile release.
# Runs check-kubeconfig and deploy-cert-manager-pki first, then runs
# `helmfile sync` with --wait and --skip-crds for that one release.
deploy-kserve: check-kubeconfig deploy-cert-manager-pki
@echo "Applying KServe via Helm..."
helmfile sync --wait --selector name=kserve-rhaii-xks --skip-crds
@echo "=== KServe deployed ==="

# Undeploy
# undeploy: tear down KServe first (through the undeploy-kserve prerequisite),
# then run the repository cleanup script.
# NOTE(review): -y presumably answers the script's confirmation prompt - confirm
# against scripts/cleanup.sh.
undeploy: check-kubeconfig undeploy-kserve
	@./scripts/cleanup.sh -y

# undeploy-kserve: best-effort removal of the KServe resources this Makefile
# manages. Every step discards stderr and ends in `|| true`, so a resource (or
# a whole resource kind) that does not exist never aborts the teardown.
undeploy-kserve: check-kubeconfig
# Custom resources in all namespaces, then the controller and the Helm release.
	@kubectl delete llminferenceservice --all -A --ignore-not-found 2>/dev/null || true
	@kubectl delete inferencepool --all -A --ignore-not-found 2>/dev/null || true
	@kubectl delete deployment kserve-controller-manager -n $(KSERVE_NAMESPACE) --ignore-not-found 2>/dev/null || true
	@helm uninstall kserve-rhaii-xks --namespace $(KSERVE_NAMESPACE) 2>/dev/null || true
	@kubectl delete validatingwebhookconfiguration llminferenceservice.serving.kserve.io llminferenceserviceconfig.serving.kserve.io --ignore-not-found 2>/dev/null || true
# Removes KServe CRDs and Inference Extension CRDs (InferencePool, InferenceModel);
# Helm does not remove CRDs on uninstall.
	@kubectl get crd -o name | grep -E "serving.kserve.io|inference.networking" | xargs -r kubectl delete --ignore-not-found 2>/dev/null || true
# Removes cluster-scoped RBAC resources whose name contains "kserve" (case-insensitive).
	@kubectl get clusterrole,clusterrolebinding -o name | grep -i kserve | xargs -r kubectl delete --ignore-not-found 2>/dev/null || true
# Removes both cluster issuers and the CA certificate in the cert-manager namespace.
	@kubectl delete clusterissuer opendatahub-ca-issuer opendatahub-selfsigned-issuer --ignore-not-found 2>/dev/null || true
	@kubectl delete certificate opendatahub-ca -n cert-manager --ignore-not-found 2>/dev/null || true
# Namespace deletion is requested without waiting for it to finish.
	@kubectl delete namespace $(KSERVE_NAMESPACE) --ignore-not-found --wait=false 2>/dev/null || true
	@echo "=== KServe removed ==="

Expand All @@ -103,6 +111,12 @@ status: check-kubeconfig
@echo "lws-operator:"
@kubectl get pods -n openshift-lws-operator 2>/dev/null || echo " Not deployed"
@echo ""
@echo "kserve:"
@kubectl get pods -n $(KSERVE_NAMESPACE) -l control-plane=kserve-controller-manager 2>/dev/null || echo " Not deployed"
@echo ""
@echo "kserve config:"
@kubectl get llminferenceserviceconfig -n $(KSERVE_NAMESPACE) 2>/dev/null || echo " Not deployed"
@echo ""
@echo "=== API Versions ==="
@echo -n "InferencePool API: "
@if kubectl get crd inferencepools.inference.networking.k8s.io >/dev/null 2>&1; then \
Expand Down
Loading