diff --git a/deployment/README.md b/deployment/README.md index ab6e3c5aa..d368a0a68 100644 --- a/deployment/README.md +++ b/deployment/README.md @@ -191,18 +191,13 @@ AUD="$(kubectl create token default --duration=10m \ echo "Patching AuthPolicy with audience: $AUD" -# Note: Auth policy path may vary depending on your deployment -# For consolidated deployment structure: - -# Patch MaaS API AuthPolicy -kubectl patch --local -f ${PROJECT_DIR}/deployment/base/policies/maas-auth-policy.yaml \ +kubectl patch authpolicy maas-api-auth-policy -n maas-api \ --type='json' \ -p "$(jq -nc --arg aud "$AUD" '[{ op:"replace", path:"/spec/rules/authentication/openshift-identities/kubernetesTokenReview/audiences/0", value:$aud - }]')" \ - -o yaml | kubectl apply -f - + }]')" ``` ## Testing the Deployment diff --git a/deployment/base/maas-api/clusterrole.yaml b/deployment/base/maas-api/clusterrole.yaml deleted file mode 100644 index 30c3e1a21..000000000 --- a/deployment/base/maas-api/clusterrole.yaml +++ /dev/null @@ -1,35 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: maas-api - namespace: maas-api -rules: -- apiGroups: [""] - resources: ["configmaps"] - verbs: ["get", "list", "create", "update", "patch", "delete"] - -# SA token provider resources -- apiGroups: [""] - resources: ["namespaces"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] -- apiGroups: [""] - resources: ["serviceaccounts"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] -- apiGroups: [""] - resources: ["serviceaccounts/token"] - verbs: ["create"] - -# Token review for authentication -- apiGroups: ["authentication.k8s.io"] - resources: ["tokenreviews"] - verbs: ["create"] - -# KServe resources for model management -- apiGroups: ["serving.kserve.io"] - resources: ["inferenceservices", "llminferenceservices"] - verbs: ["get", "list", "watch"] - -# Metrics and monitoring -- apiGroups: [""] - resources: ["pods", "services", "endpoints"] - verbs: ["get", "list", "watch"] diff --git a/deployment/base/maas-api/clusterrolebinding.yaml b/deployment/base/maas-api/clusterrolebinding.yaml deleted file mode 100644 index cc7aba6ba..000000000 --- a/deployment/base/maas-api/clusterrolebinding.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: maas-api - namespace: maas-api -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: maas-api -subjects: -- kind: ServiceAccount - name: maas-api - namespace: maas-api diff --git a/deployment/base/maas-api/deployment.yaml b/deployment/base/maas-api/deployment.yaml index 867dfaa15..232c751c5 100644 --- a/deployment/base/maas-api/deployment.yaml +++ b/deployment/base/maas-api/deployment.yaml @@ -1,59 +1,36 @@ ---- apiVersion: apps/v1 kind: Deployment metadata: name: maas-api - namespace: maas-api - labels: - app: maas-api - version: v2 spec: replicas: 1 - selector: - matchLabels: - app: maas-api template: - metadata: - labels: - app: maas-api - version: v2 - annotations: - sidecar.istio.io/inject: "false" spec: serviceAccountName: maas-api securityContext: runAsNonRoot: true containers: - name: maas-api - image: quay.io/opendatahub/maas-api:latest + image: maas-api imagePullPolicy: Always ports: - containerPort: 8080 name: http protocol: TCP env: - - name: PROVIDER - value: "sa-tokens" - name: NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace - - name: KEY_NAMESPACE - value: llm - - name: SECRET_SELECTOR_LABEL - value: kuadrant.io/apikeys-by - - name: SECRET_SELECTOR_VALUE - value: rhcl-keys - - name: PORT - value: "8080" - - name: CREATE_DEFAULT_TEAM - value: "true" - - name: TOKEN_RATE_LIMIT_POLICY_NAME - value: "gateway-token-rate-limits" - - name: AUTH_POLICY_NAME - value: "gateway-auth-policy" - - name: GIN_MODE - value: "debug" + - name: PROVIDER + value: sa-tokens + resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "200m" livenessProbe: httpGet: path: /health @@ -70,13 +47,6 @@ spec: periodSeconds: 5 timeoutSeconds: 3 failureThreshold: 3 - resources: - requests: - memory: "64Mi" - cpu: "250m" - limits: - memory: "128Mi" - cpu: "500m" securityContext: allowPrivilegeEscalation: false capabilities: @@ -84,3 +54,4 @@ spec: - ALL readOnlyRootFilesystem: true runAsNonRoot: true + terminationGracePeriodSeconds: 30 diff --git a/deployment/base/maas-api/kustomization.yaml b/deployment/base/maas-api/kustomization.yaml index fdbb0fc8e..4a65d8a9f 100644 --- a/deployment/base/maas-api/kustomization.yaml +++ b/deployment/base/maas-api/kustomization.yaml @@ -7,9 +7,19 @@ resources: - namespace.yaml - deployment.yaml - service.yaml - - httproute.yaml - - maas-auth-policy.yaml - - tier-mapping-configmap.yaml - - clusterrolebinding.yaml - - clusterrole.yaml - - serviceaccount.yaml + - rbac + - networking + - policies + - resources + +labels: + - includeSelectors: true + pairs: + app.kubernetes.io/part-of: model-as-a-service + app.kubernetes.io/component: api + app.kubernetes.io/name: maas-api + +images: + - name: maas-api + newName: quay.io/opendatahub/maas-api + newTag: latest \ No newline at end of file diff --git a/deployment/base/maas-api/maas-auth-policy.yaml b/deployment/base/maas-api/maas-auth-policy.yaml deleted file mode 100644 index 2244a2a09..000000000 --- a/deployment/base/maas-api/maas-auth-policy.yaml +++ /dev/null @@ -1,19 +0,0 @@ ---- -apiVersion: kuadrant.io/v1 -kind: AuthPolicy -metadata: - name: maas-api-auth-policy - namespace: maas-api -spec: - targetRef: - group: gateway.networking.k8s.io - kind: HTTPRoute - name: maas-api-route - rules: - # Allow any authenticated user to access the API - authentication: - openshift-identities: - kubernetesTokenReview: - audiences: - - https://kubernetes.default.svc - - openshift-ai-inference-sa diff --git a/deployment/base/maas-api/httproute.yaml b/deployment/base/maas-api/networking/httproute.yaml similarity index 100% rename from deployment/base/maas-api/httproute.yaml rename to deployment/base/maas-api/networking/httproute.yaml diff --git a/maas-api/deploy/overlays/dev/infra/networking/kustomization.yaml b/deployment/base/maas-api/networking/kustomization.yaml similarity index 90% rename from maas-api/deploy/overlays/dev/infra/networking/kustomization.yaml rename to deployment/base/maas-api/networking/kustomization.yaml index 060b6b072..f3cc2b631 100644 --- a/maas-api/deploy/overlays/dev/infra/networking/kustomization.yaml +++ b/deployment/base/maas-api/networking/kustomization.yaml @@ -5,5 +5,4 @@ metadata: name: maas-api-gw-api-routing-infra resources: -- gateway.yaml - httproute.yaml diff --git a/maas-api/deploy/policies/maas-api/auth-policy.yaml b/deployment/base/maas-api/policies/auth-policy.yaml similarity index 100% rename from maas-api/deploy/policies/maas-api/auth-policy.yaml rename to deployment/base/maas-api/policies/auth-policy.yaml diff --git a/maas-api/deploy/policies/maas-api/kustomization.yaml b/deployment/base/maas-api/policies/kustomization.yaml similarity index 66% rename from maas-api/deploy/policies/maas-api/kustomization.yaml rename to deployment/base/maas-api/policies/kustomization.yaml index 09d7f63d5..24a496a90 100644 --- a/maas-api/deploy/policies/maas-api/kustomization.yaml +++ b/deployment/base/maas-api/policies/kustomization.yaml @@ -4,5 +4,6 @@ kind: Kustomization metadata: name: maas-api-policies +# This requires Kuadrant/Red Hat Connectivity Link to be installed resources: - auth-policy.yaml \ No newline at end of file diff --git a/maas-api/deploy/rbac/clusterrole.yaml b/deployment/base/maas-api/rbac/clusterrole.yaml similarity index 100% rename from maas-api/deploy/rbac/clusterrole.yaml rename to deployment/base/maas-api/rbac/clusterrole.yaml diff --git a/maas-api/deploy/rbac/clusterrolebinding.yaml b/deployment/base/maas-api/rbac/clusterrolebinding.yaml similarity index 100% rename from maas-api/deploy/rbac/clusterrolebinding.yaml rename to deployment/base/maas-api/rbac/clusterrolebinding.yaml diff --git a/maas-api/deploy/rbac/kustomization.yaml b/deployment/base/maas-api/rbac/kustomization.yaml similarity index 100% rename from maas-api/deploy/rbac/kustomization.yaml rename to deployment/base/maas-api/rbac/kustomization.yaml diff --git a/deployment/base/maas-api/serviceaccount.yaml b/deployment/base/maas-api/rbac/serviceaccount.yaml similarity index 100% rename from deployment/base/maas-api/serviceaccount.yaml rename to deployment/base/maas-api/rbac/serviceaccount.yaml diff --git a/maas-api/deploy/overlays/dev/resources/kustomization.yaml b/deployment/base/maas-api/resources/kustomization.yaml similarity index 100% rename from maas-api/deploy/overlays/dev/resources/kustomization.yaml rename to deployment/base/maas-api/resources/kustomization.yaml diff --git a/deployment/base/maas-api/tier-mapping-configmap.yaml b/deployment/base/maas-api/resources/tier-mapping-configmap.yaml similarity index 100% rename from deployment/base/maas-api/tier-mapping-configmap.yaml rename to deployment/base/maas-api/resources/tier-mapping-configmap.yaml diff --git a/deployment/base/maas-api/service.yaml b/deployment/base/maas-api/service.yaml index fcfb9e96b..742383ecc 100644 --- a/deployment/base/maas-api/service.yaml +++ b/deployment/base/maas-api/service.yaml @@ -1,15 +1,13 @@ -# Service definition for MaaS API ---- apiVersion: v1 kind: Service metadata: name: maas-api - namespace: maas-api spec: - type: ClusterIP selector: - app: maas-api + app.kubernetes.io/name: maas-api ports: - - name: http - port: 8080 - targetPort: 8080 + - name: http + port: 8080 + targetPort: http + protocol: TCP + type: ClusterIP diff --git a/deployment/base/policies/kustomization.yaml b/deployment/base/policies/kustomization.yaml index 0e7d51f68..c43bc35aa 100644 --- a/deployment/base/policies/kustomization.yaml +++ b/deployment/base/policies/kustomization.yaml @@ -5,7 +5,7 @@ metadata: name: maas-auth resources: -# Authentication and rate limiting policies for the gateway +# Gateway policies - default for all models - gateway-auth-policy.yaml - token-limit-policy.yaml - rate-limit-policy.yaml \ No newline at end of file diff --git a/deployment/components/kserve/kserve-config-openshift.yaml b/deployment/components/kserve/kserve-config-openshift.yaml deleted file mode 100644 index bfa1c7061..000000000 --- a/deployment/components/kserve/kserve-config-openshift.yaml +++ /dev/null @@ -1,61 +0,0 @@ -# KServe Configuration for Kuadrant Integration on OpenShift ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: inferenceservice-config - namespace: kserve -data: - storageInitializer: |- - { - "image" : "kserve/storage-initializer:v0.15.2", - "memoryRequest": "4Gi", - "memoryLimit": "8Gi", - "cpuRequest": "2", - "cpuLimit": "4", - "caBundleConfigMapName": "", - "caBundleVolumeMountPath": "/etc/ssl/custom-certs", - "enableDirectPvcVolumeMount": true, - "enableModelcar": true, - "cpuModelcar": "10m", - "memoryModelcar": "15Mi", - "uidModelcar": 1010 - } - logger: |- - { - "image" : "kserve/agent:v0.15.2", - "memoryRequest": "100Mi", - "memoryLimit": "1Gi", - "cpuRequest": "100m", - "cpuLimit": "1", - "defaultUrl": "http://default-broker" - } - batcher: |- - { - "image" : "kserve/agent:v0.15.2", - "memoryRequest": "1Gi", - "memoryLimit": "1Gi", - "cpuRequest": "1", - "cpuLimit": "1", - "maxBatchSize": "32", - "maxLatency": "5000" - } - agent: |- - { - "image" : "kserve/agent:v0.15.2", - "memoryRequest": "100Mi", - "memoryLimit": "1Gi", - "cpuRequest": "100m", - "cpuLimit": "1" - } - ingress: |- - { - "enableGatewayApi": true, - "kserveIngressGateway": "openshift-ingress/openshift-ai-inference", - "ingressGateway": "istio-system/istio-ingressgateway", - "ingressDomain": "example.com" - } - deploy: |- - { - "defaultDeploymentMode": "RawDeployment" - } diff --git a/deployment/components/kserve/kustomization.yaml b/deployment/components/kserve/kustomization.yaml deleted file mode 100644 index f620632b4..000000000 --- a/deployment/components/kserve/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -namespace: kserve - -resources: - - openshift-scc.yaml - - kserve-config-openshift.yaml \ No newline at end of file diff --git a/deployment/components/kserve/openshift-scc.yaml b/deployment/components/kserve/openshift-scc.yaml deleted file mode 100644 index d6508530e..000000000 --- a/deployment/components/kserve/openshift-scc.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Security Context Constraints for KServe InferenceServices on OpenShift -# This allows model containers to run with the required security permissions ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kserve-service-account - namespace: llm ---- -apiVersion: security.openshift.io/v1 -kind: SecurityContextConstraints -metadata: - name: kserve-scc -allowHostDirVolumePlugin: false -allowHostIPC: false -allowHostNetwork: false -allowHostPID: false -allowHostPorts: false -allowPrivilegedContainer: false -allowPrivilegeEscalation: true -allowedCapabilities: -- NET_ADMIN -- NET_RAW -defaultAddCapabilities: null -fsGroup: - type: RunAsAny -priority: 10 -readOnlyRootFilesystem: false -requiredDropCapabilities: null -runAsUser: - type: RunAsAny -seLinuxContext: - type: RunAsAny -supplementalGroups: - type: RunAsAny -users: -- system:serviceaccount:llm:kserve-service-account -- system:serviceaccount:llm:default -volumes: -- configMap -- downwardAPI -- emptyDir -- persistentVolumeClaim -- projected -- secret -- hostPath diff --git a/maas-api/deploy/infra/odh/kustomization.yaml b/deployment/components/odh/kserve/kustomization.yaml similarity index 100% rename from maas-api/deploy/infra/odh/kustomization.yaml rename to deployment/components/odh/kserve/kustomization.yaml diff --git a/deployment/components/odh/README.md b/deployment/components/odh/operator/README.md similarity index 100% rename from deployment/components/odh/README.md rename to deployment/components/odh/operator/README.md diff --git a/deployment/components/odh/datasciencecluster.yaml b/deployment/components/odh/operator/datasciencecluster.yaml similarity index 100% rename from deployment/components/odh/datasciencecluster.yaml rename to deployment/components/odh/operator/datasciencecluster.yaml diff --git a/deployment/components/odh/dscinitialisation.yaml b/deployment/components/odh/operator/dscinitialisation.yaml similarity index 100% rename from deployment/components/odh/dscinitialisation.yaml rename to deployment/components/odh/operator/dscinitialisation.yaml diff --git a/deployment/components/odh/kustomization.yaml b/deployment/components/odh/operator/kustomization.yaml similarity index 100% rename from deployment/components/odh/kustomization.yaml rename to deployment/components/odh/operator/kustomization.yaml diff --git a/deployment/scripts/deploy-openshift.sh b/deployment/scripts/deploy-openshift.sh index e0b6071d0..cab4b23b4 100755 --- a/deployment/scripts/deploy-openshift.sh +++ b/deployment/scripts/deploy-openshift.sh @@ -5,6 +5,99 @@ set -e +# Helper function to wait for CRD to be established +wait_for_crd() { + local crd="$1" + local timeout="${2:-60s}" + + echo "⏳ Waiting for CRD ${crd} to appear (timeout: ${timeout})…" + if ! timeout "$timeout" bash -c 'until kubectl get crd "$1" &>/dev/null; do sleep 2; done' _ "$crd"; then + echo "❌ Timed out after $timeout waiting for CRD $crd to appear." >&2 + return 1 + fi + + echo "⏳ CRD ${crd} detected — waiting for it to become Established (timeout: ${timeout})…" + kubectl wait --for=condition=Established --timeout="$timeout" "crd/$crd" +} + +# Helper function to wait for pods in a namespace to be ready +wait_for_pods() { + local namespace="$1" + local timeout="${2:-120}" + + kubectl get namespace "$namespace" &>/dev/null || return 0 + + echo "⏳ Waiting for pods in $namespace to be ready..." + local end=$((SECONDS + timeout)) + while [ $SECONDS -lt $end ]; do + local not_ready=$(kubectl get pods -n "$namespace" --no-headers 2>/dev/null | grep -v -E 'Running|Completed|Succeeded' | wc -l) + [ "$not_ready" -eq 0 ] && return 0 + sleep 5 + done + echo "⚠️ Timeout waiting for pods in $namespace" >&2 + return 1 +} + +# version_compare +# Compares two version strings in semantic version format (e.g., "4.19.9") +# Returns 0 if version1 >= version2, 1 otherwise +version_compare() { + local version1="$1" + local version2="$2" + + local v1=$(echo "$version1" | awk -F. '{printf "%d%03d%03d", $1, $2, $3}') + local v2=$(echo "$version2" | awk -F. '{printf "%d%03d%03d", $1, $2, $3}') + + [ "$v1" -ge "$v2" ] +} + +wait_for_validating_webhooks() { + local namespace="$1" + local timeout="${2:-60}" + local interval=2 + local end=$((SECONDS+timeout)) + + echo "⏳ Waiting for validating webhooks in namespace $namespace (timeout: $timeout sec)..." + + while [ $SECONDS -lt $end ]; do + local not_ready=0 + + local services + services=$(kubectl get validatingwebhookconfigurations \ + -o jsonpath='{range .items[*].webhooks[*].clientConfig.service}{.namespace}/{.name}{"\n"}{end}' \ + | grep "^$namespace/" | sort -u) + + if [ -z "$services" ]; then + echo "⚠️ No validating webhooks found in namespace $namespace" + return 0 + fi + + for svc in $services; do + local ns name ready + ns=$(echo "$svc" | cut -d/ -f1) + name=$(echo "$svc" | cut -d/ -f2) + + ready=$(kubectl get endpoints -n "$ns" "$name" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true) + if [ -z "$ready" ]; then + echo "🔴 Webhook service $ns/$name not ready" + not_ready=1 + else + echo "✅ Webhook service $ns/$name has ready endpoints" + fi + done + + if [ "$not_ready" -eq 0 ]; then + echo "🎉 All validating webhook services in $namespace are ready" + return 0 + fi + + sleep $interval + done + + echo "❌ Timed out waiting for validating webhooks in $namespace" + return 1 +} + echo "=========================================" echo "🚀 MaaS Platform OpenShift Deployment" echo "=========================================" @@ -13,22 +106,26 @@ echo "" # Check if running on OpenShift if ! kubectl api-resources | grep -q "route.openshift.io"; then echo "❌ This script is for OpenShift clusters only." - echo " Use 'deploy-kubernetes.sh' for standard Kubernetes clusters." exit 1 fi -# Note about Service Mesh +# Check prerequisites echo "📋 Checking prerequisites..." +echo "" +echo "Required tools:" +echo " - oc: $(oc version --client --short 2>/dev/null | head -n1 || echo 'not found')" +echo " - jq: $(jq --version 2>/dev/null || echo 'not found')" +echo " - kustomize: $(kustomize version --short 2>/dev/null || echo 'not found')" +echo "" echo "ℹ️ Note: OpenShift Service Mesh should be automatically installed when GatewayClass is created." echo " If the Gateway gets stuck in 'Waiting for controller', you may need to manually" echo " install the Red Hat OpenShift Service Mesh operator from OperatorHub." -# Step 1: Enable Gateway API features (if needed) echo "" echo "1️⃣ Checking OpenShift version and Gateway API requirements..." # Get OpenShift version -OCP_VERSION=$(oc version -o json | jq -r '.openshiftVersion' 2>/dev/null || echo "unknown") +OCP_VERSION=$(oc get clusterversion version -o jsonpath='{.status.desired.version}' 2>/dev/null || echo "unknown") echo " OpenShift version: $OCP_VERSION" # Check if version is 4.19.9 or higher @@ -38,43 +135,23 @@ if [[ "$OCP_VERSION" == "unknown" ]]; then -p '{"spec":{"featureSet":"CustomNoUpgrade","customNoUpgrade":{"enabled":["GatewayAPI","GatewayAPIController"]}}}' || true echo " Waiting for feature gates to reconcile (30 seconds)..." sleep 30 +elif version_compare "$OCP_VERSION" "4.19.9"; then + echo " ✅ OpenShift $OCP_VERSION supports Gateway API via GatewayClass (no feature gates needed)" else - # Extract major.minor.patch version numbers - VERSION_REGEX="^[v]?([0-9]+)\.([0-9]+)\.([0-9]+)" - if [[ "$OCP_VERSION" =~ $VERSION_REGEX ]]; then - MAJOR="${BASH_REMATCH[1]}" - MINOR="${BASH_REMATCH[2]}" - PATCH="${BASH_REMATCH[3]}" - - # Check if version is 4.19.9 or higher - if [[ $MAJOR -gt 4 ]] || \ - [[ $MAJOR -eq 4 && $MINOR -gt 19 ]] || \ - [[ $MAJOR -eq 4 && $MINOR -eq 19 && $PATCH -ge 9 ]]; then - echo " ✅ OpenShift $OCP_VERSION supports Gateway API via GatewayClass (no feature gates needed)" - else - echo " Applying Gateway API feature gates for OpenShift < 4.19.9" - oc patch featuregate/cluster --type='merge' \ - -p '{"spec":{"featureSet":"CustomNoUpgrade","customNoUpgrade":{"enabled":["GatewayAPI","GatewayAPIController"]}}}' || true - echo " Waiting for feature gates to reconcile (30 seconds)..." - sleep 30 - fi - else - echo " ⚠️ Could not parse version, applying feature gates to be safe" - oc patch featuregate/cluster --type='merge' \ - -p '{"spec":{"featureSet":"CustomNoUpgrade","customNoUpgrade":{"enabled":["GatewayAPI","GatewayAPIController"]}}}' || true - echo " Waiting for feature gates to reconcile (30 seconds)..." - sleep 30 - fi + echo " Applying Gateway API feature gates for OpenShift < 4.19.9" + oc patch featuregate/cluster --type='merge' \ + -p '{"spec":{"featureSet":"CustomNoUpgrade","customNoUpgrade":{"enabled":["GatewayAPI","GatewayAPIController"]}}}' || true + echo " Waiting for feature gates to reconcile (30 seconds)..." + sleep 30 fi -# Step 2: Create namespaces echo "" echo "2️⃣ Creating namespaces..." -for ns in kserve kuadrant-system llm maas-api; do +echo " ℹ️ Note: If ODH/RHOAI is already installed, some namespaces may already exist" +for ns in opendatahub kserve kuadrant-system llm maas-api; do kubectl create namespace $ns 2>/dev/null || echo " Namespace $ns already exists" done -# Step 3: Install dependencies echo "" echo "3️⃣ Installing dependencies..." @@ -84,9 +161,10 @@ PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" echo " Installing cert-manager..." "$SCRIPT_DIR/install-dependencies.sh" --cert-manager -# Note: KServe should be installed as part of ODH/RHOAI, not separately -# If ODH/RHOAI is not installed, uncomment the following line: -# "$SCRIPT_DIR/install-dependencies.sh" --kserve +# Wait for cert-manager CRDs to be ready +echo " Waiting for cert-manager CRDs to be established..." +wait_for_crd "certificates.cert-manager.io" "120s" || \ + echo " ⚠️ Certificate CRD not yet available" # Clean up any leftover Kuadrant CRDs from previous installations echo " Checking for leftover Kuadrant CRDs..." @@ -99,9 +177,15 @@ fi echo " Installing Kuadrant..." "$SCRIPT_DIR/install-dependencies.sh" --kuadrant -# Step 4: Deploy core infrastructure +# Wait for Kuadrant CRDs to be ready +echo " Waiting for Kuadrant CRDs to be established..." +wait_for_crd "authpolicies.kuadrant.io" "120s" || \ + echo " ⚠️ AuthPolicy CRD not yet available" +wait_for_crd "ratelimitpolicies.kuadrant.io" "120s" || \ + echo " ⚠️ RateLimitPolicy CRD not yet available" + echo "" -echo "4️⃣ Deploying core infrastructure..." +echo "4️⃣ Deploying Gateway and networking infrastructure..." CLUSTER_DOMAIN=$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}') if [ -z "$CLUSTER_DOMAIN" ]; then echo "❌ Failed to retrieve cluster domain from OpenShift" @@ -110,19 +194,68 @@ fi export CLUSTER_DOMAIN echo " Cluster domain: $CLUSTER_DOMAIN" +echo " Deploying Gateway API and Kuadrant configuration..." cd "$PROJECT_ROOT" -kustomize build deployment/overlays/openshift | envsubst | kubectl apply -f - +kustomize build deployment/base/networking | envsubst | kubectl apply --server-side=true --force-conflicts -f - + +# Wait for Gateway API CRDs if not already present +if ! kubectl get crd gateways.gateway.networking.k8s.io &>/dev/null 2>&1; then + echo " Waiting for Gateway API CRDs..." + wait_for_crd "gateways.gateway.networking.k8s.io" "120s" || \ + echo " ⚠️ Gateway API CRDs not yet available" +fi -# Step 5: Apply OpenShift-specific patches echo "" -echo "5️⃣ Applying OpenShift-specific configurations..." +echo "5️⃣ Checking for OpenDataHub/RHOAI KServe..." +if kubectl get crd llminferenceservices.serving.kserve.io &>/dev/null 2>&1; then + echo " ✅ KServe CRDs already present (ODH/RHOAI detected)" +else + echo " ⚠️ KServe not detected. Deploying ODH KServe components..." + echo " Note: This may require multiple attempts as CRDs need to be established first." + + # First attempt + echo " Attempting ODH KServe deployment (attempt 1/2)..." + if kustomize build "$PROJECT_ROOT/deployment/components/odh/kserve" | kubectl apply --server-side=true --force-conflicts -f - 2>/dev/null; then + echo " ✅ Initial deployment successful" + else + echo " ⚠️ First attempt failed (expected if CRDs not yet ready)" + fi + + # Wait for CRDs and operator pods, then retry + echo " Waiting for KServe CRDs to be established..." + if wait_for_crd "llminferenceservices.serving.kserve.io" "120s"; then + + wait_for_pods "opendatahub" 120 || true + wait_for_validating_webhooks opendatahub 90 || true + + echo " Retrying deployment (attempt 2/2)..." + kustomize build "$PROJECT_ROOT/deployment/components/odh/kserve" | kubectl apply --server-side=true --force-conflicts -f - && \ + echo " ✅ ODH KServe components deployed successfully" || \ + echo " ⚠️ ODH KServe deployment failed. This may be expected if ODH operator manages these resources." + else + echo " ⚠️ CRDs did not become ready in time. Continuing anyway..." + echo " Run: kustomize build $PROJECT_ROOT/deployment/components/odh/kserve | kubectl apply --server-side=true --force-conflicts -f -" + fi +fi + +echo "" +echo "6️⃣ Deploying MaaS API..." +cd "$PROJECT_ROOT" +kustomize build deployment/base/maas-api | envsubst | kubectl apply -f - + +echo "" +echo "7️⃣ Applying OpenShift-specific configurations..." # Patch Kuadrant for OpenShift Gateway Controller echo " Patching Kuadrant operator..." -kubectl -n kuadrant-system patch deployment kuadrant-operator-controller-manager \ - --type='json' \ - -p='[{"op":"add","path":"/spec/template/spec/containers/0/env/-","value":{"name":"ISTIO_GATEWAY_CONTROLLER_NAMES","value":"openshift.io/gateway-controller/v1"}}]' 2>/dev/null || \ - echo " Kuadrant operator already patched" +if ! kubectl -n kuadrant-system get deployment kuadrant-operator-controller-manager -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="ISTIO_GATEWAY_CONTROLLER_NAMES")]}' | grep -q "ISTIO_GATEWAY_CONTROLLER_NAMES"; then + kubectl -n kuadrant-system patch deployment kuadrant-operator-controller-manager \ + --type='json' \ + -p='[{"op":"add","path":"/spec/template/spec/containers/0/env/-","value":{"name":"ISTIO_GATEWAY_CONTROLLER_NAMES","value":"openshift.io/gateway-controller/v1"}}]' + echo " ✅ Kuadrant operator patched" +else + echo " ✅ Kuadrant operator already configured" +fi # Update KServe Ingress Domain echo " Updating KServe configuration..." @@ -131,31 +264,34 @@ kubectl -n kserve patch configmap inferenceservice-config \ -p="[{\"op\": \"replace\", \"path\": \"/data/ingress\", \"value\": \"{\\\"enableGatewayApi\\\": true, \\\"kserveIngressGateway\\\": \\\"openshift-ingress/openshift-ai-inference\\\", \\\"ingressGateway\\\": \\\"istio-system/istio-ingressgateway\\\", \\\"ingressDomain\\\": \\\"$CLUSTER_DOMAIN\\\"}\" }]" 2>/dev/null || \ echo " KServe already configured" -# Step 6: Wait for Gateway to be ready echo "" -echo "6️⃣ Waiting for Gateway to be ready..." +echo "8️⃣ Waiting for Gateway to be ready..." echo " Note: This may take a few minutes if Service Mesh is being automatically installed..." -# Check if Service Mesh is being installed +# Wait for Service Mesh CRDs to be established if kubectl get crd istios.sailoperator.io &>/dev/null 2>&1; then - echo " Service Mesh operator detected" + echo " ✅ Service Mesh operator already detected" else - echo " Waiting for automatic Service Mesh installation (up to 5 minutes)..." - for i in {1..30}; do - if kubectl get crd istios.sailoperator.io &>/dev/null 2>&1; then - echo " Service Mesh operator installed!" - break - fi - sleep 10 - done + echo " Waiting for automatic Service Mesh installation..." + if wait_for_crd "istios.sailoperator.io" "300s"; then + echo " ✅ Service Mesh operator installed" + else + echo " ⚠️ Service Mesh CRD not detected within timeout" + echo " Gateway may take longer to become ready or require manual Service Mesh installation" + fi fi +echo " Waiting for Gateway to become ready..." kubectl wait --for=condition=Programmed gateway openshift-ai-inference -n openshift-ingress --timeout=300s || \ - echo " Gateway is taking longer than expected, continuing..." + echo " ⚠️ Gateway is taking longer than expected, continuing..." + +echo "" +echo "9️⃣ Applying Gateway Policies..." +cd "$PROJECT_ROOT" +kustomize build deployment/base/policies | kubectl apply --server-side=true --force-conflicts -f - -# Step 7: Restart Kuadrant operators for policy enforcement echo "" -echo "7️⃣ Restarting Kuadrant operators for policy enforcement..." +echo "🔟 Restarting Kuadrant operators for policy enforcement..." kubectl rollout restart deployment/kuadrant-operator-controller-manager -n kuadrant-system kubectl rollout restart deployment/authorino-operator -n kuadrant-system kubectl rollout restart deployment/limitador-operator-controller-manager -n kuadrant-system @@ -166,11 +302,29 @@ kubectl rollout status deployment/kuadrant-operator-controller-manager -n kuadra kubectl rollout status deployment/authorino-operator -n kuadrant-system --timeout=120s kubectl rollout status deployment/limitador-operator-controller-manager -n kuadrant-system --timeout=120s -# Step 8: Restart KServe controller -# echo "" -# echo "8️⃣ Restarting KServe controller..." -# kubectl rollout restart deployment kserve-controller-manager -n kserve -# kubectl rollout status deployment/kserve-controller-manager -n kserve --timeout=120s +echo "" +echo "1️⃣1️⃣ Patching AuthPolicy with correct audience..." +AUD="$(kubectl create token default --duration=10m 2>/dev/null | cut -d. -f2 | base64 -d 2>/dev/null | jq -r '.aud[0]' 2>/dev/null)" +if [ -n "$AUD" ] && [ "$AUD" != "null" ]; then + echo " Detected audience: $AUD" + kubectl patch authpolicy maas-api-auth-policy -n maas-api \ + --type='json' \ + -p "$(jq -nc --arg aud "$AUD" '[{ + op:"replace", + path:"/spec/rules/authentication/openshift-identities/kubernetesTokenReview/audiences/0", + value:$aud + }]')" 2>/dev/null && echo " ✅ AuthPolicy patched" || echo " ⚠️ Failed to patch AuthPolicy (may need manual configuration)" +else + echo " ⚠️ Could not detect audience, skipping AuthPolicy patch" + echo " You may need to manually configure the audience later" +fi + +echo "" +echo "1️⃣2️⃣ Updating Limitador image for metrics exposure..." +kubectl -n kuadrant-system patch limitador limitador --type merge \ + -p '{"spec":{"image":"quay.io/kuadrant/limitador:1a28eac1b42c63658a291056a62b5d940596fd4c","version":""}}' 2>/dev/null && \ + echo " ✅ Limitador image updated" || \ + echo " ⚠️ Could not update Limitador image (may not be critical)" # Verification echo "" @@ -185,7 +339,7 @@ echo "" echo "Component Status:" kubectl get pods -n maas-api --no-headers | grep Running | wc -l | xargs echo " MaaS API pods running:" kubectl get pods -n kuadrant-system --no-headers | grep Running | wc -l | xargs echo " Kuadrant pods running:" -kubectl get pods -n kserve --no-headers | grep Running | wc -l | xargs echo " KServe pods running:" +kubectl get pods -n opendatahub --no-headers | grep Running | wc -l | xargs echo " KServe pods running:" echo "" echo "Gateway Status:" diff --git a/docs/samples/models/qwen3/kustomization.yaml b/docs/samples/models/qwen3/kustomization.yaml index b8226d1a4..3afc6f322 100644 --- a/docs/samples/models/qwen3/kustomization.yaml +++ b/docs/samples/models/qwen3/kustomization.yaml @@ -6,9 +6,7 @@ metadata: namespace: llm -# Note: namePrefix causes issues with KServe-generated service names -# The LLMInferenceService creates services based on its own name, not the prefixed name -# namePrefix: qwen3- +namePrefix: qwen3- resources: - model.yaml @@ -18,7 +16,7 @@ patches: - patch: |- - op: add path: /rules/0/resourceNames - value: ["single-node-no-scheduler-nvidia-gpu"] + value: ["qwen3-single-node-no-scheduler-nvidia-gpu"] target: kind: Role name: model-user diff --git a/maas-api/DEV.md b/maas-api/DEV.md index 5fbb11e0e..d27ad2b15 100644 --- a/maas-api/DEV.md +++ b/maas-api/DEV.md @@ -6,7 +6,6 @@ - jq - kustomize 5.7 - OCP 4.19.9+ (for GW API) -- [jwt](https://github.com/mike-engel/jwt-cli) CLI tool (for inspecting tokens) ### Setup @@ -31,14 +30,14 @@ for ns in opendatahub kuadrant-system llm maas-api; do kubectl create ns $ns || ```shell PROJECT_DIR=$(git rev-parse --show-toplevel) -kustomize build ${PROJECT_DIR}/maas-api/deploy/infra/openshift-gateway-api | kubectl apply --server-side=true --force-conflicts -f - +kustomize build ${PROJECT_DIR}/deployment/base/networking | kubectl apply --server-side=true --force-conflicts -f - ``` ### Deploying Opendatahub KServe ```shell PROJECT_DIR=$(git rev-parse --show-toplevel) -kustomize build ${PROJECT_DIR}/maas-api/deploy/infra/odh | kubectl apply --server-side=true --force-conflicts -f - +kustomize build ${PROJECT_DIR}/deployment/components/odh/kserve | kubectl apply --server-side=true --force-conflicts -f - ``` > [!NOTE] @@ -52,8 +51,8 @@ make deploy-dev ``` This will: -- Deploy MaaS API component with Service Account Token provider -- Set up demo policies (see `deploy/policies`) +- Deploy MaaS API component with Service Account Token provider in debug mode + #### Patch Kuadrant deployment @@ -84,26 +83,32 @@ kubectl patch csv kuadrant-operator.v0.0.0 -n kuadrant-system --type='json' -p=' ]' ``` +#### Apply Gateway Policies + +```shell +PROJECT_DIR=$(git rev-parse --show-toplevel) +kustomize build ${PROJECT_DIR}/deployment/base/policies | kubectl apply --server-side=true --force-conflicts -f - +``` + #### Ensure the correct audience is set for AuthPolicy Patch `AuthPolicy` with the correct audience for Openshift Identities: ```shell -PROJECT_DIR=$(git rev-parse --show-toplevel) AUD="$(kubectl create token default --duration=10m \ - | jwt decode --json - \ - | jq -r '.payload.aud[0]')" + | cut -d. -f2 \ + | base64 -d 2>/dev/null \ + | jq -r '.aud[0]')" echo "Patching AuthPolicy with audience: $AUD" -kubectl patch --local -f ${PROJECT_DIR}/maas-api/deploy/policies/maas-api/auth-policy.yaml \ +kubectl patch authpolicy maas-api-auth-policy -n maas-api \ --type='json' \ -p "$(jq -nc --arg aud "$AUD" '[{ op:"replace", path:"/spec/rules/authentication/openshift-identities/kubernetesTokenReview/audiences/0", value:$aud - }]')" \ - -o yaml | kubectl apply -f - + }]')" ``` #### Update Limitador image to expose metrics @@ -122,7 +127,7 @@ kubectl -n $NS patch limitador limitador --type merge \ ```shell PROJECT_DIR=$(git rev-parse --show-toplevel) -kustomize build ${PROJECT_DIR}/maas-api/deploy/models/simulator | kubectl apply --server-side=true --force-conflicts -f - +kustomize build ${PROJECT_DIR}/docs/samples/models/simulator | kubectl apply --server-side=true --force-conflicts -f - ``` #### Getting the token @@ -142,7 +147,8 @@ TOKEN_RESPONSE=$(curl -sSk \ "${HOST}/maas-api/v1/tokens") echo $TOKEN_RESPONSE | jq -r . -echo $TOKEN_RESPONSE | jq -r .token | jwt decode --json - + +echo $TOKEN_RESPONSE | jq -r .token | cut -d. -f2 | base64 -d 2>/dev/null | jq . TOKEN=$(echo $TOKEN_RESPONSE | jq -r .token) ``` diff --git a/maas-api/Makefile b/maas-api/Makefile index fa4b85fd5..6eb35e198 100644 --- a/maas-api/Makefile +++ b/maas-api/Makefile @@ -119,17 +119,13 @@ define deploy trap 'mv kustomization.yaml.backup kustomization.yaml 2>/dev/null || true' EXIT INT TERM && \ kustomize edit set image maas-api=$(FULL_IMAGE) && \ $(if $(PRE_DEPLOY_STEP),$(PRE_DEPLOY_STEP) &&) \ - kustomize build . | kubectl apply -f - + kustomize build . --load-restrictor LoadRestrictionsNone | kubectl apply -f - endef .PHONY: deploy-dev deploy-dev: ## Deploy development version $(call deploy,overlays/dev) -.PHONY: deploy -deploy: ## Deploy base component - $(call deploy,base) - default_run_flags := --debug RUN_FLAGS ?= .PHONY: run diff --git a/maas-api/deploy/README.md b/maas-api/deploy/README.md deleted file mode 100644 index 251e3d839..000000000 --- a/maas-api/deploy/README.md +++ /dev/null @@ -1,32 +0,0 @@ -## Overview - -```shell -├── base <1> -├── infra <2> -│ ├── kuadrant -│ ├── odh <*> -│ └── openshift-gateway-api -├── models <3> -├── overlays <4> -│ ├── dev -│ ├── odh -│ └── secret -├── policies <5> -└── rbac <6> -``` - -**<1> base** - Core MaaS API deployment manifests (service, deployment) with common labels and RBAC - -**<2> infra** - Infrastructure dependencies for Gateway API, Kuadrant, and OpenDataHub integration - * `<*>` - ODH minimal deployment to support models (`LLMInferenceService` machinery) - -**<3> models** - Model simulation resources for testing and development environments - -**<4> overlays** - Environment-specific configurations: -- `dev` - Development overlay with debug mode and local infrastructure -- `odh` - OpenDataHub operator overlay for core MaaS API component deployment (no policies/infra) -- `secret` - Secret provider-based deployment configuration - -**<5> policies** - Kuadrant policies for authentication, rate limiting, and token management - -**<6> rbac** - Role-based access control manifests (ServiceAccount, ClusterRole, ClusterRoleBinding) \ No newline at end of file diff --git a/maas-api/deploy/base/deployment.yaml b/maas-api/deploy/base/deployment.yaml deleted file mode 100644 index 232c751c5..000000000 --- a/maas-api/deploy/base/deployment.yaml +++ /dev/null @@ -1,57 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: maas-api -spec: - replicas: 1 - template: - spec: - serviceAccountName: maas-api - securityContext: - runAsNonRoot: true - containers: - - name: maas-api - image: maas-api - imagePullPolicy: Always - ports: - - containerPort: 8080 - name: http - protocol: TCP - env: - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: PROVIDER - value: sa-tokens - resources: - requests: - memory: "64Mi" - cpu: "50m" - limits: - memory: "128Mi" - cpu: "200m" - livenessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 30 - periodSeconds: 10 - timeoutSeconds: 5 - failureThreshold: 3 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 3 - failureThreshold: 3 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - terminationGracePeriodSeconds: 30 diff --git a/maas-api/deploy/base/kustomization.yaml b/maas-api/deploy/base/kustomization.yaml deleted file mode 100644 index 09839baa8..000000000 --- a/maas-api/deploy/base/kustomization.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: maas-api-base - -resources: -- ../rbac/ -- service.yaml -- deployment.yaml - -namespace: maas-api - -labels: -- includeSelectors: true - pairs: - app.kubernetes.io/component: api - app.kubernetes.io/name: maas-api - diff --git a/maas-api/deploy/base/service.yaml b/maas-api/deploy/base/service.yaml deleted file mode 100644 index 742383ecc..000000000 --- a/maas-api/deploy/base/service.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: maas-api -spec: - selector: - app.kubernetes.io/name: maas-api - ports: - - name: http - port: 8080 - targetPort: http - protocol: TCP - type: ClusterIP diff --git a/maas-api/deploy/infra/kuadrant/kuadrant.yaml b/maas-api/deploy/infra/kuadrant/kuadrant.yaml deleted file mode 100644 index 2a84ddd2f..000000000 --- a/maas-api/deploy/infra/kuadrant/kuadrant.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuadrant.io/v1beta1 -kind: Kuadrant -metadata: - name: kuadrant - namespace: kuadrant-system -spec: {} diff --git a/maas-api/deploy/infra/kuadrant/kustomization.yaml b/maas-api/deploy/infra/kuadrant/kustomization.yaml deleted file mode 100644 index 002540a42..000000000 --- a/maas-api/deploy/infra/kuadrant/kustomization.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: maas-api-kuadrant-infra - -resources: -- kuadrant.yaml - diff --git a/maas-api/deploy/infra/kustomization.yaml b/maas-api/deploy/infra/kustomization.yaml deleted file mode 100644 index 11004380a..000000000 --- a/maas-api/deploy/infra/kustomization.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: maas-api-infra - -resources: -- openshift-gateway-api -- kuadrant -- odh diff --git a/maas-api/deploy/infra/openshift-gateway-api/gateway-api.yaml b/maas-api/deploy/infra/openshift-gateway-api/gateway-api.yaml deleted file mode 100644 index 29e45583d..000000000 --- a/maas-api/deploy/infra/openshift-gateway-api/gateway-api.yaml +++ /dev/null @@ -1,9 +0,0 @@ ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: GatewayClass -metadata: - name: openshift-default - namespace: openshift-ingress -spec: - controllerName: "openshift.io/gateway-controller/v1" - diff --git a/maas-api/deploy/infra/openshift-gateway-api/kustomization.yaml b/maas-api/deploy/infra/openshift-gateway-api/kustomization.yaml deleted file mode 100644 index f8d6eebd5..000000000 --- a/maas-api/deploy/infra/openshift-gateway-api/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: ocp-gateway-api - -resources: -- gateway-api.yaml diff --git a/maas-api/deploy/kustomization.yaml b/maas-api/deploy/kustomization.yaml deleted file mode 100644 index 04fdc22be..000000000 --- a/maas-api/deploy/kustomization.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: maas-api-deploy - -resources: -- base - -labels: -- pairs: - app.kubernetes.io/part-of: maas-billing - app.kubernetes.io/version: "0.0.1" - -namespace: maas-api diff --git a/maas-api/deploy/models/facebook-opt-125m-cpu/kustomization.yaml b/maas-api/deploy/models/facebook-opt-125m-cpu/kustomization.yaml deleted file mode 100644 index 60e9e8765..000000000 --- a/maas-api/deploy/models/facebook-opt-125m-cpu/kustomization.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: facebook-opt-125m-cpu-single-node-no-scheduler-cpu - -namespace: llm - -namePrefix: facebook-opt-125m-cpu- - -resources: -- model.yaml -- ../rbac/ - -patches: - - patch: |- - - op: add - path: /rules/0/resourceNames - value: ["facebook-opt-125m-cpu-single-node-no-scheduler-cpu"] - target: - kind: Role - name: model-user - - patch: |- - - op: replace - path: /roleRef/name - value: facebook-opt-125m-cpu-model-user - target: - kind: RoleBinding - name: model-user-tier-binding \ No newline at end of file diff --git a/maas-api/deploy/models/facebook-opt-125m-cpu/model.yaml b/maas-api/deploy/models/facebook-opt-125m-cpu/model.yaml deleted file mode 100644 index 6255e3e0e..000000000 --- a/maas-api/deploy/models/facebook-opt-125m-cpu/model.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: serving.kserve.io/v1alpha1 -kind: LLMInferenceService -metadata: - name: single-node-no-scheduler-cpu -spec: - model: - uri: hf://facebook/opt-125m - name: facebook/opt-125m - replicas: 1 - router: - route: { } - template: - containers: - - name: main - image: quay.io/pierdipi/vllm-cpu:latest - env: - - name: VLLM_LOGGING_LEVEL - value: DEBUG - resources: - limits: - cpu: '1' - memory: 10Gi - requests: - cpu: '100m' - memory: 8Gi - livenessProbe: - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 30 - failureThreshold: 5 diff --git a/maas-api/deploy/models/qwen3-0.6B/kustomization.yaml b/maas-api/deploy/models/qwen3-0.6B/kustomization.yaml deleted file mode 100644 index f786ee9a5..000000000 --- a/maas-api/deploy/models/qwen3-0.6B/kustomization.yaml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: qwen3-single-node-no-scheduler-nvidia-gpu - -namespace: llm - -namePrefix: qwen3- - -resources: -- model.yaml -- ../rbac/ - -patches: -- patch: |- - - op: add - path: /rules/0/resourceNames - value: ["qwen3-single-node-no-scheduler-nvidia-gpu"] - target: - kind: Role - name: model-user -- patch: |- - - op: replace - path: /roleRef/name - value: qwen3-model-user - target: - kind: RoleBinding - name: model-user-tier-binding - diff --git a/maas-api/deploy/models/qwen3-0.6B/model.yaml b/maas-api/deploy/models/qwen3-0.6B/model.yaml deleted file mode 100644 index e4f6d84eb..000000000 --- a/maas-api/deploy/models/qwen3-0.6B/model.yaml +++ /dev/null @@ -1,38 +0,0 @@ -apiVersion: serving.kserve.io/v1alpha1 -kind: LLMInferenceService -metadata: - name: single-node-no-scheduler-nvidia-gpu -spec: - model: - uri: hf://Qwen/Qwen3-0.6B - name: Qwen/Qwen3-0.6B - replicas: 1 - router: - route: { } - template: - nodeSelector: - nvidia.com/gpu.present: "true" - tolerations: - - effect: NoSchedule - key: nvidia.com/gpu - operator: Exists - containers: - - name: main - resources: - limits: - cpu: "4" - memory: 8Gi - nvidia.com/gpu: "1" - requests: - cpu: "1" - memory: 4Gi - nvidia.com/gpu: "1" - livenessProbe: - httpGet: - path: /health - port: 8000 - scheme: HTTPS - initialDelaySeconds: 120 - periodSeconds: 30 - timeoutSeconds: 30 - failureThreshold: 5 diff --git a/maas-api/deploy/models/rbac/all-tiers.yaml b/maas-api/deploy/models/rbac/all-tiers.yaml deleted file mode 100644 index df68274af..000000000 --- a/maas-api/deploy/models/rbac/all-tiers.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: model-user -rules: - - apiGroups: ["serving.kserve.io"] - resources: ["llminferenceservices"] - verbs: ["post"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: model-user-tier-binding -subjects: - - kind: Group - name: system:serviceaccounts:openshift-ai-inference-tier-free - apiGroup: rbac.authorization.k8s.io - - kind: Group - name: system:serviceaccounts:openshift-ai-inference-tier-premium - apiGroup: rbac.authorization.k8s.io - - kind: Group - name: system:serviceaccounts:openshift-ai-inference-tier-enterprise - apiGroup: rbac.authorization.k8s.io -roleRef: - kind: Role - name: model-user - apiGroup: rbac.authorization.k8s.io diff --git a/maas-api/deploy/models/rbac/kustomization.yaml b/maas-api/deploy/models/rbac/kustomization.yaml deleted file mode 100644 index 3f93d21bf..000000000 --- a/maas-api/deploy/models/rbac/kustomization.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: maas-tiers-rbac - -namespace: llm - -resources: -- all-tiers.yaml diff --git a/maas-api/deploy/models/simulator/kustomization.yaml b/maas-api/deploy/models/simulator/kustomization.yaml deleted file mode 100644 index 9ab200c53..000000000 --- a/maas-api/deploy/models/simulator/kustomization.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: facebook-opt-125m-simulated - -namespace: llm - -namePrefix: facebook-opt-125m- - -resources: -- model.yaml -- ../rbac/ - -patches: - - patch: |- - - op: add - path: /rules/0/resourceNames - value: ["facebook-opt-125m-simulated"] - target: - kind: Role - name: model-user - - patch: |- - - op: replace - path: /roleRef/name - value: facebook-opt-125m-model-user - target: - kind: RoleBinding - name: model-user-tier-binding diff --git a/maas-api/deploy/models/simulator/model.yaml b/maas-api/deploy/models/simulator/model.yaml deleted file mode 100644 index 4865f9be2..000000000 --- a/maas-api/deploy/models/simulator/model.yaml +++ /dev/null @@ -1,53 +0,0 @@ -apiVersion: serving.kserve.io/v1alpha1 -kind: LLMInferenceService -metadata: - name: simulated -spec: - model: - uri: hf://facebook/opt-125m - name: facebook/opt-125m - replicas: 1 - router: - route: { } - template: - containers: - - name: main - image: "ghcr.io/llm-d/llm-d-inference-sim:v0.5.1" - imagePullPolicy: Always - command: ["/app/llm-d-inference-sim"] - args: - - --port - - "8000" - - --model - - facebook-opt-125m-simulated - - --mode - - random - - --ssl-certfile - - /etc/ssl/certs/tls.crt - - --ssl-keyfile - - /etc/ssl/certs/tls.key - env: - - name: POD_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - ports: - - name: https - containerPort: 8000 - protocol: TCP - livenessProbe: - httpGet: - path: /health - port: https - scheme: HTTPS - readinessProbe: - httpGet: - path: /ready - port: https - scheme: HTTPS diff --git a/maas-api/deploy/overlays/dev/infra/kustomization.yaml b/maas-api/deploy/overlays/dev/infra/kustomization.yaml deleted file mode 100644 index 4b9ccbb9d..000000000 --- a/maas-api/deploy/overlays/dev/infra/kustomization.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: maas-api-all-infra - -resources: -- ../../../infra/kuadrant -- networking diff --git a/maas-api/deploy/overlays/dev/infra/networking/gateway.yaml b/maas-api/deploy/overlays/dev/infra/networking/gateway.yaml deleted file mode 100644 index fb88f6439..000000000 --- a/maas-api/deploy/overlays/dev/infra/networking/gateway.yaml +++ /dev/null @@ -1,15 +0,0 @@ ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: Gateway -metadata: - name: openshift-ai-inference - namespace: openshift-ingress -spec: - gatewayClassName: openshift-default - listeners: - - name: http - port: 80 - protocol: HTTP - allowedRoutes: - namespaces: - from: All diff --git a/maas-api/deploy/overlays/dev/infra/networking/httproute.yaml b/maas-api/deploy/overlays/dev/infra/networking/httproute.yaml deleted file mode 100644 index f3542ea50..000000000 --- a/maas-api/deploy/overlays/dev/infra/networking/httproute.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: maas-api-route - namespace: maas-api -spec: - parentRefs: - - name: openshift-ai-inference - namespace: openshift-ingress - rules: - - matches: - - path: - type: PathPrefix - value: /maas-api - filters: - - type: URLRewrite - urlRewrite: - path: - type: ReplacePrefixMatch - replacePrefixMatch: / - backendRefs: - - name: maas-api - port: 8080 - weight: 100 diff --git a/maas-api/deploy/overlays/dev/kustomization.yaml b/maas-api/deploy/overlays/dev/kustomization.yaml index cded2101a..3ba74317b 100644 --- a/maas-api/deploy/overlays/dev/kustomization.yaml +++ b/maas-api/deploy/overlays/dev/kustomization.yaml @@ -5,15 +5,7 @@ metadata: name: maas-api-dev resources: -- ../../base/ -- ./infra/ -- ./resources/tier-mapping-configmap.yaml -- ../../policies - -namespace: maas-api - -transformers: -- transformers/namespace.yaml +- ../../../../deployment/base/maas-api patches: - path: patches/debug-mode.yaml diff --git a/maas-api/deploy/overlays/dev/patches/debug-mode.yaml b/maas-api/deploy/overlays/dev/patches/debug-mode.yaml index 969f213f1..dba040cff 100644 --- a/maas-api/deploy/overlays/dev/patches/debug-mode.yaml +++ b/maas-api/deploy/overlays/dev/patches/debug-mode.yaml @@ -2,6 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: maas-api + namespace: maas-api spec: template: spec: diff --git a/maas-api/deploy/overlays/dev/resources/tier-mapping-configmap.yaml b/maas-api/deploy/overlays/dev/resources/tier-mapping-configmap.yaml deleted file mode 100644 index ad82008c1..000000000 --- a/maas-api/deploy/overlays/dev/resources/tier-mapping-configmap.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: tier-to-group-mapping -data: - tiers: | - - name: free - description: Free tier for basic users - level: 1 - groups: - - system:authenticated - - name: premium - description: Premium tier for paying customers - level: 10 - groups: - - premium-users - - name: enterprise - description: Enterprise tier for corporate customers - level: 20 - groups: - - enterprise-users - - admin-users diff --git a/maas-api/deploy/overlays/dev/transformers/namespace.yaml b/maas-api/deploy/overlays/dev/transformers/namespace.yaml deleted file mode 100644 index 2517cc89d..000000000 --- a/maas-api/deploy/overlays/dev/transformers/namespace.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# This transformer is to prevent from overwriting the namespace in Gateway resource. -# It needs to be openshift-ingress for having cluster DNSRecord -apiVersion: builtin -kind: PatchTransformer -metadata: - name: set-gateway-namespace -patch: |- - - op: replace - path: /metadata/namespace - value: openshift-ingress -target: - group: gateway.networking.k8s.io - version: v1 - kind: Gateway ---- -apiVersion: builtin -kind: PatchTransformer -metadata: - name: set-auth-policy-namespace -patch: |- - - op: replace - path: /metadata/namespace - value: openshift-ingress -target: - group: kuadrant.io - version: v1 - kind: AuthPolicy - name: gateway-auth-policy ---- -apiVersion: builtin -kind: PatchTransformer -metadata: - name: set-rate-limit-policy-namespace -patch: |- - - op: replace - path: /metadata/namespace - value: openshift-ingress -target: - group: kuadrant.io - version: v1 - kind: RateLimitPolicy ---- -apiVersion: builtin -kind: PatchTransformer -metadata: - name: set-kuadrant-namespace -patch: |- - - op: replace - path: /metadata/namespace - value: kuadrant-system -target: - group: kuadrant.io - version: v1beta1 - kind: Kuadrant diff --git a/maas-api/deploy/overlays/odh/kustomization.yaml b/maas-api/deploy/overlays/odh/kustomization.yaml index 811d7a6fd..e9101516e 100644 --- a/maas-api/deploy/overlays/odh/kustomization.yaml +++ b/maas-api/deploy/overlays/odh/kustomization.yaml @@ -6,8 +6,7 @@ metadata: # Overlay to be used by OpenDataHub Operator to install MaaS component itself. resources: -- ../../base -- ../../policies/maas-api +- ../../../../deployment/base/maas-api namespace: opendatahub diff --git a/maas-api/deploy/overlays/secret/kustomization.yaml b/maas-api/deploy/overlays/secret/kustomization.yaml index 895e8592a..b06c0f24a 100644 --- a/maas-api/deploy/overlays/secret/kustomization.yaml +++ b/maas-api/deploy/overlays/secret/kustomization.yaml @@ -5,7 +5,7 @@ metadata: name: maas-api-secrets-provider resources: -- ../../base/ +- ../../../../deployment/base/maas-api patches: - path: patches/secrets-provider.yaml diff --git a/maas-api/deploy/policies/kustomization.yaml b/maas-api/deploy/policies/kustomization.yaml deleted file mode 100644 index 14da5d50d..000000000 --- a/maas-api/deploy/policies/kustomization.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: all-policies - -resources: -- maas-api -- model \ No newline at end of file diff --git a/maas-api/deploy/policies/model/gateway-auth-policy.yaml b/maas-api/deploy/policies/model/gateway-auth-policy.yaml deleted file mode 100644 index 61e3d78b4..000000000 --- a/maas-api/deploy/policies/model/gateway-auth-policy.yaml +++ /dev/null @@ -1,68 +0,0 @@ ---- -apiVersion: kuadrant.io/v1 -kind: AuthPolicy -metadata: - name: gateway-auth-policy - namespace: openshift-ingress -spec: - targetRef: - group: gateway.networking.k8s.io - kind: Gateway - name: openshift-ai-inference - rules: - metadata: - # Enriching identity metadata with a proper subscription tier based on user groups - matchedTier: - http: - # TODO: network policy to limit access to this endpoint - url: http://maas-api.maas-api.svc.cluster.local:8080/v1/tiers/lookup - contentType: application/json - method: POST - body: - expression: |- - { "groups": auth.identity.user.groups } - cache: - key: - selector: auth.identity.user.username - ttl: 300 - authentication: - service-accounts: - kubernetesTokenReview: - audiences: - - openshift-ai-inference-sa - defaults: - # token normalization - https://docs.kuadrant.io/1.2.x/authorino/docs/user-guides/token-normalization/ - # full username: system:serviceaccount:: - userid: - expression: | - auth.identity.user.username.split(":")[3] - authorization: - tier-access: - kubernetesSubjectAccessReview: - user: - expression: auth.identity.user.username - authorizationGroups: - expression: auth.identity.user.groups - resourceAttributes: - group: - value: serving.kserve.io - resource: - value: llminferenceservices - namespace: - expression: | - request.path.split("/")[1] - name: - expression: | - request.path.split("/")[2] - verb: - value: post - response: - success: - filters: - identity: - json: - properties: - userid: - expression: auth.identity.userid - tier: - expression: auth.metadata.matchedTier["tier"] diff --git a/maas-api/deploy/policies/model/kustomization.yaml b/maas-api/deploy/policies/model/kustomization.yaml deleted file mode 100644 index 53d1d522e..000000000 --- a/maas-api/deploy/policies/model/kustomization.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -metadata: - name: model-gateway-level-policies - -resources: -- gateway-auth-policy.yaml -- token-limit-policy.yaml -- rate-limit-policy.yaml \ No newline at end of file diff --git a/maas-api/deploy/policies/model/rate-limit-policy.yaml b/maas-api/deploy/policies/model/rate-limit-policy.yaml deleted file mode 100644 index 997e373ce..000000000 --- a/maas-api/deploy/policies/model/rate-limit-policy.yaml +++ /dev/null @@ -1,38 +0,0 @@ -apiVersion: kuadrant.io/v1 -kind: RateLimitPolicy -metadata: - name: gateway-rate-limits - namespace: openshift-ingress -spec: - targetRef: - group: gateway.networking.k8s.io - kind: Gateway - name: openshift-ai-inference - limits: - free: - rates: - - limit: 5 - window: 2m - when: - - predicate: | - auth.identity.tier == "free" - counters: - - expression: auth.identity.userid - premium: - rates: - - limit: 20 - window: 2m - when: - - predicate: | - auth.identity.tier == "premium" - counters: - - expression: auth.identity.userid - enterprise: - rates: - - limit: 50 - window: 2m - when: - - predicate: | - auth.identity.tiers == "enterprise" - counters: - - expression: auth.identity.userid diff --git a/maas-api/deploy/policies/model/token-limit-policy.yaml b/maas-api/deploy/policies/model/token-limit-policy.yaml deleted file mode 100644 index 444246ddd..000000000 --- a/maas-api/deploy/policies/model/token-limit-policy.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Gateway-level Token Rate Limiting Policy for OpenShift -# Automatically tracks tokens from response bodies (usage.total_tokens) -# Uses the same user groups as the auth policy configuration ---- -apiVersion: kuadrant.io/v1alpha1 -kind: TokenRateLimitPolicy -metadata: - name: gateway-token-rate-limits - namespace: openshift-ingress -spec: - targetRef: - group: gateway.networking.k8s.io - kind: Gateway - name: inference-gateway - limits: - free-user-tokens: - rates: - - limit: 100 - window: 1m - when: - - predicate: | - auth.identity.tier == "free" - counters: - - expression: auth.identity.userid - - premium-user-tokens: - rates: - - limit: 50000 - window: 1m - when: - - predicate: | - auth.identity.tier == "premium" - counters: - - expression: auth.identity.userid - enterprise-user-tokens: - rates: - - limit: 100000 - window: 1m - when: - - predicate: | - auth.identity.tier == "enterprise" - counters: - - expression: auth.identity.userid - diff --git a/maas-api/deploy/rbac/serviceaccount.yaml b/maas-api/deploy/rbac/serviceaccount.yaml deleted file mode 100644 index 734b95e77..000000000 --- a/maas-api/deploy/rbac/serviceaccount.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: maas-api -