Merge branch 'main' into kserve-image-pull-secret

pierDipi · web-flow · commit ede117933dc0 · 2026-02-25T15:55:41.000+01:00
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -0,0 +1,61 @@
+# Builds the documentation site with Zensical and deploys it to GitHub Pages
+# on every push to main.
+name: Build docs
+on:
+  push:
+    branches:
+      - main
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+jobs:
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/configure-pages@v5
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+      - run: pip install zensical
+      - name: Copy README to docs with fixed relative links
+        # The script below is passed inside a double-quoted shell string, so it
+        # must not contain double quotes, backticks or dollar signs.
+        run: |
+          python3 -c "
+          import re, os
+          repo_url = os.environ['REPO_URL']
+          with open('README.md') as f:
+              content = f.read()
+          def absolutize(match):
+              label, path = match.group(1), match.group(2)
+              # Links into docs/ stay relative; the prefix strip below handles them
+              if path.startswith('docs/'):
+                  return match.group(0)
+              return f'{label}({repo_url}/blob/main/{path})'
+          # Convert relative links with directory paths that point outside docs/
+          # (e.g. ./validation/README.md) to absolute GitHub URLs. This runs before
+          # the prefix strip so that docs subdirectory links such as
+          # ./docs/images/x.png are not mistaken for repo-root paths.
+          content = re.sub(
+              r'(\[[^\]]*\])\(\./([^)]*\/[^)]*)\)',
+              absolutize,
+              content,
+          )
+          # Strip ./docs/ prefix so links resolve within the docs directory
+          content = content.replace('./docs/', './')
+          with open('docs/index.md', 'w') as f:
+              f.write(content)
+          "
+        env:
+          REPO_URL: ${{ github.server_url }}/${{ github.repository }}
+      - run: zensical build --clean
+      - uses: actions/upload-pages-artifact@v4
+        with:
+          path: site
+      - uses: actions/deploy-pages@v4
+        id: deployment
diff --git a/.github/workflows/link-check.yaml b/.github/workflows/link-check.yaml
@@ -0,0 +1,32 @@
+# Checks links in Markdown files on pull requests that touch Markdown, the
+# lychee configuration, or this workflow.
+name: Check links
+
+on:
+  pull_request:
+    paths:
+      - '**/*.md'
+      - '.lychee.toml'
+      - '.github/workflows/link-check.yaml'
+
+permissions:
+  contents: read
+
+jobs:
+  link-check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+
+      - uses: lycheeverse/lychee-action@v2
+        with:
+          # Pass the config file explicitly: lychee's documented default config
+          # name is lychee.toml, so the dot-prefixed file may not be auto-loaded.
+          args: >
+            --config .lychee.toml
+            --no-progress
+            --exclude '^https?://localhost'
+            '**/*.md'
+          fail: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -1 +1,5 @@
 /.idea
+.claude/
+site/
+*.swp
+*.swo
diff --git a/.lychee.toml b/.lychee.toml
@@ -0,0 +1,14 @@
+# https://lychee.cli.rs/usage/config/
+include_mail = false
+max_concurrency = 16
+max_retries = 3
+user_agent = "Mozilla/5.0 (compatible; lychee/0.18; +https://github.com/lycheeverse/lychee)"
+timeout = 30  # NOTE(review): presumably seconds — confirm against the lychee docs
+accept = [200, 203, 429]  # status codes counted as success; 429 is Too Many Requests
+include_fragments = false
+include_verbatim = true
+exclude = [  # regular expressions for URLs that are skipped
+  "^https?://[0-9]+\\.xx\\.",  # http(s) hosts of the form <digits>.xx. — looks like masked/placeholder IPs
+  "^hf://",  # hf:// scheme URIs
+  "^registry\\.redhat\\.io",  # registry.redhat.io references written without a scheme
+]
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# rhaii-on-xks
+# Red Hat AI Inference on managed Kubernetes
 
 Infrastructure Helm charts for deploying Red Hat AI Inference Server (KServe LLMInferenceService) on managed Kubernetes platforms (AKS, CoreWeave).
 
diff --git a/charts/cert-manager-operator/README.md b/charts/cert-manager-operator/README.md
@@ -148,7 +148,7 @@ The update-bundle.sh script:
 Personal Red Hat pull secrets and tokens typically expire (yearly). Registry
 Service Accounts created via the [Red Hat terms-based registry](https://access.redhat.com/terms-based-registry/)
 do not expire and are recommended for production (see Section 1.3 of the
-[deployment guide](../docs/deploying-llm-d-on-managed-kubernetes.md)).
+[deployment guide](../../docs/deploying-llm-d-on-managed-kubernetes.md)).
 
 To update expiring credentials:
 
diff --git a/charts/kserve/files/resources.yaml b/charts/kserve/files/resources.yaml
@@ -648,7 +648,7 @@ data:
     }
   oauthProxy: |-
     {
-      "image" : "registry.redhat.io/rhoai/odh-kube-auth-proxy-rhel9@sha256:169d9fe4dc6032344b295221ccbfa20f28e54f6ef490452b21459488bf472f8d",
+      "image" : "registry.redhat.io/rhoai/odh-kube-auth-proxy-rhel9@sha256:2c4be58b9cbbfbf0cce82771f9823f5df664a21c139feee9e4f8beb9cf3ad76a",
       "memoryRequest": "64Mi",
       "memoryLimit": "128Mi",
       "cpuRequest": "100m",
@@ -711,7 +711,7 @@ data:
   kserve-llm-d-routing-sidecar: registry.redhat.io/rhoai/odh-llm-d-routing-sidecar-rhel9@sha256:7f93742da18df2ce220cd8d6a0310c18af6fe04905c83f23d022e065716ebd88
   kserve-router: registry.redhat.io/rhoai/odh-kserve-router-rhel9@sha256:26dc51b1f099964196c35bbc0801a5523da75c16095733c9870b7c46b1677871
   kserve-storage-initializer: registry.redhat.io/rhoai/odh-kserve-storage-initializer-rhel9@sha256:37d31edc075adf26a529197281797a24b76bd4924d7903c2754992174959ee91
-  kube-rbac-proxy: registry.redhat.io/rhoai/odh-kube-auth-proxy-rhel9@sha256:169d9fe4dc6032344b295221ccbfa20f28e54f6ef490452b21459488bf472f8d
+  kube-rbac-proxy: registry.redhat.io/rhoai/odh-kube-auth-proxy-rhel9@sha256:2c4be58b9cbbfbf0cce82771f9823f5df664a21c139feee9e4f8beb9cf3ad76a
 kind: ConfigMap
 metadata:
   name: kserve-parameters
diff --git a/docs/azure-lb-health-probe-workaround.md b/docs/azure-lb-health-probe-workaround.md
@@ -0,0 +1,80 @@
+# Workaround: Azure Load Balancer Health Probe for Istio Gateway
+
+## Problem
+
+When deploying KServe with Istio Gateway API on AKS, external traffic to the inference gateway on port 80 times out, even though the gateway pod is running and works fine from inside the cluster.
+
+Port 15021 (Istio health port) works externally, but port 80 does not.
+
+## Root Cause
+
+AKS automatically creates an **HTTP health probe** for LoadBalancer service ports that have `appProtocol: http` set. The Istio Gateway controller sets `appProtocol: http` on port 80 by default.
+
+The HTTP health probe sends `GET /` to the nodePort backing port 80. Since no HTTPRoute matches `/`, Istio returns **404**. The Azure Load Balancer treats this as unhealthy and **stops forwarding all traffic** to port 80.
+
+Port 15021 works because its health probe uses **TCP** (just checks if the port is open).
+
+```text
+Azure LB health probe → HTTP GET / → nodePort → Istio port 80 → 404 → backend marked unhealthy → all traffic dropped
+```
+
+## Why deploying an HTTPRoute doesn't fix it
+
+Deploying an HTTPRoute for your model (e.g., `/llm-inference/qwen2-7b-instruct/...`) does not fix this because the health probe hits `/`, not your model path. Unless you have a route that explicitly matches `/` and returns 200, the probe will continue to fail.
+
+## Fix
+
+Annotate the `inference-gateway-istio` service to use a **TCP health probe** for the affected port instead of HTTP.
+
+> **Note:** The port number in the annotation (`port_80`) must match the Gateway listener port. Port 80 is used here because that is what `setup-gateway.sh` configures in the Gateway's `listeners` spec. If your Gateway uses a different port, update the annotation key accordingly (e.g., `port_8080_health-probe_protocol`).
+
+```bash
+kubectl annotate svc inference-gateway-istio -n opendatahub \
+  "service.beta.kubernetes.io/port_80_health-probe_protocol=tcp" \
+  --overwrite
+```
+
+This annotation is applied automatically on AKS when using `setup-gateway.sh`. The manual command above is only needed if you recreate the Gateway without re-running the setup script.
+
+### Verify the probe changed
+
+```bash
+# Find the MC resource group
+NODE_RG=$(az aks show --resource-group <rg> --name <cluster> --query nodeResourceGroup -o tsv)
+
+# Check probes
+az network lb probe list --resource-group "$NODE_RG" --lb-name kubernetes -o table
+```
+
+The port 80 probe should now show `Protocol: Tcp` instead of `Http`.
+
+## How to diagnose this issue
+
+1. Verify the gateway works from inside the cluster (bypasses Azure LB):
+   ```bash
+   kubectl run curl-test --rm -i --restart=Never --image=curlimages/curl \
+     -- curl -s -o /dev/null -w "HTTP %{http_code}" \
+     http://inference-gateway-istio.opendatahub.svc.cluster.local:80/
+   ```
+   If this returns 404 but external access times out, the LB health probe is the issue.
+
+2. Check the Azure LB health probe configuration:
+   ```bash
+   NODE_RG=$(az aks show --resource-group <rg> --name <cluster> --query nodeResourceGroup -o tsv)
+   az network lb probe list --resource-group "$NODE_RG" --lb-name kubernetes -o table
+   ```
+   If the port 80 probe shows `Protocol: Http` and `RequestPath: /`, that confirms the problem.
+
+   > **Note:** If the `inference-gateway-istio` service is annotated with `service.beta.kubernetes.io/azure-load-balancer-internal: "true"`, use `--lb-name kubernetes-internal` instead.
+
+## Notes
+
+- This is an AKS-specific issue. AWS and GCP load balancers default to TCP health checks.
+- On AKS clusters v1.24+, `spec.ports.appProtocol` is used as the health probe protocol with `/` as the default request path. Since the Istio Gateway controller sets `appProtocol: http` on port 80, AKS creates an HTTP probe by default.
+- The annotation `service.beta.kubernetes.io/port_80_health-probe_protocol` is a per-port override. The generic `service.beta.kubernetes.io/azure-load-balancer-health-probe-protocol` applies to all ports but may not take effect if the gateway controller reconciles the service.
+- The Istio gateway service is managed by the Gateway controller and has no annotations by default.
+
+## References
+
+- [Configure a Public Standard Load Balancer in AKS](https://learn.microsoft.com/en-us/azure/aks/configure-load-balancer-standard) — Microsoft documentation on per-port health probe annotation overrides and default probe behavior.
+- [Troubleshoot AKS Health Probe Mode](https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/availability-performance/cluster-service-health-probe-mode-issues) — Troubleshooting guide for health probe issues.
diff --git a/docs/deploying-llm-d-on-managed-kubernetes.md b/docs/deploying-llm-d-on-managed-kubernetes.md
@@ -165,18 +165,16 @@ Red Hat AI Inference Server on managed Kubernetes consists of the following comp
 
 ### Component Interaction
 
-```text
-                                    ┌─────────────────────────────────────┐
-                                    │         Kubernetes Cluster          │
-┌──────────┐    ┌──────────────┐    │  ┌─────────┐    ┌────────────────┐  │
-│  Client  │───▶│   Gateway    │───▶│  │   EPP   │───▶│  vLLM Pods     │  │
-│          │    │   (Istio)    │    │  │Scheduler│    │  (Model)       │  │
-└──────────┘    └──────────────┘    │  └─────────┘    └────────────────┘  │
-                                    │        ▲               ▲            │
-                                    │        │    mTLS      │            │
-                                    │        └───────────────┘            │
-                                    │              cert-manager           │
-                                    └─────────────────────────────────────┘
+```mermaid
+graph LR
+    Client --> Gateway["Gateway<br/>(Istio)"]
+
+    subgraph k8s["Kubernetes Cluster"]
+        Gateway --> EPP["EPP<br/>Scheduler"]
+        EPP --> vLLM["vLLM Pods<br/>(Model)"]
+        cm["cert-manager"] -. mTLS .-> EPP
+        cm -. mTLS .-> vLLM
+    end
 ```
 
 ---
@@ -271,6 +269,20 @@ Verify the Gateway pod is running:
 kubectl get pods -n opendatahub -l gateway.networking.k8s.io/gateway-name=inference-gateway
 ```
 
+### 4.3 AKS: Fix Load Balancer Health Probe
+
+On AKS, external traffic to the inference gateway on port 80 may time out due to the Azure Load Balancer using an HTTP health probe that fails against the Istio gateway. This is handled automatically by `setup-gateway.sh` on AKS.
+
+If you need to apply it manually (e.g., after recreating the Gateway):
+
+```bash
+kubectl annotate svc inference-gateway-istio -n opendatahub \
+  "service.beta.kubernetes.io/port_80_health-probe_protocol=tcp" \
+  --overwrite
+```
+
+> **Note:** The port number in the annotation must match the Gateway listener port (`80` here, as configured in `setup-gateway.sh`). If the Gateway is deleted and recreated without re-running `setup-gateway.sh`, the annotation will be lost and must be reapplied. See [Azure LB Health Probe Workaround](./azure-lb-health-probe-workaround.md) for full details.
+
 ---
 
 ## 5. Deploying an LLM Inference Service
@@ -588,7 +600,8 @@ make deploy-kserve
 For assistance with Red Hat AI Inference Server deployments, contact Red Hat Support or consult the product documentation.
 
 **Additional Resources:**
-- [KServe Chart README](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/charts/kserve/README.md) - KServe Helm chart details, PKI prerequisites, and OCI registry install
-- [Preflight Validation](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/validation/README.md) - Cluster readiness and post-deployment validation checks
-- [Monitoring Setup Guide](../monitoring-stack/) - Optional Prometheus/Grafana configuration for dashboards and autoscaling
-- [KServe LLMInferenceService Samples](https://github.com/red-hat-data-services/kserve/tree/rhoai-3.4/docs/samples/llmisvc)
+
+* [KServe Chart README](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/charts/kserve/README.md) - KServe Helm chart details, PKI prerequisites, and OCI registry install
+* [Preflight Validation](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/validation/README.md) - Cluster readiness and post-deployment validation checks
+* [Monitoring Setup Guide](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/monitoring-stack/README.md) - Optional Prometheus/Grafana configuration for dashboards and autoscaling
+* [KServe LLMInferenceService Samples](https://github.com/red-hat-data-services/kserve/tree/rhoai-3.4/docs/samples/llmisvc)
diff --git a/docs/images/odh.png b/docs/images/odh.png
diff --git a/monitoring-stack/README.md b/monitoring-stack/README.md
@@ -52,4 +52,4 @@ kubectl port-forward -n monitoring svc/prometheus-operated 9090:9090
 ## Dashboards
 
 Community dashboards available at:
-- [llm-d Dashboards](https://github.com/llm-d/llm-d/tree/main/docs/monitoring/dashboards)
+- [llm-d Dashboards](https://github.com/llm-d/llm-d/tree/main/docs/monitoring/grafana/dashboards)
diff --git a/scripts/setup-gateway.sh b/scripts/setup-gateway.sh
@@ -118,6 +118,10 @@ data:
             - name: odh-ca-bundle
               mountPath: ${CA_MOUNT_PATH}
               readOnly: true
+  service: |
+    metadata:
+      annotations:
+        service.beta.kubernetes.io/port_80_health-probe_protocol: tcp
 EOF
 
   log_success "Gateway config ConfigMap created: ${GATEWAY_NAME}-config"
diff --git a/zensical.toml b/zensical.toml
@@ -0,0 +1,44 @@
+[project]
+site_name = "Red Hat AI Inference on managed Kubernetes"
+site_url = "https://opendatahub-io.github.io/rhaii-on-xks"
+repo_url = "https://github.com/opendatahub-io/rhaii-on-xks"
+edit_uri = "edit/main/docs/"
+nav = [
+    {"Home" = [
+        {"Get started" = "index.md"},
+        {"Architecture" = "architecture-xks-deployment.md"},
+    ]},
+    {"Deploy" = [
+        {"Deploy" = "deploying-llm-d-on-managed-kubernetes.md"},
+        {"Set up the gateway" = "gateway-setup-for-kserve.md"},
+        {"Debug" = "collecting-debug-information.md"},
+    ]},
+]
+
+[project.theme]
+logo = "images/odh.png"
+features = [
+    "navigation.tabs",
+    "navigation.tabs.sticky",
+    "navigation.sections",
+    "navigation.expand",
+    "content.action.edit",
+    "content.action.view",
+]
+
+[[project.theme.palette]]  # light scheme entry
+media = "(prefers-color-scheme: light)"
+scheme = "default"
+toggle.icon = "lucide/sun"
+toggle.name = "Switch to dark mode"
+
+[[project.theme.palette]]  # dark scheme entry
+media = "(prefers-color-scheme: dark)"
+scheme = "slate"
+toggle.icon = "lucide/moon"
+toggle.name = "Switch to light mode"
+
+[project.markdown_extensions.pymdownx.superfences]
+custom_fences = [  # registers a fence named "mermaid" with the "mermaid" class
+  { name = "mermaid", class = "mermaid", format = "pymdownx.superfences.fence_code_format" }
+]

-Original file line number
+Diff line change
@@ @@ -1 +1,5 @@ @@
 /.idea
 +.claude/
 +site/
 +*.swp
 +*.swo
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-# rhaii-on-xks`
	`1`	`+# Red Hat AI Inference on managed Kubernetes`
`2`	`2`
`3`	`3`	`Infrastructure Helm charts for deploying Red Hat AI Inference Server (KServe LLMInferenceService) on managed Kubernetes platforms (AKS, CoreWeave).`
`4`	`4`