#!/usr/bin/env bash
#
# Deploy the llama-stack-provider-ragas e2e test environment on an OpenShift cluster.
#
# Usage:
# ./deploy-e2e.sh --build
# ./deploy-e2e.sh --image <image-ref>
#
# Reads credentials from ../../.env (repo root) and creates a single
# 'ragas-env' k8s secret from it.
#
# Prerequisites:
# - oc CLI installed and logged into an OpenShift cluster
# - podman (only required for --build mode)
#
# Strict mode: abort on command failure (-e), on use of unset variables (-u),
# and when any stage of a pipeline fails (-o pipefail) — plain `set -e` only
# sees the last stage of a pipe, which matters for the `… | oc apply -f -`
# pipelines below.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Canonicalize to an absolute path so later references (.env, build context)
# do not depend on the caller's working directory.
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
readonly SCRIPT_DIR REPO_ROOT
readonly IMAGE_NAME="llama-stack-provider-ragas-distro-image"
readonly NAMESPACE="ragas-test"
| 23 | + |
# ---------------------------------------------------------------------------
# Parse arguments
# ---------------------------------------------------------------------------
# MODE is "build" or "image"; IMAGE_REF is only set in "image" mode here
# (in "build" mode it is derived after the push).
MODE=""
IMAGE_REF=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --build)
      MODE="build"
      shift
      ;;
    --image)
      MODE="image"
      # ${2:-} avoids an "unbound variable" crash under `set -u` when the
      # image reference is missing, so the friendly error below is reached.
      IMAGE_REF="${2:-}"
      if [[ -z "${IMAGE_REF}" ]]; then
        echo "Error: --image requires an image reference argument." >&2
        exit 1
      fi
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      echo "Usage: $0 --build | --image <image-ref>" >&2
      exit 1
      ;;
  esac
done
| 52 | + |
# A mode is mandatory: the caller must pick exactly one of --build / --image.
[[ -n "${MODE}" ]] || {
  echo "Usage: $0 --build | --image <image-ref>"
  exit 1
}
| 57 | + |
# ---------------------------------------------------------------------------
# Prerequisites
# ---------------------------------------------------------------------------
echo "Checking prerequisites..."

# The oc CLI must be installed and already authenticated against a cluster.
command -v oc >/dev/null 2>&1 || {
  echo "Error: oc is not installed."
  exit 1
}

oc whoami >/dev/null 2>&1 || {
  echo "Error: Not logged into an OpenShift cluster. Run 'oc login' first."
  exit 1
}

echo " Logged in as: $(oc whoami)"
echo " Cluster: $(oc whoami --show-server)"
| 75 | + |
# ---------------------------------------------------------------------------
# Resolve image
# ---------------------------------------------------------------------------
# In --build mode: build locally with podman, push to the cluster's internal
# registry, and point IMAGE_REF at the in-cluster service address.
# In --image mode: IMAGE_REF was already supplied on the command line.
if [[ "${MODE}" == "build" ]]; then
  if ! command -v podman &> /dev/null; then
    echo "Error: podman is not installed (required for --build)."
    exit 1
  fi

  echo ""
  echo "=== Building image from Containerfile ==="

  # Detect cluster node architecture (not local host arch); default to amd64
  # if the query fails.
  NODE_ARCH=$(oc get nodes -o jsonpath='{.items[0].status.nodeInfo.architecture}' 2>/dev/null || echo "amd64")
  case "${NODE_ARCH}" in
    amd64) PLATFORM="linux/amd64" ;;
    arm64) PLATFORM="linux/arm64" ;;
    *) echo "Warning: unknown cluster architecture ${NODE_ARCH}, defaulting to linux/amd64"; PLATFORM="linux/amd64" ;;
  esac
  echo " Cluster node architecture: ${NODE_ARCH} -> ${PLATFORM}"

  # Build the image
  LOCAL_TAG="${IMAGE_NAME}:latest"
  echo " Building ${LOCAL_TAG}..."
  podman build --platform "${PLATFORM}" \
    --build-arg HF_TOKEN="${HF_TOKEN:-}" \
    -t "${LOCAL_TAG}" \
    -f "${SCRIPT_DIR}/Containerfile" "${REPO_ROOT}"

  # Expose the OpenShift internal registry route (idempotent; failure is
  # deliberately ignored, e.g. when the route is already exposed).
  echo " Exposing OpenShift internal registry..."
  oc patch configs.imageregistry.operator.openshift.io/cluster \
    --type=merge --patch '{"spec":{"defaultRoute":true}}' 2>/dev/null || true

  # Wait briefly for the route to appear (12 x 5s = 60s max).
  # Initialize first so the emptiness check below is well-defined even if
  # every attempt fails (and safe under `set -u`).
  REGISTRY_ROUTE=""
  for _ in $(seq 1 12); do
    REGISTRY_ROUTE=$(oc get route default-route -n openshift-image-registry \
      --template='{{ .spec.host }}' 2>/dev/null) && break
    sleep 5
  done

  if [[ -z "${REGISTRY_ROUTE}" ]]; then
    echo "Error: Could not determine the OpenShift internal registry route."
    exit 1
  fi
  echo " Registry route: ${REGISTRY_ROUTE}"

  # Login to the registry. Feed the token through stdin so it never appears
  # on the podman command line (visible to other users via `ps`).
  echo " Logging into registry..."
  oc whoami -t | podman login --tls-verify=false -u "$(oc whoami)" --password-stdin "${REGISTRY_ROUTE}"

  # Ensure the namespace exists before pushing (registry needs the namespace/project)
  oc create namespace "${NAMESPACE}" 2>/dev/null || true

  # Tag and push
  REMOTE_TAG="${REGISTRY_ROUTE}/${NAMESPACE}/${IMAGE_NAME}:latest"
  echo " Tagging ${LOCAL_TAG} -> ${REMOTE_TAG}"
  podman tag "${LOCAL_TAG}" "${REMOTE_TAG}"

  echo " Pushing to internal registry..."
  podman push --tls-verify=false "${REMOTE_TAG}"

  # The in-cluster image reference uses the internal service address
  IMAGE_REF="image-registry.openshift-image-registry.svc:5000/${NAMESPACE}/${IMAGE_NAME}:latest"
  echo " In-cluster image ref: ${IMAGE_REF}"

elif [[ "${MODE}" == "image" ]]; then
  echo ""
  echo "=== Using pre-built image ==="
  echo " Image: ${IMAGE_REF}"
fi
| 147 | + |
# ---------------------------------------------------------------------------
# Install operators
# ---------------------------------------------------------------------------
echo ""
echo "=== Installing Open Data Hub operator ==="
oc apply -f "${SCRIPT_DIR}/manifests/operators/opendatahub-operator.yaml"

# Poll until the operator CSV reports Succeeded (up to 60 x 10s = 10 min).
echo "Waiting for ODH operator to be ready..."
for attempt in {1..60}; do
  if oc get csv -n openshift-operators 2>/dev/null | grep -q "opendatahub-operator.*Succeeded"; then
    echo " ODH operator is ready."
    break
  fi
  if [[ "${attempt}" -eq 60 ]]; then
    echo "Error: Timed out waiting for ODH operator to install."
    exit 1
  fi
  sleep 10
done

echo ""
echo "=== Configuring DataScienceCluster ==="
oc apply -f "${SCRIPT_DIR}/manifests/operators/datasciencecluster.yaml"

# Poll the DSC phase until it reports Ready (up to 60 x 10s = 10 min).
echo "Waiting for DataScienceCluster to be ready..."
for attempt in {1..60}; do
  if oc get dsc default-dsc -o jsonpath='{.status.phase}' 2>/dev/null | grep -q "Ready"; then
    echo " DataScienceCluster is ready."
    break
  fi
  if [[ "${attempt}" -eq 60 ]]; then
    echo "Error: Timed out waiting for DataScienceCluster to become ready."
    exit 1
  fi
  sleep 10
done

echo ""
echo "=== Installing LlamaStack operator ==="
oc apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/release/operator.yaml

echo "Waiting for LlamaStack operator to be ready..."
oc wait --for=condition=available deployment/llama-stack-k8s-operator-controller-manager \
  -n llama-stack-k8s-operator-system --timeout=120s
| 192 | + |
# ---------------------------------------------------------------------------
# Create namespace and apply manifests
# ---------------------------------------------------------------------------
MANIFESTS="${SCRIPT_DIR}/manifests"

echo ""
echo "=== Setting up ${NAMESPACE} namespace ==="
# Idempotent: "already exists" errors are ignored on purpose.
oc create namespace "${NAMESPACE}" 2>/dev/null || true

echo "Applying configmaps and secrets..."
oc apply -f "${MANIFESTS}/configmap-and-secrets.yaml"

echo "Creating ragas-env secret from .env..."
ENV_FILE="${REPO_ROOT}/.env"
[[ -f "${ENV_FILE}" ]] || {
  echo "Error: ${ENV_FILE} not found."
  exit 1
}
# Render the secret client-side, then apply so re-runs update it in place.
oc create secret generic ragas-env -n "${NAMESPACE}" \
  --from-env-file="${ENV_FILE}" \
  --dry-run=client -o yaml | oc apply -f -

echo "Applying MinIO (results storage)..."
oc apply -f "${MANIFESTS}/minio.yaml"

echo "Applying Kubeflow pipeline resources (aws-credentials)..."
oc apply -f "${MANIFESTS}/kubeflow-pipeline-resources.yaml"

echo "Applying DataSciencePipelinesApplication..."
oc apply -f "${MANIFESTS}/datasciencepipelinesapplication.yaml"

echo "Applying LlamaStackDistribution CR (image: ${IMAGE_REF})..."
# Substitute the resolved image into the CR template before applying.
sed "s|__LLAMA_STACK_IMAGE__|${IMAGE_REF}|g" \
  "${MANIFESTS}/llama-stack-distribution.yaml" | oc apply -f -
| 225 | + |
# ---------------------------------------------------------------------------
# Wait for MinIO (results storage)
# ---------------------------------------------------------------------------
echo ""
echo "=== Waiting for MinIO ==="
echo "Waiting for MinIO deployment..."
oc wait --for=condition=available deployment/ragas-results-minio -n "${NAMESPACE}" --timeout=120s

echo "Waiting for MinIO bucket creation job..."
oc wait --for=condition=complete job/minio-create-bucket -n "${NAMESPACE}" --timeout=120s

# ---------------------------------------------------------------------------
# Wait for Data Science Pipelines
# ---------------------------------------------------------------------------
echo ""
echo "=== Waiting for Data Science Pipelines ==="
echo "Waiting for DSPA to be ready..."
for i in $(seq 1 60); do
  # `|| true` keeps a transient `oc get` failure (e.g. the DSPA resource not
  # registered yet) from aborting the whole script under `set -e`; the loop
  # should simply retry instead.
  DSPA_READY=$(oc get dspa ragas-e2e-dspa -n "${NAMESPACE}" \
    -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)
  if [ "${DSPA_READY}" = "True" ]; then
    echo " DSPA is ready."
    break
  fi
  if [ "$i" -eq 60 ]; then
    echo "Error: Timed out waiting for DSPA to become ready."
    exit 1
  fi
  sleep 10
done
| 256 | + |
# ---------------------------------------------------------------------------
# Wait for operator reconciliation and deployments
# ---------------------------------------------------------------------------
echo ""
echo "=== Waiting for deployments ==="

# The LlamaStack operator creates the deployment asynchronously after the CR
# is applied; poll for its existence (up to 30 x 5s = 150s).
echo "Waiting for operator to reconcile LlamaStackDistribution..."
for attempt in {1..30}; do
  if oc get deployment/lsd-ragas-test -n "${NAMESPACE}" &>/dev/null; then
    echo " Deployment created."
    break
  fi
  if [[ "${attempt}" -eq 30 ]]; then
    echo "Error: Timed out waiting for deployment/lsd-ragas-test to be created by the operator."
    exit 1
  fi
  sleep 5
done

echo "Waiting for llama-stack deployment..."
oc wait --for=condition=available deployment/lsd-ragas-test -n "${NAMESPACE}" --timeout=300s
| 278 | + |
# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
# One here-doc instead of a run of echo statements; the rendered output is
# identical (the unquoted EOF delimiter lets ${NAMESPACE} etc. expand).
cat <<EOF

=========================================
 E2E deployment complete!
=========================================

 Namespace: ${NAMESPACE}
 Image: ${IMAGE_REF}
 Env file: ${ENV_FILE}

Next steps:
 1. Verify pods: oc get pods -n ${NAMESPACE}
 2. Port forward: oc port-forward -n ${NAMESPACE} svc/lsd-ragas-test-service 8321:8321 &
 3. Test API: curl http://localhost:8321/v1/models

To tear down:
 ./teardown-e2e.sh
EOF