Skip to content

Commit e95a9fe

Browse files
authored
fix: install/deploy fixes (#154)
- Add helper functions for CSV and CRD waiting with proper timeouts instead of sleep commands
- Fix Kuadrant installation timing: split gateway and Kuadrant deployment, and wait for OLM operators before applying Kuadrant config (fixes circular dependencies)
- Add OpenShift routes deployment step
- Only clean up leftover CRDs if Kuadrant is not already installed
- Make install and deployment scripts idempotent

Signed-off-by: Brent Salisbury <bsalisbu@redhat.com>
1 parent e8a5654 commit e95a9fe

File tree

2 files changed

+143
-60
lines changed

2 files changed

+143
-60
lines changed

deployment/scripts/deploy-openshift.sh

Lines changed: 138 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,62 @@ set -e
88
# Helper function to wait for a CRD to appear and become Established.
# Arguments:
#   $1 - CRD name (e.g. "certificates.cert-manager.io")
#   $2 - timeout in seconds (default: 60); a trailing "s" (e.g. "120s") is
#        accepted for compatibility with the older duration-style argument
# Outputs:
#   progress messages on stdout, failure messages on stderr
# Returns:
#   0 once the CRD reports condition=Established
#   1 if the CRD never appears, or appears but does not become Established
wait_for_crd() {
  local crd="$1"
  local timeout="${2:-60}" # timeout in seconds
  local interval=2
  local elapsed=0

  # Tolerate legacy duration strings such as "120s" so the numeric
  # comparison below does not fail with "integer expression expected".
  timeout="${timeout%s}"

  echo "⏳ Waiting for CRD ${crd} to appear (timeout: ${timeout}s)…"
  while [ "$elapsed" -lt "$timeout" ]; do
    if kubectl get crd "$crd" &>/dev/null; then
      echo "✅ CRD ${crd} detected, waiting for it to become Established..."
      # Propagate the result of kubectl wait: returning 0 unconditionally
      # would report success for a CRD that never became Established.
      if kubectl wait --for=condition=Established --timeout="${timeout}s" "crd/$crd" 2>/dev/null; then
        return 0
      fi
      echo "❌ CRD ${crd} did not become Established within ${timeout}s." >&2
      return 1
    fi
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done

  echo "❌ Timed out after ${timeout}s waiting for CRD $crd to appear." >&2
  return 1
}
29+
30+
# Helper function to wait for a CSV to reach the Succeeded phase.
# Arguments:
#   $1 - CSV name (e.g. "kuadrant-operator.v1.3.0-rc2")
#   $2 - namespace (default: kuadrant-system)
#   $3 - timeout in seconds (default: 180); a trailing "s" is tolerated
# Outputs:
#   progress messages on stdout; failure details on stderr
# Returns:
#   0 when .status.phase is "Succeeded"
#   1 when .status.phase is "Failed" or the timeout elapses
wait_for_csv() {
  local csv_name="$1"
  local namespace="${2:-kuadrant-system}"
  local timeout="${3:-180}" # timeout in seconds
  local interval=5
  local elapsed=0
  local last_status_print=0
  local phase
  local message

  # Accept "180s"-style values so the numeric comparison stays valid.
  timeout="${timeout%s}"

  echo "⏳ Waiting for CSV ${csv_name} to succeed (timeout: ${timeout}s)..."
  while [ "$elapsed" -lt "$timeout" ]; do
    # Declared separately above so a kubectl failure is not masked by 'local'.
    phase=$(kubectl get csv -n "$namespace" "$csv_name" -o jsonpath='{.status.phase}' 2>/dev/null) \
      || phase="NotFound"

    case "$phase" in
      "Succeeded")
        echo "✅ CSV ${csv_name} succeeded"
        return 0
        ;;
      "Failed")
        echo "❌ CSV ${csv_name} failed" >&2
        # Keep the failure reason on stderr, newline-terminated, so it stays
        # with the error header and does not pollute captured stdout.
        message=$(kubectl get csv -n "$namespace" "$csv_name" -o jsonpath='{.status.message}' 2>/dev/null) \
          || message=""
        if [ -n "$message" ]; then
          echo "   ${message}" >&2
        fi
        return 1
        ;;
      *)
        # Print an intermediate status line at most every 30 seconds.
        if [ $((elapsed - last_status_print)) -ge 30 ]; then
          echo "   CSV ${csv_name} status: ${phase} (${elapsed}s elapsed)"
          last_status_print=$elapsed
        fi
        ;;
    esac

    sleep "$interval"
    elapsed=$((elapsed + interval))
  done

  echo "❌ Timed out after ${timeout}s waiting for CSV ${csv_name}" >&2
  return 1
}
2268

2369
# Helper function to wait for pods in a namespace to be ready
@@ -158,34 +204,33 @@ echo "3️⃣ Installing dependencies..."
158204
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
159205
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
160206

207+
# Only clean up leftover CRDs if Kuadrant operators are NOT already installed
208+
echo " Checking for existing Kuadrant installation..."
209+
if ! kubectl get csv -n kuadrant-system kuadrant-operator.v1.3.0-rc2 &>/dev/null 2>&1; then
210+
echo " No existing installation found, checking for leftover CRDs..."
211+
LEFTOVER_CRDS=$(kubectl get crd 2>/dev/null | grep -E "kuadrant|authorino|limitador" | awk '{print $1}')
212+
if [ -n "$LEFTOVER_CRDS" ]; then
213+
echo " Found leftover CRDs, cleaning up before installation..."
214+
echo "$LEFTOVER_CRDS" | xargs -r kubectl delete crd --timeout=30s 2>/dev/null || true
215+
sleep 5 # Brief wait for cleanup to complete
216+
fi
217+
else
218+
echo " ✅ Kuadrant operator already installed, skipping CRD cleanup"
219+
fi
220+
161221
echo " Installing cert-manager..."
162222
"$SCRIPT_DIR/install-dependencies.sh" --cert-manager
163223

164224
# Wait for cert-manager CRDs to be ready
165225
echo " Waiting for cert-manager CRDs to be established..."
166-
wait_for_crd "certificates.cert-manager.io" "120s" || \
226+
wait_for_crd "certificates.cert-manager.io" 120 || \
167227
echo " ⚠️ Certificate CRD not yet available"
168228

169-
# Clean up any leftover Kuadrant CRDs from previous installations
170-
echo " Checking for leftover Kuadrant CRDs..."
171-
LEFTOVER_CRDS=$(kubectl get crd 2>/dev/null | grep -E "kuadrant|authorino|limitador" | awk '{print $1}')
172-
if [ -n "$LEFTOVER_CRDS" ]; then
173-
echo " Found leftover CRDs, cleaning up..."
174-
echo "$LEFTOVER_CRDS" | xargs -r kubectl delete crd --timeout=30s 2>/dev/null || true
175-
fi
176-
177229
echo " Installing Kuadrant..."
178230
"$SCRIPT_DIR/install-dependencies.sh" --kuadrant
179231

180-
# Wait for Kuadrant CRDs to be ready
181-
echo " Waiting for Kuadrant CRDs to be established..."
182-
wait_for_crd "authpolicies.kuadrant.io" "120s" || \
183-
echo " ⚠️ AuthPolicy CRD not yet available"
184-
wait_for_crd "ratelimitpolicies.kuadrant.io" "120s" || \
185-
echo " ⚠️ RateLimitPolicy CRD not yet available"
186-
187232
echo ""
188-
echo "4️⃣ Deploying Gateway and networking infrastructure..."
233+
echo "4️⃣ Deploying Gateway infrastructure..."
189234
CLUSTER_DOMAIN=$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}')
190235
if [ -z "$CLUSTER_DOMAIN" ]; then
191236
echo "❌ Failed to retrieve cluster domain from OpenShift"
@@ -194,14 +239,14 @@ fi
194239
export CLUSTER_DOMAIN
195240
echo " Cluster domain: $CLUSTER_DOMAIN"
196241

197-
echo " Deploying Gateway API and Kuadrant configuration..."
242+
echo " Deploying Gateway and GatewayClass..."
198243
cd "$PROJECT_ROOT"
199-
kustomize build deployment/base/networking | envsubst | kubectl apply --server-side=true --force-conflicts -f -
244+
envsubst < deployment/base/networking/gateway-api.yaml | kubectl apply --server-side=true --force-conflicts -f -
200245

201246
# Wait for Gateway API CRDs if not already present
202247
if ! kubectl get crd gateways.gateway.networking.k8s.io &>/dev/null 2>&1; then
203248
echo " Waiting for Gateway API CRDs..."
204-
wait_for_crd "gateways.gateway.networking.k8s.io" "120s" || \
249+
wait_for_crd "gateways.gateway.networking.k8s.io" 120 || \
205250
echo " ⚠️ Gateway API CRDs not yet available"
206251
fi
207252

@@ -223,7 +268,7 @@ else
223268

224269
# Wait for CRDs and operator pods, then retry
225270
echo " Waiting for KServe CRDs to be established..."
226-
if wait_for_crd "llminferenceservices.serving.kserve.io" "120s"; then
271+
if wait_for_crd "llminferenceservices.serving.kserve.io" 120; then
227272

228273
wait_for_pods "opendatahub" 120 || true
229274
wait_for_validating_webhooks opendatahub 90 || true
@@ -239,24 +284,61 @@ else
239284
fi
240285

241286
echo ""
242-
echo "6️⃣ Deploying MaaS API..."
287+
echo "6️⃣ Waiting for Kuadrant operators to be installed by OLM..."
288+
# Wait for CSVs to reach Succeeded state (this ensures CRDs are created and deployments are ready)
289+
wait_for_csv "kuadrant-operator.v1.3.0-rc2" "kuadrant-system" 300 || \
290+
echo " ⚠️ Kuadrant operator CSV did not succeed, continuing anyway..."
291+
292+
wait_for_csv "authorino-operator.v0.22.0" "kuadrant-system" 60 || \
293+
echo " ⚠️ Authorino operator CSV did not succeed"
294+
295+
wait_for_csv "limitador-operator.v0.16.0" "kuadrant-system" 60 || \
296+
echo " ⚠️ Limitador operator CSV did not succeed"
297+
298+
wait_for_csv "dns-operator.v0.15.0" "kuadrant-system" 60 || \
299+
echo " ⚠️ DNS operator CSV did not succeed"
300+
301+
# Verify CRDs are present
302+
echo " Verifying Kuadrant CRDs are available..."
303+
wait_for_crd "kuadrants.kuadrant.io" 30 || echo " ⚠️ kuadrants.kuadrant.io CRD not found"
304+
wait_for_crd "authpolicies.kuadrant.io" 10 || echo " ⚠️ authpolicies.kuadrant.io CRD not found"
305+
wait_for_crd "ratelimitpolicies.kuadrant.io" 10 || echo " ⚠️ ratelimitpolicies.kuadrant.io CRD not found"
306+
wait_for_crd "tokenratelimitpolicies.kuadrant.io" 10 || echo " ⚠️ tokenratelimitpolicies.kuadrant.io CRD not found"
307+
308+
echo ""
309+
echo "7️⃣ Deploying Kuadrant configuration (now that CRDs exist)..."
243310
cd "$PROJECT_ROOT"
244-
kustomize build deployment/base/maas-api | envsubst | kubectl apply -f -
311+
kubectl apply -f deployment/base/networking/kuadrant.yaml
245312

246313
echo ""
247-
echo "7️⃣ Applying OpenShift-specific configurations..."
314+
echo "8️⃣ Deploying MaaS API..."
315+
cd "$PROJECT_ROOT"
316+
kustomize build deployment/base/maas-api | envsubst | kubectl apply -f -
248317

318+
echo ""
319+
echo "9️⃣ Applying OpenShift-specific configurations..."
320+
321+
# Patch Kuadrant for OpenShift Gateway Controller
322+
echo " Patching Kuadrant operator..."
323+
if ! kubectl -n kuadrant-system get deployment kuadrant-operator-controller-manager -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="ISTIO_GATEWAY_CONTROLLER_NAMES")]}' | grep -q "ISTIO_GATEWAY_CONTROLLER_NAMES"; then
324+
kubectl get csv kuadrant-operator.v1.3.0-rc2 -n kuadrant-system -o json | \
325+
jq '.spec.install.spec.deployments[0].spec.template.spec.containers[0].env |= map(if .name == "ISTIO_GATEWAY_CONTROLLER_NAMES" then . + {"value": "istio.io/gateway-controller,openshift.io/gateway-controller/v1"} else . end)' | \
326+
kubectl apply -f -
327+
echo " ✅ Kuadrant operator patched"
328+
else
329+
echo " ✅ Kuadrant operator already configured"
330+
fi
249331

250332
echo ""
251-
echo "8️⃣ Waiting for Gateway to be ready..."
333+
echo "🔟 Waiting for Gateway to be ready..."
252334
echo " Note: This may take a few minutes if Service Mesh is being automatically installed..."
253335

254336
# Wait for Service Mesh CRDs to be established
255337
if kubectl get crd istios.sailoperator.io &>/dev/null 2>&1; then
256338
echo " ✅ Service Mesh operator already detected"
257339
else
258340
echo " Waiting for automatic Service Mesh installation..."
259-
if wait_for_crd "istios.sailoperator.io" "300s"; then
341+
if wait_for_crd "istios.sailoperator.io" 300; then
260342
echo " ✅ Service Mesh operator installed"
261343
else
262344
echo " ⚠️ Service Mesh CRD not detected within timeout"
@@ -269,24 +351,18 @@ kubectl wait --for=condition=Programmed gateway maas-default-gateway -n openshif
269351
echo " ⚠️ Gateway is taking longer than expected, continuing..."
270352

271353
echo ""
272-
echo "9️⃣ Applying Gateway Policies..."
354+
echo "1️⃣1️⃣ Applying Gateway Policies..."
273355
cd "$PROJECT_ROOT"
274356
kustomize build deployment/base/policies | kubectl apply --server-side=true --force-conflicts -f -
275357

276358
echo ""
277-
echo "🔟 Restarting Kuadrant operators for policy enforcement..."
278-
kubectl rollout restart deployment/kuadrant-operator-controller-manager -n kuadrant-system
279-
kubectl rollout restart deployment/authorino-operator -n kuadrant-system
280-
kubectl rollout restart deployment/limitador-operator-controller-manager -n kuadrant-system
281-
282-
# Wait for rollouts to complete
283-
echo " Waiting for operators to restart..."
284-
kubectl rollout status deployment/kuadrant-operator-controller-manager -n kuadrant-system --timeout=120s
285-
kubectl rollout status deployment/authorino-operator -n kuadrant-system --timeout=120s
286-
kubectl rollout status deployment/limitador-operator-controller-manager -n kuadrant-system --timeout=120s
359+
echo "1️⃣2️⃣ Deploying OpenShift Routes..."
360+
cd "$PROJECT_ROOT"
361+
envsubst < deployment/overlays/openshift/openshift-routes.yaml | kubectl apply -f -
362+
envsubst < deployment/overlays/openshift/gateway-route.yaml | kubectl apply -f -
287363

288364
echo ""
289-
echo "1️⃣1️⃣ Patching AuthPolicy with correct audience..."
365+
echo "1️⃣3️⃣ Patching AuthPolicy with correct audience..."
290366
AUD="$(kubectl create token default --duration=10m 2>/dev/null | cut -d. -f2 | base64 -d 2>/dev/null | jq -r '.aud[0]' 2>/dev/null)"
291367
if [ -n "$AUD" ] && [ "$AUD" != "null" ]; then
292368
echo " Detected audience: $AUD"
@@ -303,7 +379,7 @@ else
303379
fi
304380

305381
echo ""
306-
echo "1️⃣2️⃣ Updating Limitador image for metrics exposure..."
382+
echo "1️⃣4️⃣ Updating Limitador image for metrics exposure..."
307383
kubectl -n kuadrant-system patch limitador limitador --type merge \
308384
-p '{"spec":{"image":"quay.io/kuadrant/limitador:1a28eac1b42c63658a291056a62b5d940596fd4c","version":""}}' 2>/dev/null && \
309385
echo " ✅ Limitador image updated" || \
@@ -342,14 +418,20 @@ echo ""
342418
echo "1. Deploy a sample model:"
343419
echo " kustomize build docs/samples/models/simulator | kubectl apply -f -"
344420
echo ""
345-
echo "2. Test the API:"
346-
echo " Access the MaaS API at: https://maas-api.$CLUSTER_DOMAIN"
347-
echo " Access models through: https://gateway.$CLUSTER_DOMAIN"
421+
echo "2. Get Gateway endpoint:"
422+
echo " CLUSTER_DOMAIN=\$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}')"
423+
echo " HOST=\"maas-api.\${CLUSTER_DOMAIN}\""
348424
echo ""
349-
echo "3. Get a token:"
350-
echo " curl -sSk -H \"Authorization: Bearer \$(oc whoami -t)\" \\"
351-
echo " -H \"Content-Type: application/json\" -X POST \\"
352-
echo " -d '{\"expiration\": \"10m\"}' \\"
353-
echo " \"https://maas-api.$CLUSTER_DOMAIN/v1/tokens\""
425+
echo "3. Get authentication token:"
426+
echo " TOKEN_RESPONSE=\$(curl -sSk -H \"Authorization: Bearer \$(oc whoami -t)\" -H \"Content-Type: application/json\" -X POST -d '{\"expiration\": \"10m\"}' \"\${HOST}/maas-api/v1/tokens\")"
427+
echo " TOKEN=\$(echo \$TOKEN_RESPONSE | jq -r .token)"
354428
echo ""
355-
echo "For troubleshooting, check the deployment guide at deployment/README.md"
429+
echo "4. Test model endpoint:"
430+
echo " MODELS=\$(curl \${HOST}/maas-api/v1/models -H \"Content-Type: application/json\" -H \"Authorization: Bearer \$TOKEN\" | jq -r .)"
431+
echo " MODEL_NAME=\$(echo \$MODELS | jq -r '.data[0].id')"
432+
echo " MODEL_URL=\"\${HOST}/llm/\${MODEL_NAME}/v1/chat/completions\""
433+
echo " curl -sSk -H \"Authorization: Bearer \$TOKEN\" -H \"Content-Type: application/json\" -d '{\"model\": \"\${MODEL_NAME}\", \"prompt\": \"Hello\", \"max_tokens\": 50}' \"\${MODEL_URL}\""
434+
echo ""
435+
echo "5. Test rate limiting:"
436+
echo " for i in {1..16}; do curl -sSk -o /dev/null -w \"%{http_code}\\n\" -H \"Authorization: Bearer \$TOKEN\" -H \"Content-Type: application/json\" -d '{\"model\": \"\${MODEL_NAME}\", \"prompt\": \"Hello\", \"max_tokens\": 50}' \"\${MODEL_URL}\"; done"
437+

deployment/scripts/install-dependencies.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ install_component() {
9898
return 0
9999
fi
100100

101-
# Inline handler for Kuadrant (installed via Helm)
101+
# Inline handler for Kuadrant (installed via OLM)
102102
if [[ "$component" == "kuadrant" ]]; then
103103
# Ensure kuadrant-system namespace exists
104104
kubectl create namespace kuadrant-system 2>/dev/null || echo "✅ Namespace kuadrant-system already exists"
105-
105+
106106

107107
echo "🚀 Creating Kuadrant OperatorGroup..."
108108
kubectl apply -f - <<EOF
@@ -115,7 +115,7 @@ spec:
115115
targetNamespaces:
116116
- kuadrant-system
117117
EOF
118-
118+
119119
# Check if the CatalogSource already exists before applying
120120
if kubectl get catalogsource kuadrant-operator-catalog -n kuadrant-system &>/dev/null; then
121121
echo "✅ Kuadrant CatalogSource already exists in namespace kuadrant-system, skipping creation."
@@ -156,7 +156,7 @@ EOF
156156
ATTEMPTS=0
157157
MAX_ATTEMPTS=5
158158
while true; do
159-
159+
160160
if kubectl get deployment/kuadrant-operator-controller-manager -n kuadrant-system &>/dev/null; then
161161
break
162162
else
@@ -362,3 +362,4 @@ if [[ "$COMPONENT_SELECTED" == true ]]; then
362362
echo "🎉 Selected components installed successfully!"
363363
fi
364364
fi
365+

0 commit comments

Comments
 (0)