-
Notifications
You must be signed in to change notification settings - Fork 59
Expand file tree
/
Copy pathinstall-odh.sh
More file actions
executable file
·190 lines (168 loc) · 6.86 KB
/
install-odh.sh
File metadata and controls
executable file
·190 lines (168 loc) · 6.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env bash
# Install OpenDataHub (ODH) operator and apply DataScienceCluster (KServe).
# MaaS is deployed separately via deploy.sh --deployment-mode kustomize.
#
# Prerequisites: cert-manager and LWS operators (run install-cert-manager-and-lws.sh first).
#
# Environment variables:
# OPERATOR_CATALOG - Custom catalog image (optional). When unset, uses community-operators.
# Set to e.g. quay.io/opendatahub/opendatahub-operator-catalog:latest for custom builds.
# OPERATOR_CHANNEL - Subscription channel (default: fast-3)
# OPERATOR_STARTING_CSV - Pin Subscription startingCSV (default: opendatahub-operator.v3.4.0-ea.1). Set to "-" to omit.
# OPERATOR_INSTALL_PLAN_APPROVAL - Manual (default) or Automatic; use "-" to omit.
# Manual: blocks auto-upgrades; this script auto-approves only the first InstallPlan so install does not stall.
# OPERATOR_IMAGE - Custom operator image to patch into CSV (optional)
#
# Usage: ./install-odh.sh
set -euo pipefail
# Resolve paths relative to this script's own location so it works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
DATA_DIR="${REPO_ROOT}/scripts/data"
# Normalize env overrides to empty strings when unset, so later reads are
# safe under `set -u`.
NAMESPACE="${OPERATOR_NAMESPACE:-opendatahub}"
OPERATOR_CATALOG="${OPERATOR_CATALOG:-}"
OPERATOR_CHANNEL="${OPERATOR_CHANNEL:-}"
OPERATOR_STARTING_CSV="${OPERATOR_STARTING_CSV:-}"
OPERATOR_INSTALL_PLAN_APPROVAL="${OPERATOR_INSTALL_PLAN_APPROVAL:-}"
OPERATOR_IMAGE="${OPERATOR_IMAGE:-}"
# Source deployment helpers (log_*, install_olm_operator, wait_for_* — see
# scripts/deployment-helpers.sh for the definitions used below)
source "$REPO_ROOT/scripts/deployment-helpers.sh"
#######################################
# Patch the operator CSV's deployment image with OPERATOR_IMAGE.
# Globals:   OPERATOR_IMAGE (read) - custom image; function is a no-op when empty.
# Arguments: $1 - CSV name prefix to match (e.g. "opendatahub-operator")
#            $2 - namespace containing the CSV
# Returns:   0 when skipped or when the CSV never appears (logged as a warning);
#            non-zero only if the kubectl patch itself fails.
#######################################
patch_operator_csv_if_needed() {
  [[ -z "$OPERATOR_IMAGE" ]] && return 0
  local operator_prefix="$1"
  local namespace="$2"
  log_info "Patching operator CSV with custom image: $OPERATOR_IMAGE"
  local csv_name=""
  local timeout=60
  local elapsed=0
  local interval=5
  # Poll until OLM creates the CSV. The trailing `|| true` is essential: when
  # no CSV matches yet the pipeline exits non-zero, and under
  # `set -euo pipefail` a failed command substitution in the assignment would
  # abort the whole script on the first iteration instead of retrying.
  while [[ $elapsed -lt $timeout ]]; do
    csv_name=$(kubectl get csv -n "$namespace" --no-headers 2>/dev/null \
      | awk -v prefix="$operator_prefix" 'index($1, prefix) == 1 { print $1; exit }' || true)
    [[ -n "$csv_name" ]] && break
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done
  if [[ -z "$csv_name" ]]; then
    log_warn "Could not find CSV for $operator_prefix after ${timeout}s, skipping image patch"
    return 0
  fi
  # Mark the CSV unmanaged first so ODH does not reconcile the image back;
  # annotation failure is tolerated (best-effort).
  kubectl annotate csv "$csv_name" -n "$namespace" opendatahub.io/managed=false --overwrite 2>/dev/null || true
  # JSON-patch the first container image of the first deployment in the CSV.
  kubectl patch csv "$csv_name" -n "$namespace" --type='json' -p="[
  {\"op\": \"replace\", \"path\": \"/spec/install/spec/deployments/0/spec/template/spec/containers/0/image\", \"value\": \"$OPERATOR_IMAGE\"}
  ]"
  log_info "CSV $csv_name patched with image $OPERATOR_IMAGE"
}
echo "=== Installing OpenDataHub operator ==="
echo ""

# 1. Catalog selection: a custom CatalogSource when OPERATOR_CATALOG is set,
#    otherwise the stock community-operators catalog. The channel default is
#    the same either way.
echo "1. Setting up ODH catalog..."
channel="${OPERATOR_CHANNEL:-fast-3}"
if [[ -n "$OPERATOR_CATALOG" ]]; then
  echo " Using custom catalog: $OPERATOR_CATALOG"
  create_custom_catalogsource "odh-custom-catalog" "openshift-marketplace" "$OPERATOR_CATALOG"
  catalog_source="odh-custom-catalog"
else
  echo " Using community-operators"
  catalog_source="community-operators"
fi

# Pin the Subscription to ODH 3.4 EA1 unless overridden; the sentinel "-"
# clears the pin so the channel head is installed instead.
starting_csv="${OPERATOR_STARTING_CSV:-opendatahub-operator.v3.4.0-ea.1}"
if [[ "$starting_csv" == "-" ]]; then
  starting_csv=""
fi

# Manual approval prevents auto-upgrades; install_olm_operator still approves
# the first InstallPlan programmatically. The sentinel "-" omits the field.
plan_approval="${OPERATOR_INSTALL_PLAN_APPROVAL:-Manual}"
if [[ "$plan_approval" == "-" ]]; then
  plan_approval=""
fi
# 2. Install ODH operator via OLM
echo "2. Installing ODH operator..."
# Helper args (see deployment-helpers.sh): package name, namespace, catalog
# source, channel, startingCSV, install mode, catalog namespace, approval.
install_olm_operator \
"opendatahub-operator" \
"$NAMESPACE" \
"$catalog_source" \
"$channel" \
"$starting_csv" \
"AllNamespaces" \
"openshift-marketplace" \
"$plan_approval"
# 3. Patch CSV with custom image if specified
if [[ -n "$OPERATOR_IMAGE" ]]; then
echo "3. Patching operator image..."
patch_operator_csv_if_needed "opendatahub-operator" "$NAMESPACE"
else
echo "3. Skipping operator image patch (OPERATOR_IMAGE not set)"
fi
# 4. Wait for CRDs
# Fatal if missing: the DataScienceCluster CRD is required by every later
# step, so its absence means the operator install itself failed.
echo "4. Waiting for operator CRDs..."
wait_for_crd "datascienceclusters.datasciencecluster.opendatahub.io" 180 || {
log_error "DataScienceCluster CRD not available - operator may not have installed correctly"
exit 1
}
# 5. Wait for webhook
# Best-effort: timeouts here only log a warning, because the
# DSCInitialization apply later in this script retries on webhook failures.
echo "5. Waiting for operator webhook..."
wait_for_resource "deployment" "opendatahub-operator-controller-manager" "$NAMESPACE" 120 || {
log_warn "Webhook deployment not found after 120s, proceeding anyway..."
}
# Second, stricter wait only if the deployment object exists at all.
if kubectl get deployment opendatahub-operator-controller-manager -n "$NAMESPACE" &>/dev/null; then
kubectl wait --for=condition=Available --timeout=120s \
deployment/opendatahub-operator-controller-manager -n "$NAMESPACE" 2>/dev/null || {
log_warn "Webhook deployment not fully ready, proceeding anyway..."
}
fi
# 6. Apply DSCInitialization (with retries)
# The operator's validating webhook may not be serving yet, so the apply is
# retried up to 5 times with a 15s pause between attempts.
# NOTE: the heredoc YAML must be properly indented — the nesting under
# metadata/spec/monitoring is part of the manifest's structure.
echo "6. Applying DSCInitialization..."
if kubectl get dscinitializations default-dsci &>/dev/null; then
  echo " DSCInitialization already exists, skipping"
else
  dsci_applied=false
  for attempt in $(seq 1 5); do
    # Unquoted heredoc delimiter so ${NAMESPACE} expands into the manifest.
    if kubectl apply -f - <<EOF
apiVersion: dscinitialization.opendatahub.io/v1
kind: DSCInitialization
metadata:
  name: default-dsci
spec:
  applicationsNamespace: ${NAMESPACE}
  monitoring:
    managementState: Managed
    namespace: ${NAMESPACE}-monitoring
    metrics: {}
  trustedCABundle:
    managementState: Managed
EOF
    then
      dsci_applied=true
      break
    fi
    echo " Attempt $attempt/5 failed (webhook may not be ready), retrying in 15s..."
    sleep 15
  done
  if [[ "$dsci_applied" != "true" ]]; then
    log_error "Failed to apply DSCInitialization after 5 attempts"
    exit 1
  fi
fi
# 7. Apply DataScienceCluster (modelsAsService Unmanaged - MaaS deployed separately)
echo "7. Applying DataScienceCluster..."
# Idempotent: skip when ANY DataScienceCluster exists anywhere in the cluster
# (grep -q succeeds on any non-empty output). Uses server-side apply.
if kubectl get datasciencecluster -A --no-headers 2>/dev/null | grep -q .; then
echo " DataScienceCluster already exists, skipping"
else
kubectl apply --server-side=true -f "${DATA_DIR}/datasciencecluster-unmanaged.yaml"
fi
# 8. Wait for DataScienceCluster ready (KServe)
# Fatal on timeout: without a ready DSC the rest of the stack cannot work.
echo "8. Waiting for DataScienceCluster (KServe)..."
wait_datasciencecluster_ready "default-dsc" 600 || {
log_error "DataScienceCluster did not become ready"
exit 1
}
# 9. Wait for odh-model-controller webhook to be ready
# The odh-model-controller registers a ConfigMap validating webhook. If we proceed before
# its pods are ready, any ConfigMap create/update fails with "no endpoints available".
echo "9. Waiting for odh-model-controller webhook..."
# Non-fatal: warn and continue if the webhooks are still not serving.
wait_for_validating_webhooks "$NAMESPACE" 180 || {
log_warn "Validating webhooks in $NAMESPACE not ready after 180s, proceeding anyway..."
}
echo ""
echo "=== ODH installation complete ==="
echo ""
echo "Verify:"
echo " kubectl get datasciencecluster -A"
echo " kubectl get pods -n opendatahub"
echo " kubectl get pods -n kserve"