Skip to content

Commit c423e51

Browse files
committed
Merge remote-tracking branch 'origin/main' into architecture-section
2 parents 37fa40a + bfd36ea commit c423e51

File tree

6 files changed

+99
-149
lines changed

6 files changed

+99
-149
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# # ODH - Models as a Service with Policy Management
1+
# ODH - Models as a Service with Policy Management
22

33
Our goal is to create a comprehensive platform for **Models as a Service** with real-time policy management.
44

deployment/scripts/deploy-openshift.sh

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ echo "Required tools:"
162162
echo " - oc: $(oc version --client --short 2>/dev/null | head -n1 || echo 'not found')"
163163
echo " - jq: $(jq --version 2>/dev/null || echo 'not found')"
164164
echo " - kustomize: $(kustomize version --short 2>/dev/null || echo 'not found')"
165+
echo " - git: $(git --version 2>/dev/null || echo 'not found')"
165166
echo ""
166167
echo "ℹ️ Note: OpenShift Service Mesh should be automatically installed when GatewayClass is created."
167168
echo " If the Gateway gets stuck in 'Waiting for controller', you may need to manually"
@@ -243,44 +244,13 @@ echo " Deploying Gateway and GatewayClass..."
243244
cd "$PROJECT_ROOT"
244245
envsubst < deployment/base/networking/gateway-api.yaml | kubectl apply --server-side=true --force-conflicts -f -
245246

246-
# Wait for Gateway API CRDs if not already present
247-
if ! kubectl get crd gateways.gateway.networking.k8s.io &>/dev/null 2>&1; then
248-
echo " Waiting for Gateway API CRDs..."
249-
wait_for_crd "gateways.gateway.networking.k8s.io" 120 || \
250-
echo " ⚠️ Gateway API CRDs not yet available"
251-
fi
252-
253247
echo ""
254248
echo "5️⃣ Checking for OpenDataHub/RHOAI KServe..."
255249
if kubectl get crd llminferenceservices.serving.kserve.io &>/dev/null 2>&1; then
256250
echo " ✅ KServe CRDs already present (ODH/RHOAI detected)"
257251
else
258252
echo " ⚠️ KServe not detected. Deploying ODH KServe components..."
259-
echo " Note: This may require multiple attempts as CRDs need to be established first."
260-
261-
# First attempt
262-
echo " Attempting ODH KServe deployment (attempt 1/2)..."
263-
if kustomize build "$PROJECT_ROOT/deployment/components/odh/kserve" | kubectl apply --server-side=true --force-conflicts -f - 2>/dev/null; then
264-
echo " ✅ Initial deployment successful"
265-
else
266-
echo " ⚠️ First attempt failed (expected if CRDs not yet ready)"
267-
fi
268-
269-
# Wait for CRDs and operator pods, then retry
270-
echo " Waiting for KServe CRDs to be established..."
271-
if wait_for_crd "llminferenceservices.serving.kserve.io" 120; then
272-
273-
wait_for_pods "opendatahub" 120 || true
274-
wait_for_validating_webhooks opendatahub 90 || true
275-
276-
echo " Retrying deployment (attempt 2/2)..."
277-
kustomize build "$PROJECT_ROOT/deployment/components/odh/kserve" | kubectl apply --server-side=true --force-conflicts -f - && \
278-
echo " ✅ ODH KServe components deployed successfully" || \
279-
echo " ⚠️ ODH KServe deployment failed. This may be expected if ODH operator manages these resources."
280-
else
281-
echo " ⚠️ CRDs did not become ready in time. Continuing anyway..."
282-
echo " Run: kustomize build $PROJECT_ROOT/deployment/components/odh/kserve | kubectl apply --server-side=true --force-conflicts -f -"
283-
fi
253+
"$SCRIPT_DIR/install-dependencies.sh" --odh
284254
fi
285255

286256
echo ""

deployment/scripts/installers/install-odh.sh

Lines changed: 93 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,71 @@
55

66
set -e
77

8+
9+
# TODO: For now, default to DEV_INSTALL, because no suitable ODH version for MaaS has
10+
# TODO: been released yet. Switch DEV_INSTALL to false once a suitable ODH release is available.
11+
DEV_INSTALL=true
12+
if [[ $# -eq 1 ]] && [[ "$1" == "--dev" ]]; then
13+
DEV_INSTALL=true
14+
elif [[ $# -ne 0 ]]; then
15+
echo "This script only supports a single argument: --dev"
16+
exit 1
17+
fi
18+
819
echo "========================================="
920
echo "🚀 OpenDataHub (ODH) Installation"
1021
echo "========================================="
1122
echo ""
1223

1324
# Step 1: Install ODH Operator
14-
echo "1️⃣ Installing ODH Operator..."
25+
if [[ "$DEV_INSTALL" == true ]]; then
26+
ODH_OPERATOR_NS="opendatahub-operator-system"
27+
echo "1️⃣ Installing ODH Operator from repository manifests..."
28+
cat <<EOF | kubectl apply -f -
29+
apiVersion: v1
30+
kind: Namespace
31+
metadata:
32+
name: $ODH_OPERATOR_NS
33+
EOF
34+
35+
TMP_DIR=$(mktemp -d)
36+
trap 'rm -fr -- "$TMP_DIR"' EXIT
37+
38+
pushd $TMP_DIR
39+
git clone -q --depth 1 "https://github.com/opendatahub-io/opendatahub-operator.git"
40+
if [[ $? -ne 0 ]]; then
41+
echo " Failed cloning repository https://github.com/opendatahub-io/opendatahub-operator.git"
42+
popd
43+
exit 1
44+
fi
45+
46+
pushd ./opendatahub-operator
47+
cp config/manager/kustomization.yaml.in config/manager/kustomization.yaml
48+
sed -i 's#REPLACE_IMAGE#quay.io/opendatahub/opendatahub-operator#' config/manager/kustomization.yaml
49+
kustomize build --load-restrictor LoadRestrictionsNone config/default | kubectl apply --namespace $ODH_OPERATOR_NS -f -
50+
popd
51+
popd
1552

16-
# Check if operator is already installed
17-
if kubectl get csv -n openshift-operators 2>/dev/null | grep -q opendatahub-operator; then
18-
echo " ✅ ODH operator already installed"
53+
echo " Waiting for operator to be ready (this may take a few minutes)..."
54+
kubectl wait deployment/opendatahub-operator-controller-manager -n $ODH_OPERATOR_NS --for condition=Available=True --timeout=300s
1955
else
20-
echo " Creating OperatorGroup..."
21-
cat <<EOF | kubectl apply -f -
56+
echo "1️⃣ Installing ODH Operator..."
57+
58+
# Check if operator is already installed
59+
if kubectl get csv -n openshift-operators 2>/dev/null | grep -q opendatahub-operator; then
60+
echo " ✅ ODH operator already installed"
61+
else
62+
echo " Creating OperatorGroup..."
63+
cat <<EOF | kubectl apply -f -
2264
apiVersion: operators.coreos.com/v1
2365
kind: OperatorGroup
2466
metadata:
2567
name: opendatahub
2668
namespace: openshift-operators
2769
EOF
2870

29-
echo " Creating Subscription..."
30-
cat <<EOF | kubectl apply -f -
71+
echo " Creating Subscription..."
72+
cat <<EOF | kubectl apply -f -
3173
apiVersion: operators.coreos.com/v1alpha1
3274
kind: Subscription
3375
metadata:
@@ -40,67 +82,36 @@ spec:
4082
sourceNamespace: openshift-marketplace
4183
EOF
4284

43-
echo " Waiting for operator to be ready (this may take a few minutes)..."
44-
sleep 30
45-
46-
# Wait for operator to be ready
47-
for i in {1..30}; do
48-
if kubectl get deployment -n openshift-operators | grep -q opendatahub-operator; then
49-
echo " Operator deployment found, waiting for ready state..."
50-
kubectl wait --for=condition=Available deployment -l app.kubernetes.io/name=opendatahub-operator -n openshift-operators --timeout=300s || true
51-
break
52-
fi
53-
echo " Waiting for operator deployment to appear... ($i/30)"
54-
sleep 10
55-
done
56-
fi
57-
58-
# Step 2: Create ODH namespace if it doesn't exist
59-
echo ""
60-
echo "2️⃣ Creating opendatahub namespace..."
61-
kubectl create namespace opendatahub 2>/dev/null || echo " Namespace already exists"
62-
63-
# Step 3: Wait for CRDs to be registered
64-
echo ""
65-
echo "3️⃣ Waiting for ODH CRDs to be registered..."
66-
for i in {1..30}; do
67-
if kubectl get crd dscinitializations.dscinitialization.opendatahub.io &>/dev/null 2>&1; then
68-
echo " ✅ DSCInitialization CRD found"
69-
break
85+
echo " Waiting for operator to be ready (this may take a few minutes)..."
86+
sleep 30
87+
88+
# Wait for operator to be ready
89+
for i in {1..30}; do
90+
if kubectl get deployment -n openshift-operators | grep -q opendatahub-operator; then
91+
echo " Operator deployment found, waiting for ready state..."
92+
kubectl wait --for=condition=Available deployment -l app.kubernetes.io/name=opendatahub-operator -n openshift-operators --timeout=300s || true
93+
break
94+
fi
95+
echo " Waiting for operator deployment to appear... ($i/30)"
96+
sleep 10
97+
done
7098
fi
71-
echo " Waiting for DSCInitialization CRD... ($i/30)"
72-
sleep 10
73-
done
74-
75-
if ! kubectl get crd dscinitializations.dscinitialization.opendatahub.io &>/dev/null 2>&1; then
76-
echo " ❌ DSCInitialization CRD not found after waiting"
77-
echo " Please check the operator logs:"
78-
echo " kubectl logs -n openshift-operators deployment/opendatahub-operator-controller-manager"
79-
exit 1
8099
fi
81100

82-
# Step 4: Create DSCInitialization (REQUIRED before DataScienceCluster)
101+
# Step 2: Create DSCInitialization (REQUIRED before DataScienceCluster)
83102
echo ""
84-
echo "4️⃣ Creating DSCInitialization resource..."
103+
echo "2️⃣ Creating DSCInitialization resource..."
85104
cat <<EOF | kubectl apply -f -
86-
apiVersion: dscinitialization.opendatahub.io/v1
105+
apiVersion: dscinitialization.opendatahub.io/v2
87106
kind: DSCInitialization
88107
metadata:
89108
name: default-dsci
90-
namespace: opendatahub
91109
spec:
92110
applicationsNamespace: opendatahub
93111
monitoring:
94112
managementState: Managed
95113
namespace: opendatahub
96-
serviceMesh:
97-
managementState: Managed
98-
auth:
99-
audiences:
100-
- "https://kubernetes.default.svc"
101-
controlPlane:
102-
name: data-science-smcp
103-
namespace: istio-system
114+
metrics: {}
104115
trustedCABundle:
105116
managementState: Managed
106117
EOF
@@ -115,66 +126,43 @@ for i in {1..30}; do
115126
sleep 10
116127
done
117128

118-
# Step 5: Create DataScienceCluster
129+
# Step 3: Create DataScienceCluster
119130
echo ""
120-
echo "5️⃣ Creating DataScienceCluster..."
131+
echo "3️⃣ Creating DataScienceCluster..."
121132
cat <<EOF | kubectl apply -f -
122-
apiVersion: datasciencecluster.opendatahub.io/v1
133+
apiVersion: datasciencecluster.opendatahub.io/v2
123134
kind: DataScienceCluster
124135
metadata:
125136
name: default-dsc
126-
namespace: opendatahub
127137
spec:
128138
components:
129-
# Core component for notebooks
130-
dashboard:
131-
managementState: Managed
132-
133-
# Notebook controller
134-
workbenches:
135-
managementState: Managed
136-
137-
# Model serving with KServe in RawDeployment mode (no Knative)
138139
kserve:
139140
managementState: Managed
140-
defaultDeploymentMode: RawDeployment
141141
nim:
142-
managementState: Managed # Enable NVIDIA NIM support
143-
rawDeploymentServiceConfig: Headless
144-
serving:
145-
ingressGateway:
146-
certificate:
147-
type: OpenshiftDefaultIngress
148-
managementState: Removed # Disable Knative serving (using RawDeployment)
149-
name: knative-serving
150-
151-
# Model serving platform
152-
modelmeshserving:
153-
managementState: Removed # Use KServe instead
154-
155-
# Data science pipelines
156-
datasciencepipelines:
157-
managementState: Removed # Not needed for MaaS
158-
159-
# Ray for distributed computing
142+
managementState: Managed
143+
rawDeploymentServiceConfig: Headed
144+
145+
# Components not needed for MaaS:
146+
dashboard:
147+
managementState: Removed
148+
workbenches:
149+
managementState: Removed
150+
aipipelines:
151+
managementState: Removed
160152
ray:
161-
managementState: Removed # Not needed for MaaS
162-
163-
# Kueue for job queueing
153+
managementState: Removed
164154
kueue:
165-
managementState: Removed # Not needed for MaaS
166-
167-
# Model registry
155+
managementState: Removed
168156
modelregistry:
169-
managementState: Removed # Not needed for MaaS
170-
171-
# TrustyAI for model explainability
157+
managementState: Removed
172158
trustyai:
173-
managementState: Removed # Not needed for MaaS
174-
175-
# Training operator
159+
managementState: Removed
176160
trainingoperator:
177-
managementState: Removed # Not needed for MaaS
161+
managementState: Removed
162+
feastoperator:
163+
managementState: Removed
164+
llamastackoperator:
165+
managementState: Removed
178166
EOF
179167

180168
echo " Waiting for DataScienceCluster to be ready..."
@@ -190,7 +178,7 @@ for i in {1..60}; do
190178
sleep 10
191179
done
192180

193-
# Step 6: Verify installation
181+
# Step 4: Verify installation
194182
echo ""
195183
echo "========================================="
196184
echo "📊 Verification"
@@ -204,25 +192,15 @@ echo ""
204192
echo "DataScienceCluster Status:"
205193
kubectl get datasciencecluster -n opendatahub
206194

207-
echo ""
208-
echo "KServe Components:"
209-
kubectl get pods -n kserve 2>/dev/null || echo "KServe namespace not yet created"
210-
211-
echo ""
212-
echo "Istio Components:"
213-
kubectl get pods -n istio-system 2>/dev/null || echo "Istio namespace not yet created"
214-
215195
echo ""
216196
echo "========================================="
217197
echo "✅ ODH Installation Complete!"
218198
echo "========================================="
219199
echo ""
220200
echo "Next steps:"
221-
echo "1. Verify KServe is running: kubectl get pods -n kserve"
222-
echo "2. Check Service Mesh: kubectl get smcp -n istio-system"
223-
echo "3. Deploy your models using KServe InferenceService"
201+
echo "1. Deploy your models using KServe InferenceService"
224202
echo ""
225203
echo "If you encounter issues, check the logs:"
226204
echo "- ODH Operator: kubectl logs -n openshift-operators deployment/opendatahub-operator-controller-manager"
227-
echo "- DSCInitialization: kubectl describe dscinitializations -n opendatahub default-dsci"
228-
echo "- DataScienceCluster: kubectl describe datasciencecluster -n opendatahub default-dsc"
205+
echo "- DSCInitialization: kubectl describe dscinitializations default-dsci"
206+
echo "- DataScienceCluster: kubectl describe datasciencecluster default-dsc"

docs/content/architecture.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,9 @@ sequenceDiagram
241241
MaaS-->>User: {<br/> "token": "...",<br/> "expiration": "4h",<br/> "expiresAt": 1234567890<br/>}
242242
```
243243

244-
### 2. Model Inference Flow
244+
### 3. Model Inference Flow
245+
246+
The inference flow routes validated requests to RHOAI models.
245247

246248
The Gateway API and RHCL components validate service account tokens and enforce policies:
247249

testing/maas_billing_tests_independent/artifacts/.gitkeep

Whitespace-only changes.

testing/maas_billing_tests_independent/reports/.gitkeep

Whitespace-only changes.

0 commit comments

Comments
 (0)