forked from opendatahub-io/models-as-a-service
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeploy.sh
More file actions
executable file
·1771 lines (1507 loc) · 70.7 KB
/
deploy.sh
File metadata and controls
executable file
·1771 lines (1507 loc) · 70.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
################################################################################
# MaaS Deployment Script
#
# Unified deployment script for Models-as-a-Service (MaaS) platform.
# Supports RHOAI and ODH operators with configurable rate limiting.
#
# USAGE:
# ./scripts/deploy.sh [OPTIONS]
#
# OPTIONS:
# --operator-type <odh|rhoai> Operator to install (default: odh)
# Policy engine is auto-selected:
# odh → kuadrant (community v1.3.1)
# rhoai → rhcl (Red Hat Connectivity Link)
# --enable-tls-backend Enable TLS for Authorino/MaaS API (default: on)
# --enable-keycloak Deploy Keycloak for external OIDC (optional)
# --namespace <namespace> Target namespace
# --verbose Enable debug logging
# --dry-run Show what would be done
# --help Show full help with all options
#
# ADVANCED OPTIONS (PR Testing):
# --operator-catalog <image> Custom operator catalog image
# --operator-image <image> Custom operator image (patches CSV)
# --maas-api-image <image> Custom MaaS API container image
# --channel <channel> Operator channel override
#
# ENVIRONMENT VARIABLES:
# MAAS_API_IMAGE Custom MaaS API container image
# MAAS_CONTROLLER_IMAGE Custom MaaS controller container image
# OPERATOR_TYPE Operator type (rhoai/odh)
# LOG_LEVEL Logging verbosity (DEBUG, INFO, WARN, ERROR)
# KUSTOMIZE_FORCE_CONFLICTS When true, use --force-conflicts on kubectl apply in kustomize mode
#
# TIMEOUT CONFIGURATION (all in seconds, see deployment-helpers.sh for defaults):
# CUSTOM_RESOURCE_TIMEOUT DataScienceCluster wait (default: 600)
# NAMESPACE_TIMEOUT Namespace creation/ready (default: 300)
# RESOURCE_TIMEOUT Generic resource wait (default: 300)
# CRD_TIMEOUT CRD establishment (default: 180)
# CSV_TIMEOUT CSV installation (default: 180)
# SUBSCRIPTION_TIMEOUT Subscription install (default: 300)
# POD_TIMEOUT Pod ready wait (default: 120)
# WEBHOOK_TIMEOUT Webhook ready (default: 60)
# CUSTOM_CHECK_TIMEOUT Generic check (default: 120)
# AUTHORINO_TIMEOUT Authorino ready (default: 120)
# ROLLOUT_TIMEOUT kubectl rollout status (default: 120)
# CATALOGSOURCE_TIMEOUT CatalogSource ready (default: 120)
#
# EXAMPLES:
# # Deploy ODH (default, uses kuadrant policy engine)
# ./scripts/deploy.sh
#
# # Deploy RHOAI (uses rhcl policy engine)
# ./scripts/deploy.sh --operator-type rhoai
#
# # Deploy with Keycloak for external OIDC support
# ./scripts/deploy.sh --enable-keycloak
#
# # Test custom MaaS API image
# MAAS_API_IMAGE=quay.io/myuser/maas-api:pr-123 ./scripts/deploy.sh
#
# For detailed documentation, see:
# https://opendatahub-io.github.io/models-as-a-service/latest/install/maas-setup/
################################################################################
set -euo pipefail
# Source shared helpers (logging functions, timeout defaults, install utilities).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=deployment-helpers.sh
source "${SCRIPT_DIR}/deployment-helpers.sh"
# Map the optional LOG_LEVEL env var onto the numeric log-level constants
# declared in deployment-helpers.sh. Unset or unrecognized values fall
# through and keep the helper's default verbosity.
case "${LOG_LEVEL:-}" in
DEBUG)
  CURRENT_LOG_LEVEL=$LOG_LEVEL_DEBUG
  ;;
INFO)
  CURRENT_LOG_LEVEL=$LOG_LEVEL_INFO
  ;;
WARN)
  CURRENT_LOG_LEVEL=$LOG_LEVEL_WARN
  ;;
ERROR)
  CURRENT_LOG_LEVEL=$LOG_LEVEL_ERROR
  ;;
esac
#──────────────────────────────────────────────────────────────
# DEFAULT CONFIGURATION
#──────────────────────────────────────────────────────────────
# Each setting honors a pre-set environment variable; CLI flags handled by
# parse_arguments() override these values afterwards.
DEPLOYMENT_MODE="${DEPLOYMENT_MODE:-operator}"
OPERATOR_TYPE="${OPERATOR_TYPE:-odh}"
POLICY_ENGINE="" # Auto-determined in validate_configuration: odh→kuadrant, rhoai→rhcl
NAMESPACE="${DEPLOYMENT_NAMESPACE:-}" # Auto-determined based on operator type
ENABLE_TLS_BACKEND="${ENABLE_TLS_BACKEND:-true}"
ENABLE_KEYCLOAK="${ENABLE_KEYCLOAK:-false}"
VERBOSE="${VERBOSE:-false}"
DRY_RUN="${DRY_RUN:-false}"
# PR-testing overrides; empty means "use the released catalog/images".
OPERATOR_CATALOG="${OPERATOR_CATALOG:-}"
OPERATOR_IMAGE="${OPERATOR_IMAGE:-}"
OPERATOR_CHANNEL="${OPERATOR_CHANNEL:-}"
OPERATOR_STARTING_CSV="${OPERATOR_STARTING_CSV:-}"
OPERATOR_INSTALL_PLAN_APPROVAL="${OPERATOR_INSTALL_PLAN_APPROVAL:-}"
MAAS_API_IMAGE="${MAAS_API_IMAGE:-}"
MAAS_CONTROLLER_IMAGE="${MAAS_CONTROLLER_IMAGE:-}"
KUSTOMIZE_FORCE_CONFLICTS="${KUSTOMIZE_FORCE_CONFLICTS:-false}"
EXTERNAL_OIDC="${EXTERNAL_OIDC:-false}"
#──────────────────────────────────────────────────────────────
# HELP TEXT
#──────────────────────────────────────────────────────────────
# Print the full usage/help text to stdout and return.
# NOTE: the heredoc below is user-facing runtime output; it is intentionally
# left byte-identical (comments cannot be embedded inside it without being
# printed to the user).
show_help() {
cat <<EOF
Unified deployment script for Models-as-a-Service
USAGE:
./scripts/deploy.sh [OPTIONS]
OPTIONS:
--deployment-mode <operator|kustomize>
Deployment method (default: operator)
--operator-type <odh|rhoai>
Which operator to install (default: odh)
Policy engine is auto-selected based on operator type:
- rhoai → rhcl (Red Hat Connectivity Link)
- odh → kuadrant (community v1.3.1 with AuthPolicy v1)
Only applies when --deployment-mode=operator
--enable-tls-backend
Enable TLS backend for Authorino and MaaS API (default: enabled)
Configures HTTPS for Authorino to maas-api communication
--disable-tls-backend
Disable TLS backend for Authorino and MaaS API
Uses HTTP for Authorino to maas-api communication
--enable-keycloak
Deploy Keycloak identity provider for external OIDC support (optional)
Creates keycloak-system namespace and deploys Keycloak operator
See docs/samples/install/keycloak/ for configuration guide
--namespace <namespace>
Target namespace for deployment
Default: redhat-ods-applications (RHOAI) or opendatahub (ODH)
--verbose
Enable verbose/debug logging
--dry-run
Show what would be done without applying changes
--help
Display this help message
ADVANCED OPTIONS (PR Testing):
--operator-catalog <image>
Custom operator catalog/index image (for testing PRs)
Example: quay.io/opendatahub/opendatahub-operator-catalog:pr-456
--operator-image <image>
Custom operator image (patches CSV after install)
Example: quay.io/opendatahub/opendatahub-operator:pr-456
--maas-api-image <image>
Custom MaaS API container image (PR testing)
Example: quay.io/opendatahub/maas-api:pr-456
--maas-controller-image <image>
Custom MaaS controller container image (PR testing)
Example: quay.io/opendatahub/maas-controller:pr-406
--channel <channel>
Operator channel override
Default: fast-3 (ODH), fast-3.x (RHOAI)
--external-oidc
Enable external OIDC on the maas-api AuthPolicy.
Requires OIDC_ISSUER_URL or deployment/overlays/odh/params.env to provide
a real oidc-issuer-url value.
ENVIRONMENT VARIABLES:
MAAS_API_IMAGE Custom MaaS API container image
MAAS_CONTROLLER_IMAGE Custom MaaS controller container image
OPERATOR_CATALOG Custom operator catalog
OPERATOR_IMAGE Custom operator image
OPERATOR_STARTING_CSV ODH Subscription startingCSV (default: opendatahub-operator.v3.4.0-ea.1; "-" to omit)
OPERATOR_INSTALL_PLAN_APPROVAL ODH Subscription OLM approval (default: Manual — no auto-upgrades; first InstallPlan is auto-approved by the script)
OPERATOR_TYPE Operator type (rhoai/odh)
EXTERNAL_OIDC Enable external OIDC on maas-api (true/false)
OIDC_ISSUER_URL External OIDC issuer URL for maas-api AuthPolicy patching
LOG_LEVEL Logging verbosity (DEBUG, INFO, WARN, ERROR)
KUSTOMIZE_FORCE_CONFLICTS When true, pass --force-conflicts to kubectl apply in kustomize mode (default: false)
TIMEOUT CONFIGURATION (all values in seconds):
Customize timeouts for slow clusters or CI/CD environments:
- CUSTOM_RESOURCE_TIMEOUT=600 DataScienceCluster wait
- NAMESPACE_TIMEOUT=300 Namespace creation
- CRD_TIMEOUT=180 CRD establishment
- CSV_TIMEOUT=180 Operator CSV installation
- ROLLOUT_TIMEOUT=120 Deployment rollout
- AUTHORINO_TIMEOUT=120 Authorino ready
See deployment-helpers.sh for complete list and defaults
EXAMPLES:
# Deploy ODH (default, uses kuadrant policy engine)
./scripts/deploy.sh
# Deploy RHOAI (uses rhcl policy engine)
./scripts/deploy.sh --operator-type rhoai
# Deploy with Keycloak for external OIDC support
./scripts/deploy.sh --enable-keycloak
# Deploy via Kustomize
./scripts/deploy.sh --deployment-mode kustomize
# Test MaaS API PR #123
MAAS_API_IMAGE=quay.io/myuser/maas-api:pr-123 \\
./scripts/deploy.sh --operator-type odh
# Test ODH operator PR #456 with manifests
./scripts/deploy.sh \\
--operator-type odh \\
--operator-catalog quay.io/opendatahub/opendatahub-operator-catalog:pr-456 \\
--operator-image quay.io/opendatahub/opendatahub-operator:pr-456
For more information, see: https://github.com/opendatahub-io/models-as-a-service
EOF
}
#──────────────────────────────────────────────────────────────
# ARGUMENT PARSING
#──────────────────────────────────────────────────────────────
# Helper function to validate flag has a value
# Ensure a CLI flag received a usable value.
# Arguments:
#   $1 - the flag name (for the error message)
#   $2 - the candidate value (may be absent)
# Exits 1 when the value is empty or looks like the next option.
require_flag_value() {
  local opt_name=$1
  local opt_value=${2:-}
  # An empty value, or one starting with "--", means the user forgot the
  # argument and the parser grabbed the following flag instead.
  case "$opt_value" in
    ""|--*)
      log_error "Flag $opt_name requires a value"
      log_error "Use --help for usage information"
      exit 1
      ;;
  esac
}
# Translate command-line options into the script's global configuration
# variables. Value-taking flags are validated via require_flag_value;
# unknown flags abort with exit 1; --help prints usage and exits 0.
parse_arguments() {
  while [[ $# -gt 0 ]]; do
    local opt=$1
    case "$opt" in
      # ---- options that consume a value --------------------------------
      --deployment-mode|--operator-type|--namespace|--operator-catalog|--operator-image|--maas-api-image|--maas-controller-image|--channel)
        require_flag_value "$opt" "${2:-}"
        case "$opt" in
          --deployment-mode)       DEPLOYMENT_MODE="$2" ;;
          --operator-type)         OPERATOR_TYPE="$2" ;;
          --namespace)             NAMESPACE="$2" ;;
          --operator-catalog)      OPERATOR_CATALOG="$2" ;;
          --operator-image)        OPERATOR_IMAGE="$2" ;;
          --maas-api-image)        MAAS_API_IMAGE="$2" ;;
          --maas-controller-image) MAAS_CONTROLLER_IMAGE="$2" ;;
          --channel)               OPERATOR_CHANNEL="$2" ;;
        esac
        shift 2
        ;;
      # ---- boolean switches --------------------------------------------
      --enable-tls-backend)  ENABLE_TLS_BACKEND="true";  shift ;;
      --disable-tls-backend) ENABLE_TLS_BACKEND="false"; shift ;;
      --enable-keycloak)     ENABLE_KEYCLOAK="true";     shift ;;
      --external-oidc)       EXTERNAL_OIDC="true";       shift ;;
      --dry-run)             DRY_RUN="true";             shift ;;
      --verbose)
        # Verbose implies DEBUG logging for this script and the helpers.
        VERBOSE="true"
        LOG_LEVEL="DEBUG"
        CURRENT_LOG_LEVEL=$LOG_LEVEL_DEBUG
        shift
        ;;
      --help|-h)
        show_help
        exit 0
        ;;
      *)
        log_error "Unknown option: $opt"
        log_error "Use --help for usage information"
        exit 1
        ;;
    esac
  done
}
#──────────────────────────────────────────────────────────────
# PREREQUISITE CHECKS
#──────────────────────────────────────────────────────────────
# Verify that every CLI dependency is installed, and that kustomize meets
# the minimum version. Logs each missing/incompatible tool and returns 1
# when anything is absent; returns 0 otherwise.
check_required_tools() {
  local -a absent=()
  local min_kustomize="5.7.0"

  command -v oc &>/dev/null || absent+=("oc (OpenShift CLI)")
  command -v kubectl &>/dev/null || absent+=("kubectl")
  command -v jq &>/dev/null || absent+=("jq")

  if ! command -v kustomize &>/dev/null; then
    absent+=("kustomize (v$min_kustomize+)")
  else
    local found_version
    found_version=$(kustomize version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
    # Dev builds may not print a version string; fall back to the version
    # embedded in the Go binary metadata.
    if [[ -z "$found_version" ]] && command -v go &>/dev/null; then
      found_version=$(go version -m "$(command -v kustomize)" 2>/dev/null | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+' | head -1 | tr -d 'v')
    fi
    if [[ -z "$found_version" ]]; then
      log_warn "kustomize is a dev build with unverifiable version. Cannot guarantee compatibility with v$min_kustomize+."
    else
      # sort -V orders versions ascending; if the minimum is not first,
      # the installed version is older than required.
      local oldest
      oldest=$(printf '%s\n%s' "$min_kustomize" "$found_version" | sort -V | head -1)
      if [[ "$oldest" != "$min_kustomize" ]]; then
        absent+=("kustomize (v$min_kustomize+ required, found ${found_version})")
      fi
    fi
  fi

  # GNU sed is required; macOS ships BSD sed, so require gsed there.
  if [[ "$(uname -s)" == "Darwin" ]]; then
    command -v gsed &>/dev/null || absent+=("gsed (GNU sed) for MacOS")
  else
    command -v sed &>/dev/null || absent+=("sed (GNU sed)")
  fi

  if [[ ${#absent[@]} -gt 0 ]]; then
    log_error "Missing or incompatible required tools:"
    local entry
    for entry in "${absent[@]}"; do
      log_error " - $entry"
    done
    return 1
  fi
}
#──────────────────────────────────────────────────────────────
# CONFIGURATION VALIDATION
#──────────────────────────────────────────────────────────────
# Validate the user-supplied DEPLOYMENT_MODE/OPERATOR_TYPE and derive the
# two dependent globals from them:
#   POLICY_ENGINE - kuadrant (ODH / kustomize) or rhcl (RHOAI)
#   NAMESPACE     - fixed per operator type in operator mode; user-chosen
#                   (default "opendatahub") in kustomize mode
# Exits 1 on invalid input.
validate_configuration() {
  log_info "Validating configuration..."

  # Deployment mode must be one of the two supported methods.
  case "$DEPLOYMENT_MODE" in
    operator|kustomize) ;;
    *)
      log_error "Invalid deployment mode: $DEPLOYMENT_MODE"
      log_error "Must be 'operator' or 'kustomize'"
      exit 1
      ;;
  esac

  # Operator type is only meaningful (and only validated) in operator mode.
  if [[ "$DEPLOYMENT_MODE" == "operator" ]]; then
    case "$OPERATOR_TYPE" in
      rhoai|odh) ;;
      *)
        log_error "Invalid operator type: $OPERATOR_TYPE"
        log_error "Must be 'rhoai' or 'odh'"
        exit 1
        ;;
    esac
  fi

  # The policy engine is derived, never user-selected:
  #   RHOAI → RHCL (Red Hat Connectivity Link, downstream)
  #   ODH / kustomize → community Kuadrant (v1.3.1, AuthPolicy v1)
  if [[ "$DEPLOYMENT_MODE" == "operator" && "$OPERATOR_TYPE" == "rhoai" ]]; then
    POLICY_ENGINE="rhcl"
    log_debug "Auto-selected policy engine for RHOAI: rhcl (Red Hat Connectivity Link)"
  elif [[ "$DEPLOYMENT_MODE" == "operator" ]]; then
    POLICY_ENGINE="kuadrant"
    log_debug "Auto-selected policy engine for ODH: kuadrant (community v1.3.1)"
  else
    POLICY_ENGINE="kuadrant"
    log_debug "Using auto-determined policy engine for kustomize mode: $POLICY_ENGINE"
  fi

  if [[ "$DEPLOYMENT_MODE" == "kustomize" ]]; then
    # Kustomize deployments honor --namespace; default to opendatahub.
    NAMESPACE="${NAMESPACE:-opendatahub}"
    log_debug "Using namespace for kustomize mode: $NAMESPACE"
  else
    # Operator mode pins the applications namespace per operator type,
    # mirroring upstream deploy-rhoai-stable.sh where the namespace is
    # decided by DSCInitialization rather than environment variables.
    if [[ "$OPERATOR_TYPE" == "rhoai" ]]; then
      NAMESPACE="redhat-ods-applications"
    else
      NAMESPACE="opendatahub"
    fi
    log_debug "Using fixed namespace for operator mode: $NAMESPACE"
  fi

  log_info "Configuration validated successfully"
}
#──────────────────────────────────────────────────────────────
# DEPLOYMENT ORCHESTRATION
#──────────────────────────────────────────────────────────────
# Top-level orchestration: parse CLI flags, validate the configuration,
# run the selected deployment mode, then install the MaaS subscription
# controller and (when needed) patch its token-review audience.
# Arguments: all script arguments ("$@").
# Returns: 0 on success; non-zero when any required step fails.
main() {
  log_info "==================================================="
  log_info " Models-as-a-Service Deployment"
  log_info "==================================================="
  parse_arguments "$@"
  check_required_tools
  validate_configuration

  # Echo the effective configuration so CI logs capture it.
  log_info "Deployment configuration:"
  log_info " Mode: $DEPLOYMENT_MODE"
  if [[ "$DEPLOYMENT_MODE" == "operator" ]]; then
    log_info " Operator: $OPERATOR_TYPE"
  fi
  log_info " Policy Engine: $POLICY_ENGINE"
  log_info " Namespace: $NAMESPACE"
  log_info " TLS Backend: $ENABLE_TLS_BACKEND"
  log_info " External OIDC: $EXTERNAL_OIDC"
  if [[ "$EXTERNAL_OIDC" == "true" ]] && [[ "$DEPLOYMENT_MODE" == "operator" ]]; then
    log_warn " --external-oidc is ignored in operator mode. Configure external OIDC via"
    log_warn " the ModelsAsService CR: spec.externalOIDC.issuerUrl / clientId instead."
  fi
  if [[ -n "${MAAS_API_IMAGE:-}" ]]; then
    log_info " MaaS API image: $MAAS_API_IMAGE"
  fi
  if [[ -n "${MAAS_CONTROLLER_IMAGE:-}" ]]; then
    log_info " MaaS controller image: $MAAS_CONTROLLER_IMAGE"
  fi

  # Dry-run stops after configuration validation, before any cluster change.
  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "DRY RUN MODE - no changes will be applied"
    log_info "Deployment plan validated. Exiting."
    exit 0
  fi

  case "$DEPLOYMENT_MODE" in
    operator)
      deploy_via_operator
      ;;
    kustomize)
      deploy_via_kustomize
      ;;
  esac

  # Install subscription controller (always deployed).
  # In kustomize mode, maas-controller is included in the overlay; in
  # operator mode, install it here from the base manifests.
  log_info ""
  log_info "MaaS Subscription Controller..."
  # Consistency fix: reuse the SCRIPT_DIR global computed at file load
  # instead of re-deriving the script directory a second time here.
  local project_root="${SCRIPT_DIR}/.."
  local controller_dir="$project_root/maas-controller"
  local config_dir="$project_root/deployment/base/maas-controller/default"
  if [[ ! -d "$controller_dir" ]]; then
    log_error "maas-controller directory not found at $controller_dir — subscription controller required"
    return 1
  else
    if [[ "$DEPLOYMENT_MODE" != "kustomize" ]]; then
      log_info " Installing controller (CRDs, RBAC, deployment, default-deny policy)..."
      # The applications namespace is created by the operator; fail fast if
      # it is missing rather than applying manifests into nothing.
      if ! kubectl get namespace "$NAMESPACE" &>/dev/null; then
        log_error "Namespace $NAMESPACE does not exist. Create it first (e.g. via ODH operator)."
        return 1
      fi
      set_maas_controller_image
      if [[ "$NAMESPACE" != "opendatahub" ]]; then
        # Base manifests hardcode the opendatahub namespace; rewrite it for
        # non-default target namespaces before applying.
        (cd "$project_root" && kustomize build deployment/base/maas-controller/default | \
          sed "s/namespace: opendatahub/namespace: $NAMESPACE/g") | kubectl apply -f - || {
          cleanup_maas_controller_image
          log_error "Failed to apply maas-controller manifests"
          return 1
        }
      else
        kubectl apply -k "$config_dir" || {
          cleanup_maas_controller_image
          log_error "Failed to apply maas-controller manifests"
          return 1
        }
      fi
      cleanup_maas_controller_image
    else
      log_info " Controller deployed via kustomize overlay (deployment/base/maas-controller/default)"
    fi
    log_info " Waiting for maas-controller to be ready..."
    if ! kubectl rollout status deployment/maas-controller -n "$NAMESPACE" --timeout="${ROLLOUT_TIMEOUT}s"; then
      log_error "maas-controller deployment not ready (timeout: ${ROLLOUT_TIMEOUT}s)"
      return 1
    fi
    log_info " Subscription controller ready."
    log_info " Create MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription to enable per-model auth and rate limiting."
    # Patch controller with correct audience for HyperShift/ROSA clusters.
    # The controller creates AuthPolicies with kubernetesTokenReview.audiences;
    # on non-standard clusters the default audience (https://kubernetes.default.svc)
    # causes Authorino token validation to fail with 401.
    local cluster_aud
    cluster_aud=$(get_cluster_audience 2>/dev/null || echo "")
    if [[ -n "$cluster_aud" && "$cluster_aud" != "https://kubernetes.default.svc" ]]; then
      log_info " Non-standard cluster audience detected: $cluster_aud"
      log_info " Patching maas-controller with correct CLUSTER_AUDIENCE..."
      kubectl set env deployment/maas-controller -n "$NAMESPACE" CLUSTER_AUDIENCE="$cluster_aud"
      if ! kubectl rollout status deployment/maas-controller -n "$NAMESPACE" --timeout="${ROLLOUT_TIMEOUT}s"; then
        log_warn "maas-controller rollout after audience patch did not complete in time (timeout: ${ROLLOUT_TIMEOUT}s)"
      fi
    fi
  fi
  log_info "==================================================="
  log_info " Deployment completed successfully!"
  log_info "==================================================="
}
#──────────────────────────────────────────────────────────────
# OPERATOR-BASED DEPLOYMENT
#──────────────────────────────────────────────────────────────
# Deploy MaaS through OLM operators. Runs the install steps in strict order:
# conflict check → optional operators → policy engine → primary operator →
# custom resources → PostgreSQL → (optional) Keycloak → image injection →
# (optional) TLS → cluster-audience configuration.
# Reads globals set by validate_configuration/parse_arguments:
# NAMESPACE, POLICY_ENGINE, ENABLE_KEYCLOAK, ENABLE_TLS_BACKEND.
deploy_via_operator() {
  log_info "Starting operator-based deployment..."
  # Check for conflicting operators before modifying the cluster
  check_conflicting_operators
  # Install optional operators
  install_optional_operators
  # Install rate limiter component (kuadrant or rhcl, per POLICY_ENGINE)
  install_policy_engine
  # Install primary operator (creates namespace)
  install_primary_operator
  # Apply custom resources
  apply_custom_resources
  # Deploy PostgreSQL for API key storage (requires namespace to exist)
  deploy_postgresql
  # Deploy Keycloak identity provider (optional, if enabled)
  if [[ "$ENABLE_KEYCLOAK" == "true" ]]; then
    deploy_keycloak
  fi
  # Inject custom MaaS API image if specified
  inject_maas_api_image_operator_mode "$NAMESPACE"
  # Configure TLS backend (if enabled)
  if [[ "$ENABLE_TLS_BACKEND" == "true" ]]; then
    configure_tls_backend
  fi
  # Configure audience for non-standard clusters (Hypershift/ROSA)
  configure_cluster_audience
  log_info "Operator deployment completed"
}
#──────────────────────────────────────────────────────────────
# KUSTOMIZE-BASED DEPLOYMENT
#──────────────────────────────────────────────────────────────
# Deploy MaaS by applying kustomize overlays directly (no ODH/RHOAI operator).
# Selects the TLS or HTTP backend overlay, patches namespace placeholders,
# applies gateway policies separately, then runs the same post-install
# configuration as operator mode.
# Reads globals: NAMESPACE, ENABLE_TLS_BACKEND, ENABLE_KEYCLOAK,
# KUSTOMIZE_FORCE_CONFLICTS, MAAS_SUBSCRIPTION_NAMESPACE (env, optional).
deploy_via_kustomize() {
  log_info "Starting kustomize-based deployment..."
  local project_root
  project_root="$(find_project_root)" || {
    log_error "Could not find project root"
    exit 1
  }
  # Install rate limiter component (RHCL or Kuadrant)
  install_policy_engine
  local overlay="$project_root/deployment/overlays/http-backend"
  if [[ "$ENABLE_TLS_BACKEND" == "true" ]]; then
    log_info "Using TLS backend overlay"
    overlay="$project_root/deployment/overlays/tls-backend"
  else
    log_info "Using HTTP backend overlay"
  fi
  # The set_* helpers below edit overlay kustomization files in place; the
  # trap guarantees those edits are reverted on any exit path.
  trap 'cleanup_maas_api_image; cleanup_maas_controller_image; cleanup_overlay_namespace' EXIT INT TERM
  set_maas_api_image
  set_maas_controller_image
  set_overlay_namespace "$overlay" "$NAMESPACE"
  if ! kubectl get namespace "$NAMESPACE" &>/dev/null; then
    log_info "Creating namespace: $NAMESPACE"
    if ! kubectl create namespace "$NAMESPACE"; then
      log_error "Failed to create namespace $NAMESPACE"
      return 1
    fi
  else
    log_debug "Namespace $NAMESPACE already exists"
  fi
  # Note: The subscription namespace (default: models-as-a-service) is automatically
  # created by maas-controller when it starts (see maas-controller/cmd/manager/main.go).
  # We only set the variable here for use in manifest patching below.
  local subscription_namespace="${MAAS_SUBSCRIPTION_NAMESPACE:-models-as-a-service}"
  # Deploy PostgreSQL for API key storage (requires namespace to exist)
  deploy_postgresql
  # Deploy Keycloak identity provider (optional, if enabled)
  if [[ "$ENABLE_KEYCLOAK" == "true" ]]; then
    deploy_keycloak
  fi
  log_info "Applying kustomize manifests..."
  # Build the overlay, then: (1) replace the maas-api URL placeholder with the
  # real namespace via sed, and (2) rewrite the MAAS_SUBSCRIPTION_NAMESPACE
  # env value via a multiline perl substitution (slurp mode so the name/value
  # pair can be matched across lines).
  kubectl apply --server-side=true --force-conflicts="$KUSTOMIZE_FORCE_CONFLICTS" -f <(
    kustomize build "$overlay" | \
      sed "s/maas-api\.placehold\.svc/maas-api.$NAMESPACE.svc/g" | \
      perl -pe 'BEGIN{undef $/;} s/(name: MAAS_SUBSCRIPTION_NAMESPACE\n\s+value: ")[^"]*"/${1}'"$subscription_namespace"'"/smg'
  )
  # Apply gateway policies separately so they stay in openshift-ingress (overlay
  # namespace would otherwise overwrite them to $NAMESPACE)
  local policies_dir="$project_root/deployment/base/maas-controller/policies"
  if [[ -d "$policies_dir" ]]; then
    log_info "Applying gateway policies (openshift-ingress)..."
    kubectl apply --server-side=true --force-conflicts="$KUSTOMIZE_FORCE_CONFLICTS" -f <(kustomize build "$policies_dir")
  fi
  # Configure TLS backend (if enabled)
  if [[ "$ENABLE_TLS_BACKEND" == "true" ]]; then
    configure_tls_backend
  fi
  # Patch the live AuthPolicy after kustomize apply so OIDC and API key
  # behavior matches operator mode when configured.
  configure_maas_api_authpolicy
  # Configure audience for non-standard clusters (HyperShift/ROSA)
  configure_cluster_audience
  log_info "Kustomize deployment completed"
}
#──────────────────────────────────────────────────────────────
# POSTGRESQL DEPLOYMENT
#──────────────────────────────────────────────────────────────
# Provision the PostgreSQL instance backing API-key storage by delegating to
# the companion setup script, which reads NAMESPACE from its environment.
deploy_postgresql() {
  local db_setup_script="${SCRIPT_DIR}/setup-database.sh"
  NAMESPACE="$NAMESPACE" "$db_setup_script"
}
#──────────────────────────────────────────────────────────────
# KEYCLOAK DEPLOYMENT
#──────────────────────────────────────────────────────────────
# Stand up the optional Keycloak identity provider (for external OIDC) by
# delegating to the companion setup script.
deploy_keycloak() {
  local keycloak_setup_script="${SCRIPT_DIR}/setup-keycloak.sh"
  log_info "Deploying Keycloak identity provider for external OIDC support..."
  "$keycloak_setup_script"
}
#──────────────────────────────────────────────────────────────
# OPTIONAL OPERATORS (cert-manager, LWS)
#──────────────────────────────────────────────────────────────
# Install the cert-manager and LeaderWorkerSet operators concurrently (the
# two subscriptions are independent), wait for both installs, then create
# the LeaderWorkerSetOperator CR that activates the LWS API.
# Returns 1 as soon as either apply or either install wait fails.
install_optional_operators() {
  local data_dir="${SCRIPT_DIR}/data"
  log_info "Installing optional operators in parallel..."

  log_info "Applying cert-manager and LeaderWorkerSet subscriptions..."
  kubectl apply -f "${data_dir}/cert-manager-subscription.yaml" &
  local apply_cert_pid=$!
  kubectl apply -f "${data_dir}/lws-subscription.yaml" &
  local apply_lws_pid=$!

  # Reap both background applies, recording each exit code individually so
  # a failure in one does not mask the other.
  local rc_cert=0 rc_lws=0
  wait "$apply_cert_pid" || rc_cert=$?
  wait "$apply_lws_pid" || rc_lws=$?
  if (( rc_cert != 0 )); then
    log_error "Failed to apply cert-manager subscription (exit code: $rc_cert)"
    return 1
  fi
  if (( rc_lws != 0 )); then
    log_error "Failed to apply LWS subscription (exit code: $rc_lws)"
    return 1
  fi

  # The install waits are also independent, so run them in parallel too.
  log_info "Waiting for operators to be installed..."
  waitsubscriptioninstalled "cert-manager-operator" "openshift-cert-manager-operator" &
  local wait_cert_pid=$!
  waitsubscriptioninstalled "openshift-lws-operator" "leader-worker-set" &
  local wait_lws_pid=$!
  rc_cert=0
  rc_lws=0
  wait "$wait_cert_pid" || rc_cert=$?
  wait "$wait_lws_pid" || rc_lws=$?
  if (( rc_cert != 0 )); then
    log_error "cert-manager operator installation failed"
    return 1
  fi
  if (( rc_lws != 0 )); then
    log_error "LWS operator installation failed"
    return 1
  fi

  # Create LeaderWorkerSetOperator CR to activate the LWS controller-manager.
  # The operator subscription alone only installs the operator pod; the CR is
  # required to actually deploy the LWS API (controller-manager pods).
  # See: https://docs.redhat.com/en/documentation/openshift_container_platform/latest/html/ai_workloads/leader-worker-set-operator
  log_info "Activating LeaderWorkerSet API..."
  kubectl apply -f "${data_dir}/lws-operator-cr.yaml"
  log_info "Optional operators installed"
}
#──────────────────────────────────────────────────────────────
# RATE LIMITER INSTALLATION
#──────────────────────────────────────────────────────────────
# Patch Kuadrant/RHCL CSV to recognize OpenShift Gateway controller
# This is required because Kuadrant needs to know about the Gateway API provider
# Without this patch, Kuadrant shows "MissingDependency" and AuthPolicies won't be enforced
# Patch the Kuadrant/RHCL operator CSV so its ISTIO_GATEWAY_CONTROLLER_NAMES
# env var includes the OpenShift Gateway controller, then force-restart the
# operator pod so the new value is active before any Kuadrant CR is created.
# Without this patch, Kuadrant shows "MissingDependency" and AuthPolicies
# won't be enforced.
# Arguments:
#   $1 - namespace containing the operator CSV
#   $2 - CSV name prefix to search for (e.g. "kuadrant-operator")
# Returns 0 even when patching fails (failures are logged as warnings only).
patch_kuadrant_csv_for_gateway() {
  local namespace=$1
  local operator_prefix=$2
  log_info "Patching $operator_prefix CSV for OpenShift Gateway controller..."
  # Find the CSV by name prefix; take the first match.
  local csv_name
  csv_name=$(kubectl get csv -n "$namespace" --no-headers 2>/dev/null | grep "^${operator_prefix}" | awk '{print $1}' | head -1)
  if [[ -z "$csv_name" ]]; then
    log_warn "Could not find CSV for $operator_prefix in $namespace, skipping Gateway controller patch"
    return 0
  fi
  # Skip if ISTIO_GATEWAY_CONTROLLER_NAMES already lists both controllers.
  local current_value
  current_value=$(kubectl get csv "$csv_name" -n "$namespace" -o jsonpath='{.spec.install.spec.deployments[0].spec.template.spec.containers[0].env[?(@.name=="ISTIO_GATEWAY_CONTROLLER_NAMES")].value}' 2>/dev/null || echo "")
  if [[ "$current_value" == *"istio.io/gateway-controller"* && "$current_value" == *"openshift.io/gateway-controller"* ]]; then
    log_debug "CSV already has correct ISTIO_GATEWAY_CONTROLLER_NAMES value"
    return 0
  fi
  # Find the array index of the ISTIO_GATEWAY_CONTROLLER_NAMES env var so a
  # JSON-patch "replace" can target it precisely; empty means it is absent.
  local env_index
  env_index=$(kubectl get csv "$csv_name" -n "$namespace" -o json | jq '.spec.install.spec.deployments[0].spec.template.spec.containers[0].env | to_entries | .[] | select(.value.name=="ISTIO_GATEWAY_CONTROLLER_NAMES") | .key' 2>/dev/null || echo "")
  if [[ -z "$env_index" ]]; then
    # Env var doesn't exist, add it
    log_debug "Adding ISTIO_GATEWAY_CONTROLLER_NAMES to CSV"
    kubectl patch csv "$csv_name" -n "$namespace" --type='json' -p='[
    {
    "op": "add",
    "path": "/spec/install/spec/deployments/0/spec/template/spec/containers/0/env/-",
    "value": {
    "name": "ISTIO_GATEWAY_CONTROLLER_NAMES",
    "value": "istio.io/gateway-controller,openshift.io/gateway-controller/v1"
    }
    }
    ]' 2>/dev/null || log_warn "Failed to add ISTIO_GATEWAY_CONTROLLER_NAMES to CSV"
  else
    # Env var exists, update it
    log_debug "Updating ISTIO_GATEWAY_CONTROLLER_NAMES in CSV (index: $env_index)"
    kubectl patch csv "$csv_name" -n "$namespace" --type='json' -p="[
    {
    \"op\": \"replace\",
    \"path\": \"/spec/install/spec/deployments/0/spec/template/spec/containers/0/env/${env_index}/value\",
    \"value\": \"istio.io/gateway-controller,openshift.io/gateway-controller/v1\"
    }
    ]" 2>/dev/null || log_warn "Failed to update ISTIO_GATEWAY_CONTROLLER_NAMES in CSV"
  fi
  log_info "CSV patched for OpenShift Gateway controller"
  # CRITICAL: Force delete the operator pod to pick up the new env var
  # OLM updates the deployment spec but doesn't always trigger a pod restart
  # The operator must have ISTIO_GATEWAY_CONTROLLER_NAMES set BEFORE Kuadrant CR is created
  log_info "Forcing operator restart to apply new Gateway controller configuration..."
  # The kuadrant operator deployment is always named kuadrant-operator-controller-manager
  # regardless of whether we're using rhcl-operator or kuadrant-operator
  local operator_deployment="kuadrant-operator-controller-manager"
  if kubectl get deployment "$operator_deployment" -n "$namespace" &>/dev/null; then
    # Try the known pod label selectors in order until one matches; the
    # trailing "|| true" keeps set -e from aborting when none do.
    kubectl delete pod -n "$namespace" -l control-plane=controller-manager --force --grace-period=0 2>/dev/null || \
    kubectl delete pod -n "$namespace" -l app.kubernetes.io/name=kuadrant-operator --force --grace-period=0 2>/dev/null || \
    kubectl delete pod -n "$namespace" -l app=kuadrant --force --grace-period=0 2>/dev/null || true
    # Wait for the new pod to be ready
    log_info "Waiting for operator pod to restart..."
    sleep 5
    kubectl rollout status deployment/"$operator_deployment" -n "$namespace" --timeout="${ROLLOUT_TIMEOUT}s" 2>/dev/null || \
    log_warn "Operator rollout status check timed out (timeout: ${ROLLOUT_TIMEOUT}s)"
    # Verify the env var is in the RUNNING pod
    local pod_env
    pod_env=$(kubectl exec -n "$namespace" deployment/"$operator_deployment" -- env 2>/dev/null | grep ISTIO_GATEWAY_CONTROLLER_NAMES || echo "")
    if [[ "$pod_env" == *"openshift.io/gateway-controller/v1"* ]]; then
      log_info "Operator pod is running with OpenShift Gateway controller configuration"
    else
      log_warn "Operator pod may not have correct env yet: $pod_env"
    fi
    # Give the operator time to fully initialize with the new Gateway controller configuration
    # This is critical - the operator needs to register as a Gateway controller before Kuadrant CR is created
    log_info "Waiting 15s for operator to fully initialize with Gateway controller configuration..."
    sleep 15
  else
    log_warn "Could not find operator deployment, waiting 60s for env propagation"
    sleep 60
  fi
}
#######################################
# Install the configured policy engine (Kuadrant-based API gateway policies).
# Globals:
#   POLICY_ENGINE (read) - "rhcl" (Red Hat Connectivity Link, downstream) or
#                          "kuadrant" (upstream community operator v1.3.1)
# Outputs:
#   Progress via log_info/log_error; applies OLM resources to the cluster.
# Returns:
#   0 on success, 1 if POLICY_ENGINE holds an unsupported value.
#######################################
install_policy_engine() {
  log_info "Installing policy engine: $POLICY_ENGINE"
  case "$POLICY_ENGINE" in
    rhcl)
      log_info "Installing RHCL (Red Hat Connectivity Link - downstream)"
      install_olm_operator \
        "rhcl-operator" \
        "rh-connectivity-link" \
        "redhat-operators" \
        "stable" \
        "" \
        "AllNamespaces" \
        "" \
        ""
      # Patch RHCL CSV to recognize OpenShift Gateway controller
      patch_kuadrant_csv_for_gateway "rh-connectivity-link" "rhcl-operator"
      # Apply RHCL/Kuadrant custom resource
      apply_kuadrant_cr "rh-connectivity-link"
      ;;
    kuadrant)
      log_info "Installing Kuadrant v1.3.1 (upstream community)"
      # Create custom catalog for upstream Kuadrant v1.3.1
      # This version provides AuthPolicy v1 API required by ODH
      local kuadrant_catalog="kuadrant-operator-catalog"
      local kuadrant_ns="kuadrant-system"
      log_info "Creating Kuadrant v1.3.1 catalog source..."
      # Best-effort: namespace may already exist from a previous run
      kubectl create namespace "$kuadrant_ns" 2>/dev/null || true
      cat <<EOF | kubectl apply -f -
apiVersion: operators.coreos.com/v1alpha1
kind: CatalogSource
metadata:
  name: $kuadrant_catalog
  namespace: $kuadrant_ns
spec:
  sourceType: grpc
  image: quay.io/kuadrant/kuadrant-operator-catalog:v1.3.1
  displayName: Kuadrant Operator Catalog
  publisher: Kuadrant
  updateStrategy:
    registryPoll:
      interval: 45m
EOF
      # Wait for catalog to be ready before creating the Subscription
      log_info "Waiting for Kuadrant catalog to be ready..."
      sleep 10
      # Create OperatorGroup for Kuadrant (empty spec => AllNamespaces mode)
      cat <<EOF | kubectl apply -f -
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  name: kuadrant-operator-group
  namespace: $kuadrant_ns
spec: {}
EOF
      # Install Kuadrant operator from the custom catalog
      # IMPORTANT: source_namespace must match where CatalogSource was created (kuadrant_ns)
      install_olm_operator \
        "kuadrant-operator" \
        "$kuadrant_ns" \
        "$kuadrant_catalog" \
        "stable" \
        "" \
        "AllNamespaces" \
        "$kuadrant_ns" \
        ""
      # Patch Kuadrant CSV to recognize OpenShift Gateway controller
      patch_kuadrant_csv_for_gateway "$kuadrant_ns" "kuadrant-operator"
      # Apply Kuadrant custom resource
      apply_kuadrant_cr "$kuadrant_ns"
      ;;
    *)
      # Fail fast: previously an unknown value fell through the case silently,
      # installing nothing while still reporting success to the caller.
      log_error "Unknown policy engine: '$POLICY_ENGINE' (expected 'rhcl' or 'kuadrant')"
      return 1
      ;;
  esac
}
#──────────────────────────────────────────────────────────────
# CONFLICTING OPERATOR CHECK
#──────────────────────────────────────────────────────────────
#######################################
# Refuse to proceed when the mutually exclusive ODH/RHOAI operator is present.
# ODH and RHOAI manage the same CRDs, so only one may be installed at a time.
# Globals:
#   OPERATOR_TYPE (read) - "odh" or anything else (treated as RHOAI)
# Outputs:
#   Diagnostics and removal instructions via log_info/log_error.
# Returns:
#   0 when no conflict exists, 1 when the rival operator was found.
#######################################
check_conflicting_operators() {
  log_info "Checking if there are any conflicting operators..."
  # The rival is the "other" operator: ODH conflicts with RHOAI and vice versa.
  local rival="opendatahub-operator"
  [[ "$OPERATOR_TYPE" == "odh" ]] && rival="rhods-operator"
  # Best-effort scan of every namespace for a subscription to the rival.
  local found
  found=$(oc get subscription.operators.coreos.com --all-namespaces --no-headers 2>/dev/null | grep -w "$rival" | head -n1 || true)
  # Happy path: nothing matched, safe to continue.
  if [[ -z "$found" ]]; then
    log_info "No conflicting operators found. Proceeding to installing the primary operator."
    return 0
  fi
  # First column of `oc get --all-namespaces` output is the namespace.
  local rival_ns
  rival_ns=$(awk '{print $1}' <<<"$found")
  if [[ -z "$rival_ns" ]]; then
    log_error "Conflicting operator '$rival' detected but could not determine its namespace"
    return 1
  fi
  log_error "Conflicting operator found: $rival in namespace $rival_ns. ODH and RHOAI operators cannot coexist (they manage the same CRDs)."
  log_info "Remove the conflicting operator before proceeding (suggested steps):"
  log_info " 1. Delete custom resources: oc delete datasciencecluster --all && oc delete dscinitializations --all"
  log_info " 2. Delete subscription: oc delete subscription.operators.coreos.com $rival -n $rival_ns"
  log_info " 3. Delete CSV: oc delete csv -n $rival_ns -l operators.coreos.com/$rival"
  log_info " 4. Try uninstalling $rival (can be done via a console as well) before attempting to run deploy.sh again."
  log_info " 5. Sanity check: delete any lingering operator groups, old namespaces and projects."
  log_error "Quit the execution of the script. You may try re-running again."
  return 1
}
#──────────────────────────────────────────────────────────────
# PRIMARY OPERATOR INSTALLATION
#──────────────────────────────────────────────────────────────
install_primary_operator() {
log_info "Installing primary operator: $OPERATOR_TYPE"