Skip to content

Commit c4ffebb

Browse files
VaishnaviHire and Yauheni Kaliuta authored
Sync odh-2.12 (#1011)
* Update version to v2.12.0 (#1007) * upgrade: retry if default DSCI creation fails (#1008) After removing leader election, the operator fails to start if it is instructed to create the default DSCI. It looks like the webhook is not ready by that time: ``` create default DSCI CR. {"level":"error","ts":"2024-05-13T09:25:58Z","logger":"setup","msg":"unable to create initial setup for the operator","error":"Internal error occurred: failed calling webhook \"operator.opendatahub.io\": failed to call webhook: Post \"https://opendatahub-operator-controller-manager-service.oo-2ts9m.svc:443/validate-opendatahub-io-v1?timeout=10s\": no endpoints available for service \"opendatahub-operator-controller-manager-service\"","stacktrace":"main.main.func1\n\t/workspace/main.go:200\nsigs.k8s.io/controller-runtime/pkg/manager.RunnableFunc.Start\n\t/remote-source/operator/deps/gomod/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.6/pkg/manager/manager.go:336\nsigs.k8s.io/controller-runtime/pkg/manager.(*runnableGroup).reconcile.func1\n\t/remote-source/operator/deps/gomod/pkg/mod/sigs.k8s.io/controller-runtime@v0.14.6/pkg/manager/runnable_group.go:219"} ``` Leader election added some delay. The problem does not happen in the default configuration, since it explicitly disables DSCI creation in the manifests: ``` containers: - command: - /manager env: - name: DISABLE_DSC_CONFIG value: 'true' args: - --operator-name=opendatahub image: controller:latest ``` Make a wrapper function cluster.CreateWithRetry for client.Object creation with a timeout. Use a hardcoded 5s interval, which just seems reasonable, and a timeout in minutes as the parameter. It requires disabling the nilerr linter, since for the polling function an error in creation is a valid condition, something the function waits to disappear. Fixes: 3610b0b ("feat: remove leader election for operator (#1000)") Signed-off-by: Yauheni Kaliuta <ykaliuta@redhat.com> --------- Signed-off-by: Yauheni Kaliuta <ykaliuta@redhat.com> Co-authored-by: Yauheni Kaliuta <ykaliuta@redhat.com>
1 parent 1abe316 commit c4ffebb

File tree

5 files changed

+23
-10
lines changed

5 files changed

+23
-10
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# To re-generate a bundle for another specific version without changing the standard setup, you can:
44
# - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2)
55
# - use environment variables to overwrite this value (e.g export VERSION=0.0.2)
6-
VERSION ?= 2.10.1
6+
VERSION ?= 2.12.0
77
# IMAGE_TAG_BASE defines the opendatahub.io namespace and part of the image name for remote images.
88
# This variable is used to construct full image tags for bundle and catalog images.
99
#

bundle/manifests/opendatahub-operator.clusterserviceversion.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,14 @@ metadata:
101101
capabilities: Full Lifecycle
102102
categories: AI/Machine Learning, Big Data
103103
certified: "False"
104-
containerImage: quay.io/opendatahub/opendatahub-operator:v2.10.0
104+
containerImage: quay.io/opendatahub/opendatahub-operator:v2.12.0
105105
createdAt: "2024-4-22T00:00:00Z"
106-
olm.skipRange: '>=1.0.0 <2.11.0'
106+
olm.skipRange: '>=1.0.0 <2.12.0'
107107
operators.operatorframework.io/builder: operator-sdk-v1.24.1
108108
operators.operatorframework.io/internal-objects: '[dscinitialization.opendatahub.io]'
109109
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
110110
repository: https://github.com/opendatahub-io/opendatahub-operator
111-
name: opendatahub-operator.v2.10.1
111+
name: opendatahub-operator.v2.12.0
112112
namespace: placeholder
113113
spec:
114114
apiservicedefinitions: {}
@@ -1763,7 +1763,7 @@ spec:
17631763
selector:
17641764
matchLabels:
17651765
component: opendatahub-operator
1766-
version: 2.10.1
1766+
version: 2.12.0
17671767
webhookdefinitions:
17681768
- admissionReviewVersions:
17691769
- v1

config/manifests/bases/opendatahub-operator.clusterserviceversion.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ metadata:
66
capabilities: Full Lifecycle
77
categories: AI/Machine Learning, Big Data
88
certified: "False"
9-
containerImage: quay.io/opendatahub/opendatahub-operator:v2.10.0
9+
containerImage: quay.io/opendatahub/opendatahub-operator:v2.12.0
1010
createdAt: "2024-4-22T00:00:00Z"
11-
olm.skipRange: '>=1.0.0 <2.11.0'
11+
olm.skipRange: '>=1.0.0 <2.12.0'
1212
operators.operatorframework.io/internal-objects: '[dscinitialization.opendatahub.io]'
1313
repository: https://github.com/opendatahub-io/opendatahub-operator
14-
name: opendatahub-operator.v2.11.0
14+
name: opendatahub-operator.v2.12.0
1515
namespace: placeholder
1616
spec:
1717
apiservicedefinitions: {}
@@ -105,4 +105,4 @@ spec:
105105
selector:
106106
matchLabels:
107107
component: opendatahub-operator
108-
version: 2.11.0
108+
version: 2.12.0

pkg/cluster/resources.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,16 @@ func WaitForDeploymentAvailable(ctx context.Context, c client.Client, componentN
174174
return true, nil
175175
})
176176
}
177+
178+
// CreateWithRetry creates obj through cli, retrying until cli.Create succeeds
// or the retry window closes.
//
// Creation is attempted at a fixed 5-second interval for at most timeoutMin
// minutes. Every error returned by cli.Create is deliberately discarded and
// treated as "not done yet, try again": per the commit description, a failing
// create (e.g. the admission webhook not being reachable yet) is an expected
// transient condition the function waits out.
//
// NOTE(review): because the Create error is never passed back from the
// condition function, the caller presumably only sees the poller's own
// timeout/cancellation error, not the underlying cause — confirm against
// wait.PollUntilContextTimeout.
// NOTE(review): non-transient Create errors are retried the same way as
// transient ones, for the full timeout.
func CreateWithRetry(ctx context.Context, cli client.Client, obj client.Object, timeoutMin int) error {
179+
interval := time.Second * 5 // arbitrary value
180+
timeout := time.Duration(timeoutMin) * time.Minute
181+
182+
// NOTE(review): the literal `true` is presumably the "immediate" flag, i.e.
// the first attempt happens without waiting one interval — confirm.
return wait.PollUntilContextTimeout(ctx, interval, timeout, true, func(ctx context.Context) (bool, error) {
183+
err := cli.Create(ctx, obj)
184+
if err != nil {
185+
// Swallow the error on purpose: returning (false, nil) asks the poller
// to try again instead of aborting.
return false, nil //nolint:nilerr
186+
}
187+
return true, nil
188+
})
189+
}

pkg/upgrade/upgrade.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platfor
166166
return nil
167167
case len(instances.Items) == 0:
168168
fmt.Println("create default DSCI CR.")
169-
err := cli.Create(ctx, defaultDsci)
169+
err := cluster.CreateWithRetry(ctx, cli, defaultDsci, 1) // 1 min timeout
170170
if err != nil {
171171
return err
172172
}

0 commit comments

Comments
 (0)