Skip to content

Commit 99ad9fb

Browse files
committed
MGMT-20233: Improve recomputation of operator dependencies
* Run all db changes within a transaction * Call to EnsureOperatorPrerequisite * Reset auto-assign roles if anything changed
1 parent 4eb6717 commit 99ad9fb

File tree

4 files changed

+87
-57
lines changed

4 files changed

+87
-57
lines changed

internal/bminventory/inventory.go

+2
Original file line numberDiff line numberDiff line change
@@ -3121,6 +3121,8 @@ func (b *bareMetalInventory) updateClusterCPUFeatureUsage(cpuArchitecture string
31213121
}
31223122
}
31233123

3124+
// This code is very similar to internal/cluster/refresh_status_preprocessor.go:recalculateOperatorDependencies
3125+
// TODO: Refactor this to a common place if possible
31243126
func (b *bareMetalInventory) updateOperatorsData(ctx context.Context, cluster *common.Cluster, params installer.V2UpdateClusterParams, usages map[string]models.Usage, db *gorm.DB, log logrus.FieldLogger) error {
31253127
if params.ClusterUpdateParams.OlmOperators == nil {
31263128
return nil

internal/cluster/refresh_status_preprocessor.go

+78-55
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ import (
1818
operatorcommon "github.com/openshift/assisted-service/internal/operators/common"
1919
"github.com/openshift/assisted-service/internal/usage"
2020
"github.com/openshift/assisted-service/models"
21-
"github.com/pkg/errors"
2221
"github.com/sirupsen/logrus"
2322
"github.com/thoas/go-funk"
23+
"gorm.io/gorm"
2424
)
2525

2626
type ValidationResult struct {
@@ -72,7 +72,7 @@ func (r *refreshPreprocessor) preprocess(ctx context.Context, c *clusterPreproce
7272
if c.cluster != nil {
7373
ignoredValidations, err = common.DeserializeJSONList(c.cluster.IgnoredClusterValidations)
7474
if err != nil {
75-
return nil, nil, errors.Wrap(err, fmt.Sprintf("Unable to deserialize ignored cluster validations for cluster %s", string(*c.cluster.ID)))
75+
return nil, nil, fmt.Errorf("unable to deserialize ignored cluster validations for cluster %s: %w", c.cluster.ID.String(), err)
7676
}
7777
}
7878

@@ -102,7 +102,7 @@ func (r *refreshPreprocessor) preprocess(ctx context.Context, c *clusterPreproce
102102
// dependency, and then we will need to validate that secure boot is disabled.
103103
err = r.recalculateOperatorDependencies(ctx, c)
104104
if err != nil {
105-
err = errors.Wrapf(err, "failed to recalculate operator dependencies for cluster '%s'", c.clusterId)
105+
err = fmt.Errorf("failed to recalculate operator dependencies for cluster '%s': %w", c.clusterId, err)
106106
return nil, nil, err
107107
}
108108

@@ -151,72 +151,97 @@ func (r *refreshPreprocessor) preprocess(ctx context.Context, c *clusterPreproce
151151
func (r *refreshPreprocessor) recalculateOperatorDependencies(ctx context.Context, c *clusterPreprocessContext) error {
152152
// Calculate and save the operators that have been added, updated or deleted:
153153
operatorsBeforeResolve := c.cluster.MonitoredOperators
154+
154155
operatorsAfterResolve, err := r.operatorsAPI.ResolveDependencies(c.cluster, c.cluster.MonitoredOperators)
155156
if err != nil {
156-
return errors.Wrapf(
157-
err,
158-
"failed to resolve operator dependencies for cluster '%s'",
159-
c.clusterId,
160-
)
157+
return fmt.Errorf("failed to resolve operator dependencies: %w", err)
161158
}
159+
162160
var addedOperators, updatedOperators, deletedOperators []*models.MonitoredOperator
161+
163162
for _, operatorAfterResolve := range operatorsAfterResolve {
164163
if operatorAfterResolve.ClusterID == "" {
165164
operatorAfterResolve.ClusterID = c.clusterId
166165
}
167-
operatorBeforeREsolve := operatorcommon.GetOperator(operatorsBeforeResolve, operatorAfterResolve.Name)
168-
if operatorBeforeREsolve != nil {
169-
if !reflect.DeepEqual(operatorAfterResolve, operatorBeforeREsolve) {
166+
167+
operatorBeforeResolve := operatorcommon.GetOperator(operatorsBeforeResolve, operatorAfterResolve.Name)
168+
if operatorBeforeResolve != nil {
169+
if !reflect.DeepEqual(operatorAfterResolve, operatorBeforeResolve) {
170170
updatedOperators = append(updatedOperators, operatorAfterResolve)
171171
}
172172
} else {
173173
addedOperators = append(addedOperators, operatorAfterResolve)
174174
}
175175
}
176+
176177
for _, operatorBeforeResolve := range operatorsBeforeResolve {
177178
if !operatorcommon.HasOperator(operatorsAfterResolve, operatorBeforeResolve.Name) {
178179
deletedOperators = append(deletedOperators, operatorBeforeResolve)
179180
}
180181
}
181-
for _, addedOperator := range addedOperators {
182-
err = c.db.Save(addedOperator).Error
183-
if err != nil {
184-
return errors.Wrapf(
185-
err,
186-
"failed to add operator '%s' to cluster '%s'",
187-
addedOperator.Name, *c.cluster.ID,
188-
)
189-
}
182+
183+
// If nothing changed, nothing needs to be done
184+
if len(addedOperators) == 0 && len(deletedOperators) == 0 && len(updatedOperators) == 0 {
185+
return nil
190186
}
191-
for _, updatedOperator := range updatedOperators {
192-
err = c.db.Save(updatedOperator).Error
193-
if err != nil {
194-
return errors.Wrapf(
195-
err,
196-
"failed to update operator '%s' for cluster '%s'",
197-
updatedOperator.Name, *c.cluster.ID,
198-
)
199-
}
187+
188+
// Validate with cluster CPU architecture
189+
err = r.operatorsAPI.EnsureOperatorPrerequisite(c.cluster, c.cluster.OpenshiftVersion, c.cluster.CPUArchitecture, operatorsAfterResolve)
190+
if err != nil {
191+
return fmt.Errorf("failed to validate operator prerequisite: %w", err)
200192
}
201-
for _, deletedOperator := range deletedOperators {
202-
err = c.db.Delete(deletedOperator).Error
193+
194+
c.cluster.MonitoredOperators = operatorsAfterResolve
195+
196+
err = c.db.Transaction(func(tx *gorm.DB) error {
197+
for _, addedOperator := range addedOperators {
198+
err = tx.Save(addedOperator).Error
199+
if err != nil {
200+
return fmt.Errorf("failed to add operator '%s': %w", addedOperator.Name, err)
201+
}
202+
}
203+
204+
for _, updatedOperator := range updatedOperators {
205+
err = tx.Save(updatedOperator).Error
206+
if err != nil {
207+
return fmt.Errorf("failed to update operator '%s': %w", updatedOperator.Name, err)
208+
}
209+
}
210+
211+
for _, deletedOperator := range deletedOperators {
212+
err = tx.Delete(deletedOperator).Error
213+
if err != nil {
214+
return fmt.Errorf("failed to delete operator '%s': %w", deletedOperator.Name, err)
215+
}
216+
}
217+
218+
// If any operator has been added or deleted then we need to update the corresponding feature usage
219+
if len(addedOperators) > 0 || len(deletedOperators) > 0 {
220+
err = r.recalculateOperatorFeatureUsage(c, tx, addedOperators, deletedOperators)
221+
if err != nil {
222+
return fmt.Errorf("failed to recalculate operator feature usage: %w", err)
223+
}
224+
}
225+
226+
// If any operator has been added, updated or deleted then we might need to retrigger auto-assign role calculation.
227+
// This is needed because operators may affect the role assignment logic.
228+
var resetHostRoles int
229+
resetHostRoles, err = common.ResetAutoAssignRoles(tx, c.clusterId)
203230
if err != nil {
204-
return errors.Wrapf(
205-
err,
206-
"failed to delete operator '%s' from cluster '%s'",
207-
deletedOperator.Name,
208-
c.clusterId,
209-
)
231+
return fmt.Errorf("failed to reset auto assign role: %w", err)
210232
}
233+
234+
r.log.Infof("resetting auto-assign roles on cluster %s after operator setup has changed: %d hosts affected", c.clusterId.String(), resetHostRoles)
235+
236+
return nil
237+
})
238+
239+
if err != nil {
240+
return fmt.Errorf("transaction to update monitored operators, the associated usage and reset roles failed: %w", err)
211241
}
212-
c.cluster.MonitoredOperators = operatorsAfterResolve
213242

214-
// If any operator has been added or deleted then we need to update the corresponding feature usage:
243+
// If everything went smoothly, notify about the change
215244
if len(addedOperators) > 0 || len(deletedOperators) > 0 {
216-
err = r.recalculateOperatorFeatureUsage(c, addedOperators, deletedOperators)
217-
if err != nil {
218-
return err
219-
}
220245
err = r.notifyOperatorFeatureUsageChange(ctx, c, addedOperators, deletedOperators)
221246
if err != nil {
222247
return err
@@ -226,37 +251,35 @@ func (r *refreshPreprocessor) recalculateOperatorDependencies(ctx context.Contex
226251
return nil
227252
}
228253

229-
func (r *refreshPreprocessor) recalculateOperatorFeatureUsage(c *clusterPreprocessContext,
254+
func (r *refreshPreprocessor) recalculateOperatorFeatureUsage(c *clusterPreprocessContext, db *gorm.DB,
230255
addedOperators, deletedOperators []*models.MonitoredOperator) error {
231256
if r.usageAPI == nil {
232257
return nil
233258
}
259+
234260
usages, err := usage.Unmarshal(c.cluster.FeatureUsage)
235261
if err != nil {
236-
return errors.Wrapf(
237-
err,
238-
"failed to read feature usage from cluster '%s'",
239-
c.clusterId,
240-
)
262+
return fmt.Errorf("failed to read feature usage: %w", err)
241263
}
264+
242265
for _, addedOperator := range addedOperators {
243266
featureName := strings.ToUpper(addedOperator.Name)
244267
r.usageAPI.Add(usages, featureName, nil)
245268
}
269+
246270
for _, deletedOperator := range deletedOperators {
247271
featureName := strings.ToUpper(deletedOperator.Name)
248272
r.usageAPI.Remove(usages, featureName)
249273
}
274+
250275
data, err := json.Marshal(usages)
251276
if err != nil {
252-
return errors.Wrapf(
253-
err,
254-
"failed to write feature usage to cluster '%s'",
255-
c.clusterId,
256-
)
277+
return fmt.Errorf("failed to write feature usage: %w", err)
257278
}
279+
258280
c.cluster.FeatureUsage = string(data)
259-
r.usageAPI.Save(c.db, c.clusterId, usages)
281+
r.usageAPI.Save(db, c.clusterId, usages)
282+
260283
return nil
261284
}
262285

internal/cluster/refresh_status_preprocessor_test.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ var _ = Describe("Cluster Refresh Status Preprocessor", func() {
118118
mockOperatorManager.EXPECT().ValidateCluster(gomock.Any(), gomock.Any()).Return(nil, nil).AnyTimes()
119119
}
120120

121+
mockOperatorEnsureOperatorPrerequisiteSuccess := func() {
122+
mockOperatorManager.EXPECT().EnsureOperatorPrerequisite(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
123+
}
124+
121125
Context("Skipping Validations", func() {
122126

123127
cantBeIgnored := common.NonIgnorableClusterValidations
@@ -149,7 +153,7 @@ var _ = Describe("Cluster Refresh Status Preprocessor", func() {
149153
validationContext.cluster.IgnoredClusterValidations = "bad JSON"
150154
_, _, err := preprocessor.preprocess(ctx, validationContext)
151155
Expect(err).To(HaveOccurred())
152-
Expect(err.Error()).To(ContainSubstring("Unable to deserialize ignored cluster validations"))
156+
Expect(err.Error()).To(ContainSubstring("unable to deserialize ignored cluster validations"))
153157
})
154158

155159
It("Should allow permitted ignorable validations to be ignored", func() {
@@ -215,6 +219,8 @@ var _ = Describe("Cluster Refresh Status Preprocessor", func() {
215219
},
216220
)
217221
mockOperatorValidationsSuccess()
222+
mockOperatorEnsureOperatorPrerequisiteSuccess()
223+
218224
_, _, err := preprocessor.preprocess(ctx, validationContext)
219225
Expect(err).ToNot(HaveOccurred())
220226

internal/operators/common/common.go

-1
Original file line numberDiff line numberDiff line change
@@ -126,4 +126,3 @@ func GetOperator(operators []*models.MonitoredOperator, operatorName string) *mo
126126
}
127127
return nil
128128
}
129-

0 commit comments

Comments
 (0)