Skip to content

Commit 4eb6717

Browse files
jhernandpastequo
authored andcommitted
NO-ISSUE: Recalculate operator dependencies before validations
Currently operator dependencies are only calculated when a cluster is created or updated. But certain dependencies are dynamic, and may change when new hosts are added. For example, if a cluster has the OpenShift AI operator installed, it will also require the NVIDIA GPU operator only if there are hosts that have NVIDIA GPUs. To support those dynamic dependencies this patch modifies the cluster monitor so that it recalculates the operator dependencies before checking validations. Signed-off-by: Juan Hernandez <[email protected]> (cherry picked from commit 5371230)
1 parent 7e3ad83 commit 4eb6717

File tree

13 files changed

+549
-95
lines changed

13 files changed

+549
-95
lines changed

cmd/main.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ func main() {
458458
manifestsGenerator := network.NewManifestsGenerator(manifestsApi, Options.ManifestsGeneratorConfig, db)
459459
clusterApi := cluster.NewManager(Options.ClusterConfig, log.WithField("pkg", "cluster-state"), db,
460460
notificationStream, eventsHandler, uploadClient, hostApi, metricsManager, manifestsGenerator, lead, operatorsManager,
461-
ocmClient, objectHandler, dnsApi, authHandler, manifestsApi, Options.EnableSoftTimeouts)
461+
ocmClient, objectHandler, dnsApi, authHandler, manifestsApi, Options.EnableSoftTimeouts, usageManager)
462462
infraEnvApi := infraenv.NewManager(log.WithField("pkg", "host-state"), db, objectHandler)
463463

464464
clusterEventsUploader := thread.New(

internal/bminventory/inventory_test.go

+52-29
Large diffs are not rendered by default.

internal/cluster/cluster.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/openshift/assisted-service/internal/operators"
3333
"github.com/openshift/assisted-service/internal/stream"
3434
"github.com/openshift/assisted-service/internal/uploader"
35+
"github.com/openshift/assisted-service/internal/usage"
3536
"github.com/openshift/assisted-service/models"
3637
"github.com/openshift/assisted-service/pkg/auth"
3738
"github.com/openshift/assisted-service/pkg/commonutils"
@@ -176,7 +177,8 @@ type Manager struct {
176177
func NewManager(cfg Config, log logrus.FieldLogger, db *gorm.DB, stream stream.Notifier, eventsHandler eventsapi.Handler,
177178
uploadClient uploader.Client, hostAPI host.API, metricApi metrics.API, manifestsGeneratorAPI network.ManifestsGeneratorAPI,
178179
leaderElector leader.Leader, operatorsApi operators.API, ocmClient *ocm.Client, objectHandler s3wrapper.API,
179-
dnsApi dns.DNSApi, authHandler auth.Authenticator, manifestApi manifestsapi.ManifestsAPI, softTimeoutsEnabled bool) *Manager {
180+
dnsApi dns.DNSApi, authHandler auth.Authenticator, manifestApi manifestsapi.ManifestsAPI, softTimeoutsEnabled bool,
181+
usageApi usage.API) *Manager {
180182
th := &transitionHandler{
181183
log: log,
182184
db: db,
@@ -198,7 +200,7 @@ func NewManager(cfg Config, log logrus.FieldLogger, db *gorm.DB, stream stream.N
198200
sm: NewClusterStateMachine(th),
199201
metricAPI: metricApi,
200202
manifestsGeneratorAPI: manifestsGeneratorAPI,
201-
rp: newRefreshPreprocessor(log, hostAPI, operatorsApi),
203+
rp: newRefreshPreprocessor(log, hostAPI, operatorsApi, usageApi, eventsHandler),
202204
hostAPI: hostAPI,
203205
leaderElector: leaderElector,
204206
prevMonitorInvokedAt: time.Now(),

internal/cluster/cluster_test.go

+81-32
Large diffs are not rendered by default.

internal/cluster/progress_test.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,13 @@ var _ = Describe("Progress bar test", func() {
5050
mockOperatorApi = operators.NewMockAPI(ctrl)
5151
mockDnsApi = dns.NewMockDNSApi(ctrl)
5252
clusterApi = NewManager(getDefaultConfig(), common.GetTestLog().WithField("pkg", "cluster-monitor"), db, commontesting.GetDummyNotificationStream(ctrl),
53-
mockEvents, nil, mockHostAPI, mockMetric, nil, nil, mockOperatorApi, nil, nil, mockDnsApi, nil, nil, false)
53+
mockEvents, nil, mockHostAPI, mockMetric, nil, nil, mockOperatorApi, nil, nil, mockDnsApi, nil, nil, false, nil)
54+
55+
mockOperatorApi.EXPECT().ResolveDependencies(gomock.Any(), gomock.Any()).DoAndReturn(
56+
func(_ *common.Cluster, previousOperators []*models.MonitoredOperator) ([]*models.MonitoredOperator, error) {
57+
return previousOperators, nil
58+
},
59+
).AnyTimes()
5460
})
5561

5662
AfterEach(func() {

internal/cluster/refresh_status_preprocessor.go

+192-9
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,21 @@ package cluster
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
7+
"reflect"
68
"sort"
79
"strings"
810

11+
"github.com/dustin/go-humanize/english"
912
"github.com/go-openapi/swag"
1013
"github.com/openshift/assisted-service/internal/common"
14+
eventsapi "github.com/openshift/assisted-service/internal/events/api"
1115
"github.com/openshift/assisted-service/internal/host"
1216
"github.com/openshift/assisted-service/internal/operators"
1317
"github.com/openshift/assisted-service/internal/operators/api"
18+
operatorcommon "github.com/openshift/assisted-service/internal/operators/common"
19+
"github.com/openshift/assisted-service/internal/usage"
1420
"github.com/openshift/assisted-service/models"
1521
"github.com/pkg/errors"
1622
"github.com/sirupsen/logrus"
@@ -30,23 +36,28 @@ type stringer interface {
3036
}
3137

3238
type refreshPreprocessor struct {
33-
log logrus.FieldLogger
34-
validations []validation
35-
conditions []condition
36-
operatorsAPI operators.API
39+
log logrus.FieldLogger
40+
validations []validation
41+
conditions []condition
42+
operatorsAPI operators.API
43+
usageAPI usage.API
44+
eventsHandler eventsapi.Handler
3745
}
3846

39-
func newRefreshPreprocessor(log logrus.FieldLogger, hostAPI host.API, operatorsAPI operators.API) *refreshPreprocessor {
47+
func newRefreshPreprocessor(log logrus.FieldLogger, hostAPI host.API, operatorsAPI operators.API, usageAPI usage.API,
48+
eventsHandler eventsapi.Handler) *refreshPreprocessor {
4049
v := clusterValidator{
4150
log: log,
4251
hostAPI: hostAPI,
4352
}
4453

4554
return &refreshPreprocessor{
46-
log: log,
47-
validations: newValidations(&v),
48-
conditions: newConditions(&v),
49-
operatorsAPI: operatorsAPI,
55+
log: log,
56+
validations: newValidations(&v),
57+
conditions: newConditions(&v),
58+
operatorsAPI: operatorsAPI,
59+
usageAPI: usageAPI,
60+
eventsHandler: eventsHandler,
5061
}
5162
}
5263

@@ -84,6 +95,17 @@ func (r *refreshPreprocessor) preprocess(ctx context.Context, c *clusterPreproce
8495
Message: message,
8596
})
8697
}
98+
99+
// Before validating the operators we need to recalculate the dependencies because changes in the hosts may
100+
// imply changes in the dependencies between operators. For example, if the OpenShift AI operator is enabled and
101+
// a new host with an NVIDIA GPU has been discovered, then the NVIDIA GPU operator will need to be added as a
102+
// dependency, and then we will need to validate that secure boot is disabled.
103+
err = r.recalculateOperatorDependencies(ctx, c)
104+
if err != nil {
105+
err = errors.Wrapf(err, "failed to recalculate operator dependencies for cluster '%s'", c.clusterId)
106+
return nil, nil, err
107+
}
108+
87109
// Validate operators
88110
results, err := r.operatorsAPI.ValidateCluster(ctx, c.cluster)
89111
if err != nil {
@@ -124,6 +146,167 @@ func (r *refreshPreprocessor) preprocess(ctx context.Context, c *clusterPreproce
124146
return stateMachineInput, validationsOutput, nil
125147
}
126148

149+
// recalculateOperatorDependencies calculates the operator dependencies and updates the database and the passed cluster
150+
// accordingly.
151+
func (r *refreshPreprocessor) recalculateOperatorDependencies(ctx context.Context, c *clusterPreprocessContext) error {
152+
// Calculate and save the operators that have been added, updated or deleted:
153+
operatorsBeforeResolve := c.cluster.MonitoredOperators
154+
operatorsAfterResolve, err := r.operatorsAPI.ResolveDependencies(c.cluster, c.cluster.MonitoredOperators)
155+
if err != nil {
156+
return errors.Wrapf(
157+
err,
158+
"failed to resolve operator dependencies for cluster '%s'",
159+
c.clusterId,
160+
)
161+
}
162+
var addedOperators, updatedOperators, deletedOperators []*models.MonitoredOperator
163+
for _, operatorAfterResolve := range operatorsAfterResolve {
164+
if operatorAfterResolve.ClusterID == "" {
165+
operatorAfterResolve.ClusterID = c.clusterId
166+
}
167+
operatorBeforeREsolve := operatorcommon.GetOperator(operatorsBeforeResolve, operatorAfterResolve.Name)
168+
if operatorBeforeREsolve != nil {
169+
if !reflect.DeepEqual(operatorAfterResolve, operatorBeforeREsolve) {
170+
updatedOperators = append(updatedOperators, operatorAfterResolve)
171+
}
172+
} else {
173+
addedOperators = append(addedOperators, operatorAfterResolve)
174+
}
175+
}
176+
for _, operatorBeforeResolve := range operatorsBeforeResolve {
177+
if !operatorcommon.HasOperator(operatorsAfterResolve, operatorBeforeResolve.Name) {
178+
deletedOperators = append(deletedOperators, operatorBeforeResolve)
179+
}
180+
}
181+
for _, addedOperator := range addedOperators {
182+
err = c.db.Save(addedOperator).Error
183+
if err != nil {
184+
return errors.Wrapf(
185+
err,
186+
"failed to add operator '%s' to cluster '%s'",
187+
addedOperator.Name, *c.cluster.ID,
188+
)
189+
}
190+
}
191+
for _, updatedOperator := range updatedOperators {
192+
err = c.db.Save(updatedOperator).Error
193+
if err != nil {
194+
return errors.Wrapf(
195+
err,
196+
"failed to update operator '%s' for cluster '%s'",
197+
updatedOperator.Name, *c.cluster.ID,
198+
)
199+
}
200+
}
201+
for _, deletedOperator := range deletedOperators {
202+
err = c.db.Delete(deletedOperator).Error
203+
if err != nil {
204+
return errors.Wrapf(
205+
err,
206+
"failed to delete operator '%s' from cluster '%s'",
207+
deletedOperator.Name,
208+
c.clusterId,
209+
)
210+
}
211+
}
212+
c.cluster.MonitoredOperators = operatorsAfterResolve
213+
214+
// If any operator has been added or deleted then we need to update the corresponding feature usage:
215+
if len(addedOperators) > 0 || len(deletedOperators) > 0 {
216+
err = r.recalculateOperatorFeatureUsage(c, addedOperators, deletedOperators)
217+
if err != nil {
218+
return err
219+
}
220+
err = r.notifyOperatorFeatureUsageChange(ctx, c, addedOperators, deletedOperators)
221+
if err != nil {
222+
return err
223+
}
224+
}
225+
226+
return nil
227+
}
228+
229+
func (r *refreshPreprocessor) recalculateOperatorFeatureUsage(c *clusterPreprocessContext,
230+
addedOperators, deletedOperators []*models.MonitoredOperator) error {
231+
if r.usageAPI == nil {
232+
return nil
233+
}
234+
usages, err := usage.Unmarshal(c.cluster.FeatureUsage)
235+
if err != nil {
236+
return errors.Wrapf(
237+
err,
238+
"failed to read feature usage from cluster '%s'",
239+
c.clusterId,
240+
)
241+
}
242+
for _, addedOperator := range addedOperators {
243+
featureName := strings.ToUpper(addedOperator.Name)
244+
r.usageAPI.Add(usages, featureName, nil)
245+
}
246+
for _, deletedOperator := range deletedOperators {
247+
featureName := strings.ToUpper(deletedOperator.Name)
248+
r.usageAPI.Remove(usages, featureName)
249+
}
250+
data, err := json.Marshal(usages)
251+
if err != nil {
252+
return errors.Wrapf(
253+
err,
254+
"failed to write feature usage to cluster '%s'",
255+
c.clusterId,
256+
)
257+
}
258+
c.cluster.FeatureUsage = string(data)
259+
r.usageAPI.Save(c.db, c.clusterId, usages)
260+
return nil
261+
}
262+
263+
func (r refreshPreprocessor) notifyOperatorFeatureUsageChange(ctx context.Context, c *clusterPreprocessContext,
264+
addedOperators, deletedOperators []*models.MonitoredOperator) error {
265+
if r.eventsHandler == nil {
266+
return nil
267+
}
268+
if len(addedOperators) > 0 {
269+
r.notifyAddedOperatorFeatures(ctx, c, addedOperators)
270+
}
271+
if len(deletedOperators) > 0 {
272+
r.notifyDeletedOperatorFeatures(ctx, c, deletedOperators)
273+
}
274+
return nil
275+
}
276+
277+
func (r *refreshPreprocessor) notifyAddedOperatorFeatures(ctx context.Context, c *clusterPreprocessContext,
278+
operators []*models.MonitoredOperator) {
279+
featureList := r.calculateOperatorFeatureList(operators)
280+
var message string
281+
if len(operators) == 1 {
282+
message = fmt.Sprintf("Cluster %s: added operator feature %s", c.clusterId, featureList)
283+
} else {
284+
message = fmt.Sprintf("Cluster %s: added operator features %s", c.clusterId, featureList)
285+
}
286+
r.eventsHandler.NotifyInternalEvent(ctx, &c.clusterId, nil, nil, message)
287+
}
288+
289+
func (r *refreshPreprocessor) notifyDeletedOperatorFeatures(ctx context.Context, c *clusterPreprocessContext,
290+
operators []*models.MonitoredOperator) {
291+
featureList := r.calculateOperatorFeatureList(operators)
292+
var message string
293+
if len(operators) == 1 {
294+
message = fmt.Sprintf("Cluster %s: deleted operator feature %s", c.clusterId, featureList)
295+
} else {
296+
message = fmt.Sprintf("Cluster %s: deleted operator features %s", c.clusterId, featureList)
297+
}
298+
r.eventsHandler.NotifyInternalEvent(ctx, &c.clusterId, nil, nil, message)
299+
}
300+
301+
func (r *refreshPreprocessor) calculateOperatorFeatureList(operators []*models.MonitoredOperator) string {
302+
featureNames := make([]string, len(operators))
303+
for i, operator := range operators {
304+
featureNames[i] = strings.ToUpper(operator.Name)
305+
}
306+
sort.Strings(featureNames)
307+
return english.WordSeries(featureNames, "and")
308+
}
309+
127310
// sortByValidationResultID sorts results by models.ClusterValidationID
128311
func sortByValidationResultID(validationResults []ValidationResult) {
129312
sort.SliceStable(validationResults, func(i, j int) bool {

0 commit comments

Comments
 (0)