Skip to content

Commit ad1cf27

Browse files
Merge pull request #5739 from djoshy/disable-skew-sno
MCO-2145: Disable skew enforcement on SNO clusters
2 parents 9050f6f + 597024d commit ad1cf27

File tree

6 files changed

+66
-5
lines changed

6 files changed

+66
-5
lines changed

cmd/machine-config-controller/start.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ func createControllers(ctx *ctrlcommon.ControllerContext) []ctrlcommon.Controlle
276276
ctx.ConfigInformerFactory.Config().V1().Schedulers(),
277277
ctx.OperatorInformerFactory.Operator().V1().MachineConfigurations(),
278278
ctx.InformerFactory.Machineconfiguration().V1alpha1().OSImageStreams(),
279+
ctx.ConfigInformerFactory.Config().V1().Infrastructures(),
279280
ctx.ClientBuilder.KubeClientOrDie("node-update-controller"),
280281
ctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"),
281282
ctx.FeatureGatesHandler,

pkg/controller/node/node_controller.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ type Controller struct {
120120
mcopLister mcoplistersv1.MachineConfigurationLister
121121
mcopListerSynced cache.InformerSynced
122122

123+
infraLister cligolistersv1.InfrastructureLister
124+
infraListerSynced cache.InformerSynced
125+
123126
queue workqueue.TypedRateLimitingInterface[string]
124127

125128
fgHandler ctrlcommon.FeatureGatesHandler
@@ -144,6 +147,7 @@ func New(
144147
schedulerInformer cligoinformersv1.SchedulerInformer,
145148
mcopInformer mcopinformersv1.MachineConfigurationInformer,
146149
osImageStreamInformer mcfginformersv1alpha1.OSImageStreamInformer,
150+
infraInformer cligoinformersv1.InfrastructureInformer,
147151
kubeClient clientset.Interface,
148152
mcfgClient mcfgclientset.Interface,
149153
fgHandler ctrlcommon.FeatureGatesHandler,
@@ -160,6 +164,7 @@ func New(
160164
schedulerInformer,
161165
mcopInformer,
162166
osImageStreamInformer,
167+
infraInformer,
163168
kubeClient,
164169
mcfgClient,
165170
defaultUpdateDelay,
@@ -179,6 +184,7 @@ func NewWithCustomUpdateDelay(
179184
schedulerInformer cligoinformersv1.SchedulerInformer,
180185
mcopInformer mcopinformersv1.MachineConfigurationInformer,
181186
osImageStreamInformer mcfginformersv1alpha1.OSImageStreamInformer,
187+
infraInformer cligoinformersv1.InfrastructureInformer,
182188
kubeClient clientset.Interface,
183189
mcfgClient mcfgclientset.Interface,
184190
updateDelay time.Duration,
@@ -196,6 +202,7 @@ func NewWithCustomUpdateDelay(
196202
schedulerInformer,
197203
mcopInformer,
198204
osImageStreamInformer,
205+
infraInformer,
199206
kubeClient,
200207
mcfgClient,
201208
updateDelay,
@@ -216,6 +223,7 @@ func newController(
216223
schedulerInformer cligoinformersv1.SchedulerInformer,
217224
mcopInformer mcopinformersv1.MachineConfigurationInformer,
218225
osImageStreamInformer mcfginformersv1alpha1.OSImageStreamInformer,
226+
infraInformer cligoinformersv1.InfrastructureInformer,
219227
kubeClient clientset.Interface,
220228
mcfgClient mcfgclientset.Interface,
221229
updateDelay time.Duration,
@@ -297,6 +305,9 @@ func newController(
297305
ctrl.mcopLister = mcopInformer.Lister()
298306
ctrl.mcopListerSynced = mcopInformer.Informer().HasSynced
299307

308+
ctrl.infraLister = infraInformer.Lister()
309+
ctrl.infraListerSynced = infraInformer.Informer().HasSynced
310+
300311
// Only initialize OSImageStream lister if feature gate is enabled
301312
if ctrl.osStreamsFgEnabled {
302313
ctrl.osImageStreamLister = osImageStreamInformer.Lister()
@@ -314,6 +325,7 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) {
314325
syncers := []cache.InformerSynced{
315326
ctrl.ccListerSynced, ctrl.mcListerSynced, ctrl.mcpListerSynced, ctrl.moscListerSynced,
316327
ctrl.mosbListerSynced, ctrl.nodeListerSynced, ctrl.schedulerListerSynced, ctrl.mcopListerSynced,
328+
ctrl.infraListerSynced,
317329
}
318330
// Only wait for the OSImageStream informer to sync if the feature is enabled
319331
if ctrl.osStreamsFgEnabled {
@@ -1951,8 +1963,8 @@ func (ctrl *Controller) deleteMachineConfiguration(_ any) {
19511963

19521964
// syncBootImageSkewEnforcementMetric updates the mcc_boot_image_skew_enforcement_none metric
19531965
// based on the current BootImageSkewEnforcementStatus mode in MachineConfiguration.
1954-
// The metric is set to 1 when mode is "None", indicating that scaling operations may
1955-
// not be successful.
1966+
// The metric is set to 1 when mode is "None" on non-SNO clusters, indicating that scaling
1967+
// operations may not be successful. On SNO clusters (where None is the default), or when the Infrastructure object cannot be read, the metric is set to 0 so the alert is suppressed.
19561968
func (ctrl *Controller) syncBootImageSkewEnforcementMetric(obj any) {
19571969

19581970
mcop, ok := obj.(*opv1.MachineConfiguration)
@@ -1962,6 +1974,17 @@ func (ctrl *Controller) syncBootImageSkewEnforcementMetric(obj any) {
19621974
}
19631975

19641976
if mcop.Status.BootImageSkewEnforcementStatus.Mode == opv1.BootImageSkewEnforcementModeStatusNone {
1977+
infra, err := ctrl.infraLister.Get("cluster")
1978+
if err != nil {
1979+
klog.Warningf("Failed to get infrastructure for skew enforcement metric: %v", err)
1980+
ctrlcommon.MCCBootImageSkewEnforcementNone.Set(0)
1981+
return
1982+
}
1983+
// On SNO clusters, None is the default; suppress the alert.
1984+
if infra.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode {
1985+
ctrlcommon.MCCBootImageSkewEnforcementNone.Set(0)
1986+
return
1987+
}
19651988
ctrlcommon.MCCBootImageSkewEnforcementNone.Set(1)
19661989
} else {
19671990
ctrlcommon.MCCBootImageSkewEnforcementNone.Set(0)

pkg/controller/node/node_controller_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,15 @@ func (f *fixture) newControllerWithStopChan(stopCh <-chan struct{}) *Controller
112112
oi := operatorinformer.NewSharedInformerFactory(operatorClient, noResyncPeriodFunc())
113113
c := NewWithCustomUpdateDelay(i.Machineconfiguration().V1().ControllerConfigs(), i.Machineconfiguration().V1().MachineConfigs(), i.Machineconfiguration().V1().MachineConfigPools(), k8sI.Core().V1().Nodes(),
114114
k8sI.Core().V1().Pods(), i.Machineconfiguration().V1().MachineOSConfigs(), i.Machineconfiguration().V1().MachineOSBuilds(), i.Machineconfiguration().V1().MachineConfigNodes(), ci.Config().V1().Schedulers(), oi.Operator().V1().MachineConfigurations(),
115-
i.Machineconfiguration().V1alpha1().OSImageStreams(), f.kubeclient, f.client, time.Millisecond, f.fgHandler)
115+
i.Machineconfiguration().V1alpha1().OSImageStreams(), ci.Config().V1().Infrastructures(), f.kubeclient, f.client, time.Millisecond, f.fgHandler)
116116

117117
c.ccListerSynced = alwaysReady
118118
c.mcpListerSynced = alwaysReady
119119
c.nodeListerSynced = alwaysReady
120120
c.schedulerListerSynced = alwaysReady
121121
c.mcopListerSynced = alwaysReady
122122
c.osImageStreamListerSynced = alwaysReady
123+
c.infraListerSynced = alwaysReady
123124
c.eventRecorder = &record.FakeRecorder{}
124125

125126
i.Start(stopCh)

pkg/operator/sync.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,7 +2367,7 @@ func (optr *Operator) syncMachineConfiguration(_ *renderConfig, _ *configv1.Clus
23672367
}
23682368

23692369
// Update skew enforcement status if needed
2370-
optr.syncBootImageSkewEnforcementStatus(mcop, newMachineConfigurationStatus, supportsBootImageUpdates)
2370+
optr.syncBootImageSkewEnforcementStatus(mcop, newMachineConfigurationStatus, infra, supportsBootImageUpdates)
23712371

23722372
newMachineConfigurationStatus.ObservedGeneration = mcop.GetGeneration()
23732373
// Check if any changes are required in the Status before making the API call.
@@ -2533,7 +2533,7 @@ func (optr *Operator) syncPreBuiltImageMachineConfigs() error {
25332533

25342534
// syncBootImageSkewEnforcementStatus determines the appropriate BootImageSkewEnforcementStatus based on
25352535
// the MachineConfiguration spec, platform defaults, and cluster version information.
2536-
func (optr *Operator) syncBootImageSkewEnforcementStatus(mcop *opv1.MachineConfiguration, newMachineConfigurationStatus *opv1.MachineConfigurationStatus, supportsBootImageUpdates bool) {
2536+
func (optr *Operator) syncBootImageSkewEnforcementStatus(mcop *opv1.MachineConfiguration, newMachineConfigurationStatus *opv1.MachineConfigurationStatus, infra *configv1.Infrastructure, supportsBootImageUpdates bool) {
25372537
// React to any changes in boot image skew enforcement configuration
25382538
if !optr.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) {
25392539
return
@@ -2552,6 +2552,9 @@ func (optr *Operator) syncBootImageSkewEnforcementStatus(mcop *opv1.MachineConfi
25522552
} else { // only other possible opinion is "None"
25532553
newMachineConfigurationStatus.BootImageSkewEnforcementStatus = apihelpers.GetSkewEnforcementStatusNone()
25542554
}
2555+
} else if infra.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode {
2556+
// On SNO clusters, default to None since there are no MachineSets to scale
2557+
newMachineConfigurationStatus.BootImageSkewEnforcementStatus = apihelpers.GetSkewEnforcementStatusNone()
25552558
} else if supportsBootImageUpdates {
25562559
// If an "All" option is specified and BootImageSkewEnforcementStatus is empty or not set to Automatic => set Mode to Automatic.
25572560
if apihelpers.HasMAPIMachineSetManagerWithMode(newMachineConfigurationStatus.ManagedBootImagesStatus.MachineManagers, opv1.MachineSets, opv1.All) {

pkg/operator/sync_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ func withPlatformType(platformType configv1.PlatformType) infraOption {
137137
}
138138
}
139139

140+
// withControlPlaneTopology returns an infraOption that sets
// Status.ControlPlaneTopology on the Infrastructure it is applied to.
func withControlPlaneTopology(topology configv1.TopologyMode) infraOption {
141+
return func(infra *configv1.Infrastructure) {
142+
infra.Status.ControlPlaneTopology = topology
143+
}
144+
}
145+
140146
type kubeCloudConfigOption func(*corev1.ConfigMap)
141147

142148
func buildKubeCloudConfig(opts ...kubeCloudConfigOption) *corev1.ConfigMap {
@@ -728,6 +734,32 @@ func TestSyncMachineConfiguration(t *testing.T) {
728734
},
729735
expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.18.0"),
730736
},
737+
{
738+
name: "SNO cluster, no skew enforcement spec, skew enforcement defaults to None",
739+
infra: buildInfra(withPlatformType(configv1.AWSPlatformType), withControlPlaneTopology(configv1.SingleReplicaTopologyMode)),
740+
mcop: buildMachineConfigurationWithNoBootImageConfiguration(),
741+
clusterVersion: buildClusterVersion("4.18.0"),
742+
annotationExpected: true,
743+
expectedManagedBootImagesStatus: opv1.ManagedBootImages{
744+
MachineManagers: []opv1.MachineManager{
745+
{Resource: opv1.MachineSets, APIGroup: opv1.MachineAPI, Selection: opv1.MachineManagerSelector{Mode: opv1.All}},
746+
},
747+
},
748+
expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusNone(),
749+
},
750+
{
751+
name: "SNO cluster, spec defines manual mode, status should reflect spec",
752+
infra: buildInfra(withPlatformType(configv1.AWSPlatformType), withControlPlaneTopology(configv1.SingleReplicaTopologyMode)),
753+
mcop: buildMachineConfigurationWithSkewEnforcementManual("4.17.0"),
754+
clusterVersion: buildClusterVersion("4.18.0"),
755+
annotationExpected: true,
756+
expectedManagedBootImagesStatus: opv1.ManagedBootImages{
757+
MachineManagers: []opv1.MachineManager{
758+
{Resource: opv1.MachineSets, APIGroup: opv1.MachineAPI, Selection: opv1.MachineManagerSelector{Mode: opv1.All}},
759+
},
760+
},
761+
expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.17.0"),
762+
},
731763
}
732764

733765
for _, tc := range cases {

test/e2e-bootstrap/bootstrap_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ func createControllers(ctx *ctrlcommon.ControllerContext) []ctrlcommon.Controlle
626626
ctx.ConfigInformerFactory.Config().V1().Schedulers(),
627627
ctx.OperatorInformerFactory.Operator().V1().MachineConfigurations(),
628628
ctx.InformerFactory.Machineconfiguration().V1alpha1().OSImageStreams(),
629+
ctx.ConfigInformerFactory.Config().V1().Infrastructures(),
629630
ctx.ClientBuilder.KubeClientOrDie("node-update-controller"),
630631
ctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"),
631632
ctx.FeatureGatesHandler,

0 commit comments

Comments
 (0)