Skip to content

Commit 9efb3e9

Browse files
zszabo-rhopenshift-merge-bot[bot]
authored andcommitted
Refactors phases to functions
1 parent ff20029 commit 9efb3e9

File tree

3 files changed

+168
-79
lines changed

3 files changed

+168
-79
lines changed

api/v1alpha1/imageclusterinstall_types.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ import (
2626

2727
const (
2828
ConfigurationPendingReason = "ConfigurationPending"
29-
ConfigurationFailedReason = "ConfigurationFailed"
29+
ConfigurationFailedReason = "ConfigurationFailed"
3030

31-
ImageCreationFailedReason = "ImageCreationFailed"
31+
ImageCreationFailedReason = "ImageCreationFailed"
3232
ImageCreationPendingReason = "ImageCreationPending"
3333

3434
HostConfigurationPendingReason = "HostConfigurationPending"

controllers/imageclusterinstall_controller.go

Lines changed: 158 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
_ "crypto/sha512"
2727
"encoding/base64"
2828
"encoding/json"
29+
"errors"
2930
"fmt"
3031
"net"
3132
"net/url"
@@ -37,7 +38,7 @@ import (
3738

3839
"gopkg.in/yaml.v3"
3940
corev1 "k8s.io/api/core/v1"
40-
"k8s.io/apimachinery/pkg/api/errors"
41+
k8sapierrors "k8s.io/apimachinery/pkg/api/errors"
4142
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
4243
"k8s.io/apimachinery/pkg/runtime"
4344
"k8s.io/apimachinery/pkg/types"
@@ -153,15 +154,14 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
153154
if verifyIsoAndAuthExists(clusterConfigDir) {
154155
return ctrl.Result{}, nil
155156
}
156-
log.Infof("Running reconcile for ici with bootTime set")
157+
log.Info("Running reconcile for ici with bootTime set")
157158
}
158159

159160
if err := r.initializeConditions(ctx, ici); err != nil {
160161
log.Errorf("Failed to initialize conditions: %s", err)
161162
return ctrl.Result{}, err
162163
}
163164

164-
/////// Requirements not met yet: setting defaults //////
165165
cond := hivev1.ClusterInstallCondition{
166166
Type: hivev1.ClusterInstallRequirementsMet,
167167
Status: corev1.ConditionFalse,
@@ -172,31 +172,103 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
172172
r.setRequirementsMetCondition(ctx, ici, cond.Status, cond.Reason, cond.Message)
173173
}()
174174

175-
/////// Requirements not met yet: starting config (CD, BMH) validation //////
175+
// 1. Config validation phase
176+
// Possible reasons for not meeting requirements and exiting reconcile:
177+
// - ConfigurationPending (default): it's either the user needs to complete the ImageClusterInstall definition, or some of
178+
// referenced resources (CD or BMH) are not available yet. In both cases the reconcile ends, and will be triggered again
179+
// when the problem is resolved.
180+
// - ConfigurationFailed: sets this reason when AutomatedCleaningMode cannot be modified in BMH.
176181
cond.Reason = v1alpha1.ConfigurationPendingReason
182+
cd, bmh, err := r.validateConfiguration(ctx, ici, &cond, log)
183+
if cd == nil || bmh == nil || err != nil {
184+
return ctrl.Result{}, err
185+
}
186+
187+
// 2. Host validation phase
188+
// Possible reasons for not meeting requirements and exiting reconcile:
189+
// - HostValidationPending: if BMH provisioning or hardware inspection is not ready yet, reconcile is requeued for 30s later.
190+
// - HostValidationFailed (default): in case of any errors or invalid BMH configuration the reconcile ends here.
191+
// Default is HostValidationFailedReason but validateBMH() can change this to HostValidationPendingReason
192+
cond.Reason = v1alpha1.HostValidationFailedReason
193+
res, err := r.validateHost(ctx, ici, bmh, &cond, log)
194+
if !res.IsZero() || err != nil {
195+
return res, err
196+
}
197+
198+
if err := r.setClusterInstallMetadata(ctx, log, ici, cd); err != nil {
199+
cond.Message = "failed to set ClusterMetaData in ImageClusterInstall"
200+
log.Error(err)
201+
return ctrl.Result{}, err
202+
}
203+
204+
// 3. Image creation phase
205+
// Possible reasons for not meeting requirements and exiting reconcile:
206+
// - ImageCreationPending: when lock cannot be acquired, reconcile gets requeued for 5s later to try again.
207+
// - ImageCreationFailed (default): any other unexpected error stops the reconcile loop with this reason.
208+
cond.Reason = v1alpha1.ImageCreationFailedReason
209+
imageUrl, res, err := r.createImage(ctx, ici, req, bmh, cd, &cond, log)
210+
if !res.IsZero() || err != nil {
211+
return res, err
212+
}
213+
214+
r.labelReferencedObjectsForBackup(ctx, log, ici, cd)
215+
216+
// 4. Host configuration phase
217+
// Possible reasons for not meeting requirements and exiting reconcile:
218+
// - HostConfigurationPending: sets this reason in following scenarios:
219+
// > earlier DataImage instance is still being deleted for some reason (requeue after 30s)
220+
// > current DataImage was just created less than a second ago so BMO might not be notified yet (requeue after 1s)
221+
// > image-based-install-managed annotation is not set yet in BMH (no requeue)
222+
// - HostConfigurationFailed (default): any unexpected errors during this phase will lead to this reason and finish reconcile.
223+
cond.Reason = v1alpha1.HostConfigurationFailedReason
224+
continueReconcile, res, err := r.configureHost(ctx, ici, imageUrl, bmh, &cond, log)
225+
if !continueReconcile || !res.IsZero() || err != nil {
226+
return res, err
227+
}
228+
229+
// Requirements met, host configured
230+
cond.Status = corev1.ConditionTrue
231+
cond.Reason = v1alpha1.HostConfigurationSucceededReason
232+
cond.Message = "configuration image is attached to the referenced host"
233+
234+
return ctrl.Result{}, nil
235+
}
236+
237+
func GetClusterConfigDir(namespacesDir, namespace, uid string) string {
238+
return filepath.Join(namespacesDir, namespace, uid, FilesDir, ClusterConfigDir)
239+
}
240+
241+
func (r *ImageClusterInstallReconciler) validateConfiguration(
242+
ctx context.Context,
243+
ici *v1alpha1.ImageClusterInstall,
244+
cond *hivev1.ClusterInstallCondition,
245+
log logrus.FieldLogger,
246+
) (*hivev1.ClusterDeployment, *bmh_v1alpha1.BareMetalHost, error) {
177247

178248
if ici.Spec.ClusterDeploymentRef == nil || ici.Spec.ClusterDeploymentRef.Name == "" {
179249
cond.Message = "ClusterDeploymentRef is unset"
180-
return ctrl.Result{}, nil
250+
log.Error(errors.New(cond.Message))
251+
return nil, nil, nil
181252
}
182253

183-
clusterDeployment, err := r.getCD(ctx, ici)
254+
cd, err := r.getCD(ctx, ici)
184255
if err != nil {
185256
cond.Message = fmt.Sprintf("failed to get ClusterDeployment %s/%s", ici.Namespace, ici.Spec.ClusterDeploymentRef.Name)
186-
log.WithError(err).Error(cond.Message)
187-
return ctrl.Result{}, err
257+
log.Error(err)
258+
return nil, nil, nil
188259
}
189260

190-
if ici.Spec.BareMetalHostRef == nil {
191-
cond.Message = "no BareMetalHostRef set, nothing to do without provided bmh"
192-
return ctrl.Result{}, nil
261+
if ici.Spec.BareMetalHostRef == nil || ici.Spec.BareMetalHostRef.Name == "" {
262+
cond.Message = "BareMetalHostRef is unset"
263+
log.Error(errors.New(cond.Message))
264+
return nil, nil, nil
193265
}
194266

195267
bmh, err := r.getBMH(ctx, ici.Spec.BareMetalHostRef)
196268
if err != nil {
197269
cond.Message = fmt.Sprintf("failed to get BareMetalHost %s/%s", ici.Spec.BareMetalHostRef.Namespace, ici.Spec.BareMetalHostRef.Name)
198-
log.WithError(err).Error(cond.Message)
199-
return ctrl.Result{}, err
270+
log.Error(err)
271+
return nil, nil, nil
200272
}
201273

202274
// AutomatedCleaningMode is set at the beginning of this flow because we don't want ironic to format the disk
@@ -208,80 +280,113 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
208280
cond.Reason = v1alpha1.ConfigurationFailedReason
209281
cond.Message = fmt.Sprintf("failed to disable automated cleaning mode for BareMetalHost %s/%s", bmh.Namespace, bmh.Name)
210282
log.WithError(err).Error(cond.Message)
211-
return ctrl.Result{}, err
283+
return nil, nil, err
212284
}
213285
}
214286

215-
/////// Requirements not met yet: config validated, starting host validation //////
216-
cond.Reason = v1alpha1.HostValidationFailedReason
287+
return cd, bmh, nil
288+
}
289+
290+
func (r *ImageClusterInstallReconciler) validateHost(
291+
ctx context.Context,
292+
ici *v1alpha1.ImageClusterInstall,
293+
bmh *bmh_v1alpha1.BareMetalHost,
294+
cond *hivev1.ClusterInstallCondition,
295+
log logrus.FieldLogger,
296+
) (ctrl.Result, error) {
217297

218-
if res, err := r.validateBMH(ctx, ici, bmh, &cond); !res.IsZero() || err != nil {
298+
if res, err := r.validateBMH(ici, bmh, cond); !res.IsZero() || err != nil {
219299
return res, err
220300
}
221301

222-
if err = r.setClusterInstallMetadata(ctx, log, ici, clusterDeployment); err != nil {
223-
cond.Message = "failed to set ImageClusterInstall data"
224-
log.WithError(err).Error(cond.Message)
225-
return ctrl.Result{}, err
302+
if !bmh.Spec.ExternallyProvisioned {
303+
log.Infof("Setting BareMetalHost (%s/%s) ExternallyProvisioned spec", bmh.Namespace, bmh.Name)
304+
patch := client.MergeFrom(bmh.DeepCopy())
305+
bmh.Spec.ExternallyProvisioned = true
306+
if err := r.Patch(ctx, bmh, patch); err != nil {
307+
return ctrl.Result{}, err
308+
}
309+
226310
}
227311

228-
/////// Requirements not met yet: host validated, starting image creation //////
229-
cond.Reason = v1alpha1.ImageCreationFailedReason
312+
return ctrl.Result{}, nil
313+
}
314+
315+
func (r *ImageClusterInstallReconciler) createImage(
316+
ctx context.Context,
317+
ici *v1alpha1.ImageClusterInstall,
318+
req ctrl.Request,
319+
bmh *bmh_v1alpha1.BareMetalHost,
320+
cd *hivev1.ClusterDeployment,
321+
cond *hivev1.ClusterInstallCondition,
322+
log logrus.FieldLogger,
323+
) (string, ctrl.Result, error) {
230324

231-
res, err := r.writeInputData(ctx, log, ici, clusterDeployment, bmh)
325+
res, err := r.writeInputData(ctx, log, ici, cd, bmh)
232326
if !res.IsZero() || err != nil {
233-
cond.Reason = v1alpha1.ImageCreationPendingReason
234-
cond.Message = "could not acquire lock for image data"
235327
if err != nil {
236328
cond.Reason = v1alpha1.ImageCreationFailedReason
237329
cond.Message = "failed to create image"
238-
log.WithError(err).Error(cond.Message)
330+
log.Error(err)
331+
} else {
332+
cond.Reason = v1alpha1.ImageCreationPendingReason
333+
cond.Message = "could not acquire lock for image data"
239334
}
240-
return res, err
335+
return "", res, err
241336
}
242337

243-
r.labelReferencedObjectsForBackup(ctx, log, ici, clusterDeployment)
244-
245338
imageUrl, err := url.JoinPath(r.BaseURL, "images", req.Namespace, fmt.Sprintf("%s.iso", ici.ObjectMeta.UID))
246339
if err != nil {
247340
cond.Message = "failed to create image url"
248341
log.WithError(err).Error(cond.Message)
249-
return ctrl.Result{}, err
342+
return "", ctrl.Result{}, err
250343
}
251344

252-
/////// Requirements not met yet: image created, starting host configuration //////
253-
cond.Reason = v1alpha1.HostConfigurationFailedReason
345+
return imageUrl, ctrl.Result{}, nil
346+
}
347+
348+
func (r *ImageClusterInstallReconciler) configureHost(
349+
ctx context.Context,
350+
ici *v1alpha1.ImageClusterInstall,
351+
imageUrl string,
352+
bmh *bmh_v1alpha1.BareMetalHost,
353+
cond *hivev1.ClusterInstallCondition,
354+
log logrus.FieldLogger,
355+
) (bool, ctrl.Result, error) {
356+
357+
continueReconcile := false
254358

255359
dataImage, res, err := r.ensureBMHDataImage(ctx, log, bmh, imageUrl)
360+
if !res.IsZero() {
361+
cond.Reason = v1alpha1.HostConfigurationPendingReason
362+
cond.Message = "previous DataImage is being deleted"
363+
return continueReconcile, res, nil
364+
}
256365
if err != nil {
257366
cond.Message = "failed to create BareMetalHost DataImage"
258-
if !res.IsZero() {
259-
cond.Reason = v1alpha1.HostConfigurationPendingReason
260-
cond.Message = "previous DataImage is being deleted"
261-
}
262367
log.WithError(err).Error(cond.Message)
263-
return res, err
368+
return continueReconcile, ctrl.Result{}, err
264369
}
265370

266371
if dataImage.ObjectMeta.CreationTimestamp.Time.Add(r.Options.DataImageCoolDownPeriod).After(time.Now()) {
267372
// in case the dataImage was created less than a second ago requeue to allow BMO some time to get
268373
// notified about the newly created DataImage before adding the reboot annotation in updateBMHProvisioningState
269374
cond.Reason = v1alpha1.HostConfigurationPendingReason
270-
cond.Message = "Waiting for BareMetalHost to get DataImage"
271-
return ctrl.Result{RequeueAfter: r.Options.DataImageCoolDownPeriod}, err
375+
cond.Message = "waiting for DataImage to cool down"
376+
return continueReconcile, ctrl.Result{RequeueAfter: r.Options.DataImageCoolDownPeriod}, nil
272377
}
273378

274379
if err := r.updateBMHProvisioningState(ctx, log, bmh, dataImage); err != nil {
275380
cond.Message = "failed to update BareMetalHost provisioning state"
276381
log.WithError(err).Error(cond.Message)
277-
return ctrl.Result{}, err
382+
return continueReconcile, ctrl.Result{}, err
278383
}
279384
if !annotationExists(&bmh.ObjectMeta, ibioManagedBMH) {
280385
// TODO: consider replacing this condition with `dataDisk.Status.AttachedImage`
281386
cond.Reason = v1alpha1.HostConfigurationPendingReason
282-
cond.Message = fmt.Sprintf("Waiting for BMH to get %s annotation", ibioManagedBMH)
387+
cond.Message = fmt.Sprintf("waiting for BMH provisioning state to be StateAvailable or StateExternallyProvisioned, current state is: %s", bmh.Status.Provisioning.State)
283388
log.Info(cond.Message)
284-
return ctrl.Result{}, nil
389+
return continueReconcile, ctrl.Result{}, nil
285390
}
286391

287392
if ici.Status.BareMetalHostRef == nil {
@@ -294,24 +399,15 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
294399
if err := r.Status().Patch(ctx, ici, patch); err != nil {
295400
cond.Message = "failed to set Status.BareMetalHostRef"
296401
log.WithError(err).Error(cond.Message)
297-
return ctrl.Result{}, err
402+
return continueReconcile, ctrl.Result{}, err
298403
}
299404
}
300405

301-
/////// Requirements met, host configured //////
302-
cond.Status = corev1.ConditionTrue
303-
cond.Reason = v1alpha1.HostConfigurationSucceededReason
304-
cond.Message = "Configuration image is attached to the referenced host"
305-
306-
return ctrl.Result{}, nil
307-
}
308-
309-
func GetClusterConfigDir(namespacesDir, namespace, uid string) string {
310-
return filepath.Join(namespacesDir, namespace, uid, FilesDir, ClusterConfigDir)
406+
continueReconcile = true
407+
return continueReconcile, ctrl.Result{}, nil
311408
}
312409

313410
func (r *ImageClusterInstallReconciler) validateBMH(
314-
ctx context.Context,
315411
ici *v1alpha1.ImageClusterInstall,
316412
bmh *bmh_v1alpha1.BareMetalHost,
317413
cond *hivev1.ClusterInstallCondition) (ctrl.Result, error) {
@@ -499,13 +595,7 @@ func (r *ImageClusterInstallReconciler) updateBMHProvisioningState(ctx context.C
499595
if bmh.Status.Provisioning.State != bmh_v1alpha1.StateAvailable && bmh.Status.Provisioning.State != bmh_v1alpha1.StateExternallyProvisioned {
500596
return nil
501597
}
502-
log.Infof("Updating BareMetalHost %s/%s provisioning state, current PoweredOn status is: %s", bmh.Namespace, bmh.Name, bmh.Status.PoweredOn)
503-
if bmh.Status.Provisioning.State == bmh_v1alpha1.StateAvailable {
504-
if !bmh.Spec.ExternallyProvisioned {
505-
log.Infof("Setting BareMetalHost (%s/%s) ExternallyProvisioned spec", bmh.Namespace, bmh.Name)
506-
bmh.Spec.ExternallyProvisioned = true
507-
}
508-
}
598+
log.Infof("BareMetalHost %s/%s PoweredOn status is: %s", bmh.Namespace, bmh.Name, bmh.Status.PoweredOn)
509599
if !bmh.Spec.Online {
510600
bmh.Spec.Online = true
511601
log.Infof("Setting BareMetalHost (%s/%s) spec.Online to true", bmh.Namespace, bmh.Name)
@@ -532,14 +622,14 @@ func (r *ImageClusterInstallReconciler) ensureBMHDataImage(
532622
url string) (*bmh_v1alpha1.DataImage, ctrl.Result, error) {
533623
dataImage, err := r.getDataImage(ctx, bmh.Namespace, bmh.Name)
534624
if err == nil {
535-
if err == nil && !dataImage.ObjectMeta.DeletionTimestamp.IsZero() {
536-
err = fmt.Errorf("dataImage %s/%s already exists but is being deleted, probably leftover from previous installation", bmh.Namespace, bmh.Name)
537-
return dataImage, ctrl.Result{RequeueAfter: 30 * time.Second}, err
625+
if !dataImage.ObjectMeta.DeletionTimestamp.IsZero() {
626+
log.Errorf("dataImage %s/%s already exists but is being deleted, probably leftover from previous installation", bmh.Namespace, bmh.Name)
627+
return dataImage, ctrl.Result{RequeueAfter: 30 * time.Second}, nil
538628
}
539629
return dataImage, ctrl.Result{}, nil
540630
}
541631

542-
if err != nil && !errors.IsNotFound(err) {
632+
if err != nil && !k8sapierrors.IsNotFound(err) {
543633
return dataImage, ctrl.Result{}, err
544634
}
545635
log.Infof("creating new dataImage for BareMetalHost (%s/%s)", bmh.Name, bmh.Namespace)
@@ -600,7 +690,7 @@ func (r *ImageClusterInstallReconciler) removeBMHDataImage(ctx context.Context,
600690

601691
bmh := &bmh_v1alpha1.BareMetalHost{}
602692
if err := r.Get(ctx, bmhRef, bmh); err != nil {
603-
if errors.IsNotFound(err) {
693+
if k8sapierrors.IsNotFound(err) {
604694
log.Warnf("Referenced BareMetalHost %s/%s does not exist, not waiting for dataImage deletion", bmhRef.Namespace, bmhRef.Name)
605695
return nil, nil
606696
} else {
@@ -634,7 +724,7 @@ func (r *ImageClusterInstallReconciler) deleteDataImage(ctx context.Context, log
634724
dataImage := &bmh_v1alpha1.DataImage{}
635725

636726
if err := r.Get(ctx, dataImageRef, dataImage); err != nil {
637-
if errors.IsNotFound(err) {
727+
if k8sapierrors.IsNotFound(err) {
638728
log.Infof("Can't find DataImage from BareMetalHost %s/%s, Nothing to remove", dataImageRef.Namespace, dataImageRef.Name)
639729
return nil, nil
640730
}

0 commit comments

Comments
 (0)