@@ -26,6 +26,7 @@ import (
26
26
_ "crypto/sha512"
27
27
"encoding/base64"
28
28
"encoding/json"
29
+ "errors"
29
30
"fmt"
30
31
"net"
31
32
"net/url"
@@ -37,7 +38,7 @@ import (
37
38
38
39
"gopkg.in/yaml.v3"
39
40
corev1 "k8s.io/api/core/v1"
40
- "k8s.io/apimachinery/pkg/api/errors"
41
+ k8sapierrors "k8s.io/apimachinery/pkg/api/errors"
41
42
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
42
43
"k8s.io/apimachinery/pkg/runtime"
43
44
"k8s.io/apimachinery/pkg/types"
@@ -153,15 +154,14 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
153
154
if verifyIsoAndAuthExists (clusterConfigDir ) {
154
155
return ctrl.Result {}, nil
155
156
}
156
- log .Infof ("Running reconcile for ici with bootTime set" )
157
+ log .Info ("Running reconcile for ici with bootTime set" )
157
158
}
158
159
159
160
if err := r .initializeConditions (ctx , ici ); err != nil {
160
161
log .Errorf ("Failed to initialize conditions: %s" , err )
161
162
return ctrl.Result {}, err
162
163
}
163
164
164
- /////// Requirements not met yet: setting defaults //////
165
165
cond := hivev1.ClusterInstallCondition {
166
166
Type : hivev1 .ClusterInstallRequirementsMet ,
167
167
Status : corev1 .ConditionFalse ,
@@ -172,31 +172,103 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
172
172
r .setRequirementsMetCondition (ctx , ici , cond .Status , cond .Reason , cond .Message )
173
173
}()
174
174
175
- /////// Requirements not met yet: starting config (CD, BMH) validation //////
175
+ // 1. Config validation phase
176
+ // Possible reasons for not meeting requirements and exiting reconcile:
177
+ // - ConfigurationPending (default): either the user still needs to complete the ImageClusterInstall definition, or some of
178
+ // the referenced resources (CD or BMH) are not available yet. In both cases the reconcile ends, and it will be triggered again
179
+ // when the problem is resolved.
180
+ // - ConfigurationFailed: sets this reason when AutomatedCleaningMode cannot be modified in BMH.
176
181
cond .Reason = v1alpha1 .ConfigurationPendingReason
182
+ cd , bmh , err := r .validateConfiguration (ctx , ici , & cond , log )
183
+ if cd == nil || bmh == nil || err != nil {
184
+ return ctrl.Result {}, err
185
+ }
186
+
187
+ // 2. Host validation phase
188
+ // Possible reasons for not meeting requirements and exiting reconcile:
189
+ // - HostValidationPending: if BMH provisioning or hardware inspection is not ready yet, reconcile is requeued for 30s later.
190
+ // - HostValidationFailed (default): in case of any errors or invalid BMH configuration the reconcile ends here.
191
+ // Default is HostValidationFailedReason but validateBMH() can change this to HostValidationPendingReason
192
+ cond .Reason = v1alpha1 .HostValidationFailedReason
193
+ res , err := r .validateHost (ctx , ici , bmh , & cond , log )
194
+ if ! res .IsZero () || err != nil {
195
+ return res , err
196
+ }
197
+
198
+ if err := r .setClusterInstallMetadata (ctx , log , ici , cd ); err != nil {
199
+ cond .Message = "failed to set ClusterMetaData in ImageClusterInstall"
200
+ log .Error (err )
201
+ return ctrl.Result {}, err
202
+ }
203
+
204
+ // 3. Image creation phase
205
+ // Possible reasons for not meeting requirements and exiting reconcile:
206
+ // - ImageCreationPending: when lock cannot be acquired, reconcile gets requeued for 5s later to try again.
207
+ // - ImageCreationFailed (default): any other unexpected error stops the reconcile loop with this reason.
208
+ cond .Reason = v1alpha1 .ImageCreationFailedReason
209
+ imageUrl , res , err := r .createImage (ctx , ici , req , bmh , cd , & cond , log )
210
+ if ! res .IsZero () || err != nil {
211
+ return res , err
212
+ }
213
+
214
+ r .labelReferencedObjectsForBackup (ctx , log , ici , cd )
215
+
216
+ // 4. Host configuration phase
217
+ // Possible reasons for not meeting requirements and exiting reconcile:
218
+ // - HostConfigurationPending: sets this reason in following scenarios:
219
+ // > earlier DataImage instance is still being deleted for some reason (requeue after 30s)
220
+ // > current DataImage was just created less than a second ago so BMO might not be notified yet (requeue after 1s)
221
+ // > image-based-install-managed annotation is not set yet in BMH (no requeue)
222
+ // - HostConfigurationFailed (default): any unexpected errors during this phase will lead to this reason and finish reconcile.
223
+ cond .Reason = v1alpha1 .HostConfigurationFailedReason
224
+ continueReconcile , res , err := r .configureHost (ctx , ici , imageUrl , bmh , & cond , log )
225
+ if ! continueReconcile || ! res .IsZero () || err != nil {
226
+ return res , err
227
+ }
228
+
229
+ // Requirements met, host configured
230
+ cond .Status = corev1 .ConditionTrue
231
+ cond .Reason = v1alpha1 .HostConfigurationSucceededReason
232
+ cond .Message = "configuration image is attached to the referenced host"
233
+
234
+ return ctrl.Result {}, nil
235
+ }
236
+
237
+ func GetClusterConfigDir (namespacesDir , namespace , uid string ) string {
238
+ return filepath .Join (namespacesDir , namespace , uid , FilesDir , ClusterConfigDir )
239
+ }
240
+
241
+ func (r * ImageClusterInstallReconciler ) validateConfiguration (
242
+ ctx context.Context ,
243
+ ici * v1alpha1.ImageClusterInstall ,
244
+ cond * hivev1.ClusterInstallCondition ,
245
+ log logrus.FieldLogger ,
246
+ ) (* hivev1.ClusterDeployment , * bmh_v1alpha1.BareMetalHost , error ) {
177
247
178
248
if ici .Spec .ClusterDeploymentRef == nil || ici .Spec .ClusterDeploymentRef .Name == "" {
179
249
cond .Message = "ClusterDeploymentRef is unset"
180
- return ctrl.Result {}, nil
250
+ log .Error (errors .New (cond .Message ))
251
+ return nil , nil , nil
181
252
}
182
253
183
- clusterDeployment , err := r .getCD (ctx , ici )
254
+ cd , err := r .getCD (ctx , ici )
184
255
if err != nil {
185
256
cond .Message = fmt .Sprintf ("failed to get ClusterDeployment %s/%s" , ici .Namespace , ici .Spec .ClusterDeploymentRef .Name )
186
- log .WithError ( err ). Error (cond . Message )
187
- return ctrl. Result {}, err
257
+ log .Error (err )
258
+ return nil , nil , nil
188
259
}
189
260
190
- if ici .Spec .BareMetalHostRef == nil {
191
- cond .Message = "no BareMetalHostRef set, nothing to do without provided bmh"
192
- return ctrl.Result {}, nil
261
+ if ici .Spec .BareMetalHostRef == nil || ici .Spec .BareMetalHostRef .Name == "" {
262
+ cond .Message = "BareMetalHostRef is unset"
263
+ log .Error (errors .New (cond .Message ))
264
+ return nil , nil , nil
193
265
}
194
266
195
267
bmh , err := r .getBMH (ctx , ici .Spec .BareMetalHostRef )
196
268
if err != nil {
197
269
cond .Message = fmt .Sprintf ("failed to get BareMetalHost %s/%s" , ici .Spec .BareMetalHostRef .Namespace , ici .Spec .BareMetalHostRef .Name )
198
- log .WithError ( err ). Error (cond . Message )
199
- return ctrl. Result {}, err
270
+ log .Error (err )
271
+ return nil , nil , nil
200
272
}
201
273
202
274
// AutomatedCleaningMode is set at the beginning of this flow because we don't want ironic to format the disk
@@ -208,80 +280,113 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
208
280
cond .Reason = v1alpha1 .ConfigurationFailedReason
209
281
cond .Message = fmt .Sprintf ("failed to disable automated cleaning mode for BareMetalHost %s/%s" , bmh .Namespace , bmh .Name )
210
282
log .WithError (err ).Error (cond .Message )
211
- return ctrl. Result {} , err
283
+ return nil , nil , err
212
284
}
213
285
}
214
286
215
- /////// Requirements not met yet: config validated, starting host validation //////
216
- cond .Reason = v1alpha1 .HostValidationFailedReason
287
+ return cd , bmh , nil
288
+ }
289
+
290
+ func (r * ImageClusterInstallReconciler ) validateHost (
291
+ ctx context.Context ,
292
+ ici * v1alpha1.ImageClusterInstall ,
293
+ bmh * bmh_v1alpha1.BareMetalHost ,
294
+ cond * hivev1.ClusterInstallCondition ,
295
+ log logrus.FieldLogger ,
296
+ ) (ctrl.Result , error ) {
217
297
218
- if res , err := r .validateBMH (ctx , ici , bmh , & cond ); ! res .IsZero () || err != nil {
298
+ if res , err := r .validateBMH (ici , bmh , cond ); ! res .IsZero () || err != nil {
219
299
return res , err
220
300
}
221
301
222
- if err = r .setClusterInstallMetadata (ctx , log , ici , clusterDeployment ); err != nil {
223
- cond .Message = "failed to set ImageClusterInstall data"
224
- log .WithError (err ).Error (cond .Message )
225
- return ctrl.Result {}, err
302
+ if ! bmh .Spec .ExternallyProvisioned {
303
+ log .Infof ("Setting BareMetalHost (%s/%s) ExternallyProvisioned spec" , bmh .Namespace , bmh .Name )
304
+ patch := client .MergeFrom (bmh .DeepCopy ())
305
+ bmh .Spec .ExternallyProvisioned = true
306
+ if err := r .Patch (ctx , bmh , patch ); err != nil {
307
+ return ctrl.Result {}, err
308
+ }
309
+
226
310
}
227
311
228
- /////// Requirements not met yet: host validated, starting image creation //////
229
- cond .Reason = v1alpha1 .ImageCreationFailedReason
312
+ return ctrl.Result {}, nil
313
+ }
314
+
315
+ func (r * ImageClusterInstallReconciler ) createImage (
316
+ ctx context.Context ,
317
+ ici * v1alpha1.ImageClusterInstall ,
318
+ req ctrl.Request ,
319
+ bmh * bmh_v1alpha1.BareMetalHost ,
320
+ cd * hivev1.ClusterDeployment ,
321
+ cond * hivev1.ClusterInstallCondition ,
322
+ log logrus.FieldLogger ,
323
+ ) (string , ctrl.Result , error ) {
230
324
231
- res , err := r .writeInputData (ctx , log , ici , clusterDeployment , bmh )
325
+ res , err := r .writeInputData (ctx , log , ici , cd , bmh )
232
326
if ! res .IsZero () || err != nil {
233
- cond .Reason = v1alpha1 .ImageCreationPendingReason
234
- cond .Message = "could not acquire lock for image data"
235
327
if err != nil {
236
328
cond .Reason = v1alpha1 .ImageCreationFailedReason
237
329
cond .Message = "failed to create image"
238
- log .WithError (err ).Error (cond .Message )
330
+ log .Error (err )
331
+ } else {
332
+ cond .Reason = v1alpha1 .ImageCreationPendingReason
333
+ cond .Message = "could not acquire lock for image data"
239
334
}
240
- return res , err
335
+ return "" , res , err
241
336
}
242
337
243
- r .labelReferencedObjectsForBackup (ctx , log , ici , clusterDeployment )
244
-
245
338
imageUrl , err := url .JoinPath (r .BaseURL , "images" , req .Namespace , fmt .Sprintf ("%s.iso" , ici .ObjectMeta .UID ))
246
339
if err != nil {
247
340
cond .Message = "failed to create image url"
248
341
log .WithError (err ).Error (cond .Message )
249
- return ctrl.Result {}, err
342
+ return "" , ctrl.Result {}, err
250
343
}
251
344
252
- /////// Requirements not met yet: image created, starting host configuration //////
253
- cond .Reason = v1alpha1 .HostConfigurationFailedReason
345
+ return imageUrl , ctrl.Result {}, nil
346
+ }
347
+
348
+ func (r * ImageClusterInstallReconciler ) configureHost (
349
+ ctx context.Context ,
350
+ ici * v1alpha1.ImageClusterInstall ,
351
+ imageUrl string ,
352
+ bmh * bmh_v1alpha1.BareMetalHost ,
353
+ cond * hivev1.ClusterInstallCondition ,
354
+ log logrus.FieldLogger ,
355
+ ) (bool , ctrl.Result , error ) {
356
+
357
+ continueReconcile := false
254
358
255
359
dataImage , res , err := r .ensureBMHDataImage (ctx , log , bmh , imageUrl )
360
+ if ! res .IsZero () {
361
+ cond .Reason = v1alpha1 .HostConfigurationPendingReason
362
+ cond .Message = "previous DataImage is being deleted"
363
+ return continueReconcile , res , nil
364
+ }
256
365
if err != nil {
257
366
cond .Message = "failed to create BareMetalHost DataImage"
258
- if ! res .IsZero () {
259
- cond .Reason = v1alpha1 .HostConfigurationPendingReason
260
- cond .Message = "previous DataImage is being deleted"
261
- }
262
367
log .WithError (err ).Error (cond .Message )
263
- return res , err
368
+ return continueReconcile , ctrl. Result {} , err
264
369
}
265
370
266
371
if dataImage .ObjectMeta .CreationTimestamp .Time .Add (r .Options .DataImageCoolDownPeriod ).After (time .Now ()) {
267
372
// in case the dataImage was created less than a second ago requeue to allow BMO some time to get
268
373
// notified about the newly created DataImage before adding the reboot annotation in updateBMHProvisioningState
269
374
cond .Reason = v1alpha1 .HostConfigurationPendingReason
270
- cond .Message = "Waiting for BareMetalHost to get DataImage "
271
- return ctrl.Result {RequeueAfter : r .Options .DataImageCoolDownPeriod }, err
375
+ cond .Message = "waiting for DataImage to cool down "
376
+ return continueReconcile , ctrl.Result {RequeueAfter : r .Options .DataImageCoolDownPeriod }, nil
272
377
}
273
378
274
379
if err := r .updateBMHProvisioningState (ctx , log , bmh , dataImage ); err != nil {
275
380
cond .Message = "failed to update BareMetalHost provisioning state"
276
381
log .WithError (err ).Error (cond .Message )
277
- return ctrl.Result {}, err
382
+ return continueReconcile , ctrl.Result {}, err
278
383
}
279
384
if ! annotationExists (& bmh .ObjectMeta , ibioManagedBMH ) {
280
385
// TODO: consider replacing this condition with `dataDisk.Status.AttachedImage`
281
386
cond .Reason = v1alpha1 .HostConfigurationPendingReason
282
- cond .Message = fmt .Sprintf ("Waiting for BMH to get %s annotation " , ibioManagedBMH )
387
+ cond .Message = fmt .Sprintf ("waiting for BMH provisioning state to be StateAvailable or StateExternallyProvisioned, current state is: %s " , bmh . Status . Provisioning . State )
283
388
log .Info (cond .Message )
284
- return ctrl.Result {}, nil
389
+ return continueReconcile , ctrl.Result {}, nil
285
390
}
286
391
287
392
if ici .Status .BareMetalHostRef == nil {
@@ -294,24 +399,15 @@ func (r *ImageClusterInstallReconciler) Reconcile(ctx context.Context, req ctrl.
294
399
if err := r .Status ().Patch (ctx , ici , patch ); err != nil {
295
400
cond .Message = "failed to set Status.BareMetalHostRef"
296
401
log .WithError (err ).Error (cond .Message )
297
- return ctrl.Result {}, err
402
+ return continueReconcile , ctrl.Result {}, err
298
403
}
299
404
}
300
405
301
- /////// Requirements met, host configured //////
302
- cond .Status = corev1 .ConditionTrue
303
- cond .Reason = v1alpha1 .HostConfigurationSucceededReason
304
- cond .Message = "Configuration image is attached to the referenced host"
305
-
306
- return ctrl.Result {}, nil
307
- }
308
-
309
- func GetClusterConfigDir (namespacesDir , namespace , uid string ) string {
310
- return filepath .Join (namespacesDir , namespace , uid , FilesDir , ClusterConfigDir )
406
+ continueReconcile = true
407
+ return continueReconcile , ctrl.Result {}, nil
311
408
}
312
409
313
410
func (r * ImageClusterInstallReconciler ) validateBMH (
314
- ctx context.Context ,
315
411
ici * v1alpha1.ImageClusterInstall ,
316
412
bmh * bmh_v1alpha1.BareMetalHost ,
317
413
cond * hivev1.ClusterInstallCondition ) (ctrl.Result , error ) {
@@ -499,13 +595,7 @@ func (r *ImageClusterInstallReconciler) updateBMHProvisioningState(ctx context.C
499
595
if bmh .Status .Provisioning .State != bmh_v1alpha1 .StateAvailable && bmh .Status .Provisioning .State != bmh_v1alpha1 .StateExternallyProvisioned {
500
596
return nil
501
597
}
502
- log .Infof ("Updating BareMetalHost %s/%s provisioning state, current PoweredOn status is: %s" , bmh .Namespace , bmh .Name , bmh .Status .PoweredOn )
503
- if bmh .Status .Provisioning .State == bmh_v1alpha1 .StateAvailable {
504
- if ! bmh .Spec .ExternallyProvisioned {
505
- log .Infof ("Setting BareMetalHost (%s/%s) ExternallyProvisioned spec" , bmh .Namespace , bmh .Name )
506
- bmh .Spec .ExternallyProvisioned = true
507
- }
508
- }
598
+ log .Infof ("BareMetalHost %s/%s PoweredOn status is: %s" , bmh .Namespace , bmh .Name , bmh .Status .PoweredOn )
509
599
if ! bmh .Spec .Online {
510
600
bmh .Spec .Online = true
511
601
log .Infof ("Setting BareMetalHost (%s/%s) spec.Online to true" , bmh .Namespace , bmh .Name )
@@ -532,14 +622,14 @@ func (r *ImageClusterInstallReconciler) ensureBMHDataImage(
532
622
url string ) (* bmh_v1alpha1.DataImage , ctrl.Result , error ) {
533
623
dataImage , err := r .getDataImage (ctx , bmh .Namespace , bmh .Name )
534
624
if err == nil {
535
- if err == nil && ! dataImage .ObjectMeta .DeletionTimestamp .IsZero () {
536
- err = fmt .Errorf ("dataImage %s/%s already exists but is being deleted, probably leftover from previous installation" , bmh .Namespace , bmh .Name )
537
- return dataImage , ctrl.Result {RequeueAfter : 30 * time .Second }, err
625
+ if ! dataImage .ObjectMeta .DeletionTimestamp .IsZero () {
626
+ log .Errorf ("dataImage %s/%s already exists but is being deleted, probably leftover from previous installation" , bmh .Namespace , bmh .Name )
627
+ return dataImage , ctrl.Result {RequeueAfter : 30 * time .Second }, nil
538
628
}
539
629
return dataImage , ctrl.Result {}, nil
540
630
}
541
631
542
- if err != nil && ! errors .IsNotFound (err ) {
632
+ if err != nil && ! k8sapierrors .IsNotFound (err ) {
543
633
return dataImage , ctrl.Result {}, err
544
634
}
545
635
log .Infof ("creating new dataImage for BareMetalHost (%s/%s)" , bmh .Name , bmh .Namespace )
@@ -600,7 +690,7 @@ func (r *ImageClusterInstallReconciler) removeBMHDataImage(ctx context.Context,
600
690
601
691
bmh := & bmh_v1alpha1.BareMetalHost {}
602
692
if err := r .Get (ctx , bmhRef , bmh ); err != nil {
603
- if errors .IsNotFound (err ) {
693
+ if k8sapierrors .IsNotFound (err ) {
604
694
log .Warnf ("Referenced BareMetalHost %s/%s does not exist, not waiting for dataImage deletion" , bmhRef .Namespace , bmhRef .Name )
605
695
return nil , nil
606
696
} else {
@@ -634,7 +724,7 @@ func (r *ImageClusterInstallReconciler) deleteDataImage(ctx context.Context, log
634
724
dataImage := & bmh_v1alpha1.DataImage {}
635
725
636
726
if err := r .Get (ctx , dataImageRef , dataImage ); err != nil {
637
- if errors .IsNotFound (err ) {
727
+ if k8sapierrors .IsNotFound (err ) {
638
728
log .Infof ("Can't find DataImage from BareMetalHost %s/%s, Nothing to remove" , dataImageRef .Namespace , dataImageRef .Name )
639
729
return nil , nil
640
730
}
0 commit comments