Skip to content

Commit b727f28

Browse files
committed
feat: Implement structured logging pattern from CAPM3
This commit introduces a consistent logging pattern following the cluster-api-provider-metal3 PR #2971 approach: - Add logging.go with VerbosityLevelDebug (4) and VerbosityLevelTrace (5) - Define 40+ LogField* constants for consistent structured logging keys - Update all controllers to use the new logging constants - Add trace-level logging for function entry points - Convert inline strings to LogField* constants for type safety - Add LOG_LEVEL_REVIEW_TODO section for future log level decisions - Change SecretManager to return error instead of panic for nil owner - Update tests to match new logging format The structured logging approach improves: - Consistency across the codebase - Log filtering and analysis capabilities - Type safety for log field keys - Debugging and troubleshooting workflows Signed-off-by: Maximilian Rink <maximilian.rink@telekom.de>
1 parent d7991eb commit b727f28

11 files changed

Lines changed: 447 additions & 197 deletions

internal/controller/metal3.io/action_result.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package controllers
33
import (
44
"errors"
55
"math"
6-
"math/rand"
6+
"math/rand/v2"
77
"time"
88

99
metal3api "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1"
@@ -18,10 +18,6 @@ const (
1818
defaultBackoff = 0.5
1919
)
2020

21-
func init() {
22-
rand.Seed(time.Now().UTC().UnixNano())
23-
}
24-
2521
// actionResult is an interface that encapsulates the result of a Reconcile
2622
// call, as returned by the action corresponding to the current state.
2723
type actionResult interface {

internal/controller/metal3.io/baremetalhost_controller.go

Lines changed: 142 additions & 98 deletions
Large diffs are not rendered by default.

internal/controller/metal3.io/host_state_machine.go

Lines changed: 16 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ func (hsm *hostStateMachine) updateHostStateFrom(ctx context.Context, initialSta
114114
default:
115115
}
116116

117-
info.log.Info("changing provisioning state",
117+
info.log.V(VerbosityLevelDebug).Info("changing provisioning state",
118118
"old", initialState,
119119
"new", hsm.NextState)
120120
now := metav1.Now()
@@ -139,8 +139,8 @@ func (hsm *hostStateMachine) updateHostStateFrom(ctx context.Context, initialSta
139139
// it needs error handling logic that we can't support in
140140
// this function.
141141
if updateBootModeStatus(hsm.Host) {
142-
info.log.Info("saving boot mode",
143-
"new mode", hsm.Host.Status.Provisioning.BootMode)
142+
info.log.V(VerbosityLevelDebug).Info("saving boot mode",
143+
"newMode", hsm.Host.Status.Provisioning.BootMode)
144144
}
145145
default:
146146
}
@@ -189,7 +189,7 @@ func (hsm *hostStateMachine) ReconcileState(ctx context.Context, info *reconcile
189189
}
190190

191191
if hsm.checkInitiateDelete(info.log) {
192-
info.log.Info("Initiating host deletion")
192+
info.log.V(VerbosityLevelDebug).Info("initiating host deletion")
193193
return actionComplete{}
194194
}
195195

@@ -206,7 +206,8 @@ func (hsm *hostStateMachine) ReconcileState(ctx context.Context, info *reconcile
206206
return stateHandler(ctx, info)
207207
}
208208

209-
info.log.Info("No handler found for state", "state", initialState)
209+
info.log.Info("no handler found for state",
210+
LogFieldProvisioningState, initialState)
210211
return actionError{fmt.Errorf("no handler found for state \"%s\"", initialState)}
211212
}
212213

@@ -235,7 +236,7 @@ func (hsm *hostStateMachine) checkInitiateDelete(log logr.Logger) bool {
235236
case metal3api.StateProvisioning, metal3api.StateProvisioned:
236237
if hsm.Host.OperationalStatus() == metal3api.OperationalStatusDetached {
237238
if delayDeleteForDetachedHost(hsm.Host) {
238-
log.Info("Delaying detached host deletion")
239+
log.V(VerbosityLevelDebug).Info("delaying detached host deletion")
239240
deleteDelayedForDetached.Inc()
240241
return false
241242
}
@@ -247,7 +248,7 @@ func (hsm *hostStateMachine) checkInitiateDelete(log logr.Logger) bool {
247248
case metal3api.StateExternallyProvisioned:
248249
if hsm.Host.OperationalStatus() == metal3api.OperationalStatusDetached {
249250
if delayDeleteForDetachedHost(hsm.Host) {
250-
log.Info("Delaying detached host deletion")
251+
log.V(VerbosityLevelDebug).Info("delaying detached host deletion")
251252
deleteDelayedForDetached.Inc()
252253
return false
253254
}
@@ -288,29 +289,6 @@ func hasDetachedAnnotation(host *metal3api.BareMetalHost) bool {
288289
return false
289290
}
290291

291-
// skipReconcileSubresource checks if the applied annotations and status
292-
// prevent a subresource (HFS/HFC) from being reconciled.
293-
func skipReconcileSubresource(host *metal3api.BareMetalHost, logger logr.Logger) bool {
294-
switch host.Status.Provisioning.State {
295-
case "", metal3api.StateUnmanaged:
296-
logger.Info("host is unmanaged or not yet enrolled, skipping reconcile")
297-
return true
298-
default:
299-
}
300-
301-
if host.Status.OperationalStatus == metal3api.OperationalStatusDetached || hasDetachedAnnotation(host) {
302-
logger.Info("host is currently detached, skipping reconcile")
303-
return true
304-
}
305-
306-
if _, ok := host.Annotations[metal3api.PausedAnnotation]; ok {
307-
logger.Info("host is currently paused, skipping reconcile")
308-
return true
309-
}
310-
311-
return false
312-
}
313-
314292
func delayDeleteForDetachedHost(host *metal3api.BareMetalHost) bool {
315293
annotations := host.GetAnnotations()
316294
args := metal3api.DetachedAnnotationArguments{}
@@ -342,13 +320,14 @@ func (hsm *hostStateMachine) checkDetachedHost(ctx context.Context, info *reconc
342320
case metal3api.StateProvisioned, metal3api.StateExternallyProvisioned, metal3api.StateReady, metal3api.StateAvailable:
343321
return hsm.Reconciler.detachHost(ctx, hsm.Provisioner, info)
344322
default:
345-
info.log.Info("host cannot be detached yet, waiting for the current operation to finish", "provisioningState", info.host.Status.Provisioning.State)
323+
info.log.V(VerbosityLevelDebug).Info("host cannot be detached yet, waiting for the current operation to finish",
324+
LogFieldProvisioningState, info.host.Status.Provisioning.State)
346325
}
347326
}
348327
if info.host.Status.ErrorType == metal3api.DetachError {
349328
clearError(info.host)
350329
hsm.Host.Status.ErrorCount = 0
351-
info.log.Info("removed detach error")
330+
info.log.V(VerbosityLevelDebug).Info("removed detach error")
352331
return actionUpdate{}
353332
}
354333
if info.host.OperationalStatus() == metal3api.OperationalStatusDetached {
@@ -357,7 +336,7 @@ func (hsm *hostStateMachine) checkDetachedHost(ctx context.Context, info *reconc
357336
newStatus = metal3api.OperationalStatusError
358337
}
359338
info.host.SetOperationalStatus(newStatus)
360-
info.log.Info("removed detached status")
339+
info.log.V(VerbosityLevelDebug).Info("removed detached status")
361340
return actionUpdate{}
362341
}
363342
return nil
@@ -386,7 +365,8 @@ func (hsm *hostStateMachine) ensureRegistered(ctx context.Context, info *reconci
386365
default:
387366
if hsm.Host.Status.ErrorType == metal3api.RegistrationError ||
388367
!hsm.Host.Status.GoodCredentials.Match(*info.bmcCredsSecret) {
389-
info.log.Info("retrying registration", "LastError", hsm.Host.Status.ErrorMessage)
368+
info.log.V(VerbosityLevelDebug).Info("retrying registration",
369+
"lastError", hsm.Host.Status.ErrorMessage)
390370
recordStateBegin(hsm.Host, metal3api.StateRegistering, metav1.Now())
391371
}
392372
}
@@ -603,8 +583,7 @@ func (hsm *hostStateMachine) handleDeprovisioning(ctx context.Context, info *rec
603583
// If the provisioner gives up deprovisioning and
604584
// deletion has been requested, continue to delete.
605585
if hsm.Host.Status.ErrorCount > retryCount {
606-
info.log.Info("Giving up on host clean up after 3 attempts. The host may still be operational " +
607-
"and cause issues in your clusters. You should clean it up manually now.")
586+
info.log.Info("giving up on host clean up after 3 attempts, the host may still be operational and cause issues in your clusters - clean it up manually")
608587
hsm.NextState = metal3api.StatePoweringOffBeforeDelete
609588
info.postSaveCallbacks = append(info.postSaveCallbacks, deleteWithoutDeprov.Inc)
610589
return actionComplete{}
@@ -640,7 +619,7 @@ func (hsm *hostStateMachine) handlePoweringOffBeforeDelete(ctx context.Context,
640619
// If the provisioner gives up deprovisioning and
641620
// deletion has been requested, continue to delete.
642621
if hsm.Host.Status.ErrorCount > retryCount {
643-
info.log.Info("Giving up on host power off after 3 attempts.")
622+
info.log.Info("giving up on host power off after 3 attempts")
644623
return skipToDelete()
645624
}
646625
case actionError:

internal/controller/metal3.io/hostfirmwarecomponents_controller.go

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ func (info *rhfcInfo) publishEvent(reason, message string) {
9595

9696
// Reconcile handles changes to HostFirmwareComponents resources.
9797
func (r *HostFirmwareComponentsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) {
98-
reqLogger := r.Log.WithValues("hostfirmwarecomponents", req.NamespacedName)
99-
reqLogger.Info("start")
98+
reqLogger := r.Log.WithValues(LogFieldHost, req.NamespacedName)
99+
reqLogger.V(VerbosityLevelTrace).Info("reconciliation started")
100100

101101
// Get the corresponding baremetalhost in this namespace, if one doesn't exist don't continue processing
102102
bmh := &metal3api.BareMetalHost{}
@@ -109,17 +109,24 @@ func (r *HostFirmwareComponentsReconciler) Reconcile(ctx context.Context, req ct
109109
return ctrl.Result{Requeue: true, RequeueAfter: resourceNotAvailableRetryDelay}, err
110110
}
111111

112-
if skipReconcileSubresource(bmh, reqLogger) {
113-
// We'll get notified on BMH changes, no need to reconcile soon
112+
if hasDetachedAnnotation(bmh) {
113+
reqLogger.V(VerbosityLevelDebug).Info("the host is detached, not running hostfirmwarecomponents reconciler")
114114
return ctrl.Result{Requeue: true, RequeueAfter: unmanagedRetryDelay}, nil
115115
}
116+
// If the reconciliation is paused, requeue
117+
annotations := bmh.GetAnnotations()
118+
119+
if _, ok := annotations[metal3api.PausedAnnotation]; ok {
120+
reqLogger.V(VerbosityLevelDebug).Info("host is paused, no work to do")
121+
return ctrl.Result{Requeue: true, RequeueAfter: subResourceNotReadyRetryDelay}, nil
122+
}
116123

117124
// Fetch the HostFirmwareComponents
118125
hfc := &metal3api.HostFirmwareComponents{}
119126
if err = r.Get(ctx, req.NamespacedName, hfc); err != nil {
120127
// The HFC resource may have been deleted
121128
if k8serrors.IsNotFound(err) {
122-
reqLogger.Info("HostFirmwareComponents not found")
129+
reqLogger.V(VerbosityLevelDebug).Info("HostFirmwareComponents not found")
123130
return ctrl.Result{}, nil
124131
}
125132
// Error reading the object - requeue the request.
@@ -141,24 +148,30 @@ func (r *HostFirmwareComponentsReconciler) Reconcile(ctx context.Context, req ct
141148
} else {
142149
msg = err.Error()
143150
}
144-
reqLogger.Info("provisioner is not ready", "Error", msg, "RequeueAfter", provisionerRetryDelay)
151+
reqLogger.Info("provisioner is not ready",
152+
LogFieldError, msg,
153+
LogFieldRequeueAfter, provisionerRetryDelay)
145154
return ctrl.Result{Requeue: true, RequeueAfter: provisionerRetryDelay}, nil
146155
}
147156

148-
info.log.V(1).Info("retrieving firmware components and saving to resource", "Node", bmh.Status.Provisioning.ID)
157+
info.log.V(VerbosityLevelDebug).Info("retrieving firmware components and saving to resource",
158+
LogFieldNode, bmh.Status.Provisioning.ID)
149159
// Check ironic for the components information if possible
150160
components, err := prov.GetFirmwareComponents(ctx)
151161

152162
if err != nil {
153163
if errors.Is(err, provisioner.ErrFirmwareUpdateUnsupported) {
154164
return ctrl.Result{}, nil
155165
}
156-
reqLogger.Info("provisioner returns error", "Error", err.Error(), "RequeueAfter", provisionerRetryDelay)
166+
reqLogger.Info("provisioner returns error",
167+
LogFieldError, err.Error(),
168+
LogFieldRequeueAfter, provisionerRetryDelay)
157169
return ctrl.Result{Requeue: true, RequeueAfter: provisionerRetryDelay}, nil
158170
}
159171

160172
if err = r.updateHostFirmware(ctx, info, components); err != nil {
161-
info.log.Info("updateHostFirmware returned error")
173+
info.log.Info("updateHostFirmware returned error",
174+
LogFieldError, err.Error())
162175
return ctrl.Result{}, fmt.Errorf("could not update hostfirmwarecomponents: %w", err)
163176
}
164177

@@ -223,7 +236,7 @@ func (r *HostFirmwareComponentsReconciler) updateHostFirmware(ctx context.Contex
223236

224237
// Update Status if has changed
225238
if dirty {
226-
info.log.Info("Status for HostFirmwareComponents changed")
239+
info.log.V(VerbosityLevelDebug).Info("status for HostFirmwareComponents changed")
227240
info.hfc.Status = *newStatus.DeepCopy()
228241

229242
t := metav1.Now()
@@ -247,7 +260,7 @@ func (r *HostFirmwareComponentsReconciler) SetupWithManager(mgr ctrl.Manager, ma
247260
}
248261

249262
func (r *HostFirmwareComponentsReconciler) updateEventHandler(e event.UpdateEvent) bool {
250-
r.Log.Info("hostfirmwarecomponents in event handler")
263+
r.Log.V(VerbosityLevelTrace).Info("hostfirmwarecomponents in event handler")
251264

252265
return e.ObjectNew.GetGeneration() != e.ObjectOld.GetGeneration()
253266
}
@@ -264,12 +277,16 @@ func (r *HostFirmwareComponentsReconciler) validateHostFirmwareComponents(info *
264277
}
265278

266279
func (r *HostFirmwareComponentsReconciler) publishEvent(ctx context.Context, request ctrl.Request, event corev1.Event) {
267-
reqLogger := r.Log.WithValues("hostfirmwarecomponents", request.NamespacedName)
268-
reqLogger.Info("publishing event", "reason", event.Reason, "message", event.Message)
280+
reqLogger := r.Log.WithValues(LogFieldHost, request.NamespacedName)
281+
reqLogger.V(VerbosityLevelDebug).Info("publishing event",
282+
LogFieldReason, event.Reason,
283+
"message", event.Message)
269284
err := r.Create(ctx, &event)
270285
if err != nil {
271-
reqLogger.Info("failed to record event, ignoring",
272-
"reason", event.Reason, "message", event.Message, "error", err)
286+
reqLogger.V(VerbosityLevelDebug).Info("failed to record event, ignoring",
287+
LogFieldReason, event.Reason,
288+
"message", event.Message,
289+
LogFieldError, err)
273290
}
274291
}
275292

internal/controller/metal3.io/hostfirmwaresettings_controller.go

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -113,21 +113,21 @@ func (info *rInfo) publishEvent(reason, message string) {
113113
// For more details, check Reconcile and its Result here:
114114
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.6.4/pkg/reconcile
115115
func (r *HostFirmwareSettingsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) {
116-
reqLogger := r.Log.WithValues("hostfirmwaresettings", req.NamespacedName)
117-
reqLogger.Info("start")
116+
reqLogger := r.Log.WithValues(LogFieldHost, req.NamespacedName)
117+
reqLogger.V(VerbosityLevelTrace).Info("reconciliation started")
118118

119119
// Get the corresponding baremetalhost in this namespace, if one doesn't exist don't continue processing
120120
bmh := &metal3api.BareMetalHost{}
121121
if err = r.Get(ctx, req.NamespacedName, bmh); err != nil {
122-
reqLogger.Info("could not get baremetalhost, not running reconciler")
122+
reqLogger.V(VerbosityLevelDebug).Info("could not get baremetalhost, not running reconciler")
123123
if k8serrors.IsNotFound(err) {
124124
return ctrl.Result{}, nil
125125
}
126126
return ctrl.Result{Requeue: true, RequeueAfter: resourceNotAvailableRetryDelay}, nil
127127
}
128128

129-
if skipReconcileSubresource(bmh, reqLogger) {
130-
// We'll get notified on BMH changes, no need to reconcile soon
129+
if hasDetachedAnnotation(bmh) {
130+
reqLogger.V(VerbosityLevelDebug).Info("the host is detached, not running reconciler")
131131
return ctrl.Result{Requeue: true, RequeueAfter: unmanagedRetryDelay}, nil
132132
}
133133

@@ -137,7 +137,7 @@ func (r *HostFirmwareSettingsReconciler) Reconcile(ctx context.Context, req ctrl
137137
if err = r.Get(ctx, req.NamespacedName, hfs); err != nil {
138138
// The HFS resource may have been deleted
139139
if k8serrors.IsNotFound(err) {
140-
reqLogger.Info("hostFirmwareSettings not found")
140+
reqLogger.V(VerbosityLevelDebug).Info("hostFirmwareSettings not found")
141141
return ctrl.Result{Requeue: true, RequeueAfter: resourceNotAvailableRetryDelay}, nil
142142
}
143143
// Error reading the object - requeue the request.
@@ -158,16 +158,21 @@ func (r *HostFirmwareSettingsReconciler) Reconcile(ctx context.Context, req ctrl
158158
} else {
159159
msg = err.Error()
160160
}
161-
reqLogger.Info("provisioner is not ready", "Error", msg, "RequeueAfter", provisionerRetryDelay)
161+
reqLogger.Info("provisioner is not ready",
162+
LogFieldError, msg,
163+
LogFieldRequeueAfter, provisionerRetryDelay)
162164
return ctrl.Result{Requeue: true, RequeueAfter: provisionerRetryDelay}, nil
163165
}
164166

165-
info.log.V(1).Info("retrieving firmware settings and saving to resource", "Node", bmh.Status.Provisioning.ID)
167+
info.log.V(VerbosityLevelDebug).Info("retrieving firmware settings and saving to resource",
168+
LogFieldNode, bmh.Status.Provisioning.ID)
166169

167170
// Get the current settings and schema, retry if provisioner returns error
168171
currentSettings, schema, err := prov.GetFirmwareSettings(ctx, true)
169172
if err != nil {
170-
reqLogger.Info("provisioner returns error", "Error", err.Error(), "RequeueAfter", provisionerRetryDelay)
173+
reqLogger.Info("provisioner returns error",
174+
LogFieldError, err.Error(),
175+
LogFieldRequeueAfter, provisionerRetryDelay)
171176
return ctrl.Result{Requeue: true, RequeueAfter: provisionerRetryDelay}, nil
172177
}
173178

@@ -231,7 +236,10 @@ func (r *HostFirmwareSettingsReconciler) updateStatus(ctx context.Context, info
231236
for k, v := range info.hfs.Spec.Settings {
232237
if statusVal, ok := newStatus.Settings[k]; ok {
233238
if v.String() != statusVal {
234-
info.log.Info("spec value different than status", "name", k, "specvalue", v.String(), "statusvalue", statusVal)
239+
info.log.V(VerbosityLevelDebug).Info("spec value different than status",
240+
"name", k,
241+
"specValue", v.String(),
242+
"statusValue", statusVal)
235243
specMismatch = true
236244
break
237245
}
@@ -281,7 +289,7 @@ func (r *HostFirmwareSettingsReconciler) updateStatus(ctx context.Context, info
281289

282290
// Update Status if it has changed
283291
if dirty {
284-
info.log.Info("Status has changed")
292+
info.log.V(VerbosityLevelDebug).Info("status has changed")
285293
info.hfs.Status = *newStatus.DeepCopy()
286294

287295
t := metav1.Now()
@@ -293,15 +301,15 @@ func (r *HostFirmwareSettingsReconciler) updateStatus(ctx context.Context, info
293301

294302
// Get a firmware schema that matches the host vendor or create one if it doesn't exist.
295303
func (r *HostFirmwareSettingsReconciler) getOrCreateFirmwareSchema(ctx context.Context, info *rInfo, schema map[string]metal3api.SettingSchema) (fSchema *metal3api.FirmwareSchema, err error) {
296-
info.log.V(1).Info("getting firmwareSchema")
304+
info.log.V(VerbosityLevelTrace).Info("getting firmwareSchema")
297305

298306
schemaName := GetSchemaName(schema)
299307
firmwareSchema := &metal3api.FirmwareSchema{}
300308

301309
// If a schema exists that matches, use that, otherwise create a new one
302310
if err = r.Get(ctx, client.ObjectKey{Namespace: info.hfs.ObjectMeta.Namespace, Name: schemaName},
303311
firmwareSchema); err == nil {
304-
info.log.V(1).Info("found existing firmwareSchema resource")
312+
info.log.V(VerbosityLevelDebug).Info("found existing firmwareSchema resource")
305313

306314
// Add hfs as owner so can be garbage collected on delete, if already an owner it will just be overwritten
307315
if err = controllerutil.SetOwnerReference(info.hfs, firmwareSchema, r.Scheme()); err != nil {
@@ -353,7 +361,7 @@ func (r *HostFirmwareSettingsReconciler) getOrCreateFirmwareSchema(ctx context.C
353361
return nil, err
354362
}
355363

356-
info.log.Info("created new firmwareSchema resource")
364+
info.log.V(VerbosityLevelDebug).Info("created new firmwareSchema resource")
357365

358366
return firmwareSchema, nil
359367
}
@@ -372,11 +380,11 @@ func (r *HostFirmwareSettingsReconciler) SetupWithManager(mgr ctrl.Manager, maxC
372380
}
373381

374382
func (r *HostFirmwareSettingsReconciler) updateEventHandler(e event.UpdateEvent) bool {
375-
r.Log.Info("hostfirmwaresettings in event handler")
383+
r.Log.V(VerbosityLevelTrace).Info("hostfirmwaresettings in event handler")
376384

377385
// If the update increased the resource Generation then let's process it
378386
if e.ObjectNew.GetGeneration() != e.ObjectOld.GetGeneration() {
379-
r.Log.Info("returning true as generation changed from event handler")
387+
r.Log.V(VerbosityLevelDebug).Info("returning true as generation changed from event handler")
380388
return true
381389
}
382390

0 commit comments

Comments
 (0)