🐛 Add MachinesUpToDate condition handling in MachinePool controller #13234
Changes from all commits
@@ -19,7 +19,9 @@ package machinepool
 import (
     "context"
     "fmt"
+    "time"
 
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/utils/ptr"
     ctrl "sigs.k8s.io/controller-runtime"
@@ -41,7 +43,8 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) error {
 
     setReplicas(s.machinePool, hasMachinePoolMachines, s.machines)
 
-    // TODO: in future add setting conditions here
+    // Set MachinesUpToDate condition to surface when machines are not up-to-date with the spec.
+    setMachinesUpToDateCondition(ctx, s.machinePool, s.machines, hasMachinePoolMachines)
 
     return nil
 }
@@ -73,3 +76,78 @@ func setReplicas(mp *clusterv1.MachinePool, hasMachinePoolMachines bool, machine
     mp.Status.AvailableReplicas = ptr.To(availableReplicas)
     mp.Status.UpToDateReplicas = ptr.To(upToDateReplicas)
 }
+
+// setMachinesUpToDateCondition sets the MachinesUpToDate condition on the MachinePool.
+func setMachinesUpToDateCondition(ctx context.Context, mp *clusterv1.MachinePool, machines []*clusterv1.Machine, hasMachinePoolMachines bool) {
+    log := ctrl.LoggerFrom(ctx)
+
+    // For MachinePool providers that don't use Machine objects (like managed pools),
+    // we derive the MachinesUpToDate condition from the infrastructure status.
+    // If InfrastructureReady is False, machines are not up-to-date.
+    if !hasMachinePoolMachines {
+        // Check if infrastructure is ready by looking at the initialization status.
+        if !ptr.Deref(mp.Status.Initialization.InfrastructureProvisioned, false) {
+            log.V(4).Info("setting MachinesUpToDate to false for machineless MachinePool: infrastructure not yet provisioned")
+            conditions.Set(mp, metav1.Condition{
+                Type: clusterv1.MachinesUpToDateCondition,
+                Status: metav1.ConditionFalse,
+                Reason: clusterv1.NotUpToDateReason,
**Contributor:** Instead of this, I would suggest using a MachinePool-specific constant. I already see […]

**Author:** Aha, nice catch! But it looks like that is going to be separate work, as per https://github.com/kubernetes-sigs/cluster-api/blob/main/api/core/v1beta2/machinepool_types.go#L32-L65
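If such a MachinePool-scoped reason lands in that follow-up, the change here would presumably be limited to swapping the reason constant. A minimal sketch of what this branch could then look like; the constant name below is purely illustrative and is not an existing identifier in api/core/v1beta2:

```go
// Hypothetical: assumes a future MachinePool-scoped reason constant is added in
// api/core/v1beta2 (the name below is illustrative, not an existing identifier).
conditions.Set(mp, metav1.Condition{
	Type:    clusterv1.MachinesUpToDateCondition,
	Status:  metav1.ConditionFalse,
	Reason:  clusterv1.MachinePoolMachinesNotUpToDateReason, // hypothetical constant
	Message: "Infrastructure is not yet provisioned",
})
```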
+                Message: "Infrastructure is not yet provisioned",
+            })
+            return
+        }
+
+        conditions.Set(mp, metav1.Condition{
+            Type: clusterv1.MachinesUpToDateCondition,
+            Status: metav1.ConditionTrue,
+            Reason: clusterv1.UpToDateReason,
+        })
+        return
+    }
+
+    // Only consider Machines that have an UpToDate condition or are older than 10s.
+    // This is done to ensure the MachinesUpToDate condition doesn't flicker after a new Machine is created,
+    // because it can take a bit until the UpToDate condition is set on a new Machine.
+    filteredMachines := make([]*clusterv1.Machine, 0, len(machines))
+    for _, machine := range machines {
+        if conditions.Has(machine, clusterv1.MachineUpToDateCondition) || time.Since(machine.CreationTimestamp.Time) > 10*time.Second {
+            filteredMachines = append(filteredMachines, machine)
+        }
+    }
+
+    if len(filteredMachines) == 0 {
+        conditions.Set(mp, metav1.Condition{
+            Type: clusterv1.MachinesUpToDateCondition,
+            Status: metav1.ConditionTrue,
+            Reason: clusterv1.NoReplicasReason,
+        })
+        return
+    }
+
+    upToDateCondition, err := conditions.NewAggregateCondition(
+        filteredMachines, clusterv1.MachineUpToDateCondition,
+        conditions.TargetConditionType(clusterv1.MachinesUpToDateCondition),
+        // Using a custom merge strategy to override reasons applied during merge.
+        conditions.CustomMergeStrategy{
+            MergeStrategy: conditions.DefaultMergeStrategy(
+                conditions.ComputeReasonFunc(conditions.GetDefaultComputeMergeReasonFunc(
+                    clusterv1.NotUpToDateReason,
+                    clusterv1.UpToDateUnknownReason,
+                    clusterv1.UpToDateReason,
+                )),
+            ),
+        },
+    )
+    if err != nil {
+        log.Error(err, "Failed to aggregate Machine's UpToDate conditions")
+        conditions.Set(mp, metav1.Condition{
+            Type: clusterv1.MachinesUpToDateCondition,
+            Status: metav1.ConditionUnknown,
+            Reason: clusterv1.InternalErrorReason,
+            Message: "Please check controller logs for errors",
+        })
+        return
+    }
+
+    conditions.Set(mp, *upToDateCondition)
+}
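Not part of the diff above, but a rough sketch of how the new helper could be exercised in a unit test, illustrating both the aggregation path and the 10-second filter. The import paths (api/core/v1beta2, util/conditions) and the conditions.Get helper are assumptions based on how the surrounding package reads on main:

```go
package machinepool

import (
	"context"
	"testing"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// Sketch only: exercises setMachinesUpToDateCondition with one Machine that is
// not up-to-date and one freshly created Machine without an UpToDate condition,
// which the 10-second filter should ignore.
func TestSetMachinesUpToDateConditionSketch(t *testing.T) {
	mp := &clusterv1.MachinePool{}

	// Zero CreationTimestamp means this Machine counts as older than 10s.
	notUpToDate := &clusterv1.Machine{
		ObjectMeta: metav1.ObjectMeta{Name: "stale-machine"},
	}
	conditions.Set(notUpToDate, metav1.Condition{
		Type:   clusterv1.MachineUpToDateCondition,
		Status: metav1.ConditionFalse,
		Reason: clusterv1.NotUpToDateReason,
	})

	// Just created and without an UpToDate condition: filtered out.
	brandNew := &clusterv1.Machine{
		ObjectMeta: metav1.ObjectMeta{Name: "new-machine", CreationTimestamp: metav1.Now()},
	}

	setMachinesUpToDateCondition(context.Background(), mp, []*clusterv1.Machine{notUpToDate, brandNew}, true)

	// Expect the aggregated condition to surface False, driven by notUpToDate only.
	got := conditions.Get(mp, clusterv1.MachinesUpToDateCondition) // conditions.Get assumed available
	if got == nil || got.Status != metav1.ConditionFalse {
		t.Fatalf("expected MachinesUpToDate to be False, got %+v", got)
	}
}
```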
**Reviewer:** I wonder if it's also worth adding a message to state that this pool is "machineless" and the infrastructure is not ready, just to give more context.

**Author:** That's a user-facing condition, so isn't that a provider-specific implementation detail? It might be useful for devs debugging the controller itself, but they can go to the logs, as opposed to users going to the infrastructure resource when they see the infrastructure-not-provisioned message.

**Reviewer:** In that case, adding a log with a high verbosity level may help with the debugging.

**Author:** Sounds good. Added the log!
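For reference, the log that resolves this thread is the V(4) call already visible in the diff; with logr, a V(4) message is only emitted when the controller runs at log verbosity 4 or higher. A tiny standalone illustration of the pattern (the wrapper function name is illustrative, not part of the PR):

```go
package machinepool

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
)

// logMachinelessPoolNotProvisioned shows the pattern used in the diff: debugging
// detail goes into a verbosity-gated log rather than the user-facing condition
// message, and is dropped unless the logger verbosity is at least 4.
func logMachinelessPoolNotProvisioned(ctx context.Context) {
	log := ctrl.LoggerFrom(ctx)
	log.V(4).Info("setting MachinesUpToDate to false for machineless MachinePool: infrastructure not yet provisioned")
}
```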