Skip to content

Commit b3fa6eb

Browse files
authored
chore: Avoid multiple log messages for the node health controller (#1888)
1 parent 79fe772 commit b3fa6eb

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

pkg/controllers/node/health/controller.go

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"github.com/samber/lo"
2525
corev1 "k8s.io/api/core/v1"
26+
"k8s.io/apimachinery/pkg/api/equality"
2627
"k8s.io/apimachinery/pkg/types"
2728
"k8s.io/apimachinery/pkg/util/intstr"
2829

@@ -120,10 +121,18 @@ func (c *Controller) Reconcile(ctx context.Context, node *corev1.Node) (reconcil
120121
if err := c.annotateTerminationGracePeriod(ctx, nodeClaim); err != nil {
121122
return reconcile.Result{}, client.IgnoreNotFound(err)
122123
}
124+
125+
return c.deleteNodeClaim(ctx, nodeClaim, node, unhealthyNodeCondition)
126+
}
127+
128+
// deleteNodeClaim removes the NodeClaim from the api-server
129+
func (c *Controller) deleteNodeClaim(ctx context.Context, nodeClaim *v1.NodeClaim, node *corev1.Node, unhealthyNodeCondition *corev1.NodeCondition) (reconcile.Result, error) {
130+
if !nodeClaim.DeletionTimestamp.IsZero() {
131+
return reconcile.Result{}, nil
132+
}
123133
if err := c.kubeClient.Delete(ctx, nodeClaim); err != nil {
124134
return reconcile.Result{}, client.IgnoreNotFound(err)
125135
}
126-
127136
// The deletion timestamp has successfully been set for the NodeClaim, update relevant metrics.
128137
log.FromContext(ctx).V(1).Info("deleting unhealthy node")
129138
metrics.NodeClaimsDisruptedTotal.Inc(map[string]string{
@@ -155,11 +164,22 @@ func (c *Controller) findUnhealthyConditions(node *corev1.Node) (nc *corev1.Node
155164
}
156165

157166
func (c *Controller) annotateTerminationGracePeriod(ctx context.Context, nodeClaim *v1.NodeClaim) error {
167+
if expirationTimeString, exists := nodeClaim.ObjectMeta.Annotations[v1.NodeClaimTerminationTimestampAnnotationKey]; exists {
168+
expirationTime, err := time.Parse(time.RFC3339, expirationTimeString)
169+
if err == nil && expirationTime.Before(c.clock.Now()) {
170+
return nil
171+
}
172+
}
173+
158174
stored := nodeClaim.DeepCopy()
159-
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: c.clock.Now().Format(time.RFC3339)})
175+
terminationTime := c.clock.Now().Format(time.RFC3339)
176+
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: terminationTime})
160177

161-
if err := c.kubeClient.Patch(ctx, nodeClaim, client.MergeFrom(stored)); err != nil {
162-
return err
178+
if !equality.Semantic.DeepEqual(stored, nodeClaim) {
179+
if err := c.kubeClient.Patch(ctx, nodeClaim, client.MergeFrom(stored)); err != nil {
180+
return err
181+
}
182+
log.FromContext(ctx).WithValues(v1.NodeClaimTerminationTimestampAnnotationKey, terminationTime).Info("annotated nodeclaim")
163183
}
164184

165185
return nil

pkg/controllers/node/health/suite_test.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
. "github.com/onsi/ginkgo/v2"
2626
. "github.com/onsi/gomega"
27+
"github.com/samber/lo"
2728
corev1 "k8s.io/api/core/v1"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2930
clock "k8s.io/utils/clock/testing"
@@ -171,7 +172,7 @@ var _ = Describe("Node Health", func() {
171172
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodeClaimTerminationTimestampAnnotationKey, fakeClock.Now().Format(time.RFC3339)))
172173
})
173174
It("should not respect termination grace period if set on the nodepool", func() {
174-
nodeClaim.Spec.TerminationGracePeriod = &metav1.Duration{Duration: time.Minute}
175+
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: fakeClock.Now().Add(120 * time.Minute).Format(time.RFC3339)})
175176
node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
176177
Type: "BadNode",
177178
Status: corev1.ConditionFalse,
@@ -186,6 +187,23 @@ var _ = Describe("Node Health", func() {
186187
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
187188
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodeClaimTerminationTimestampAnnotationKey, fakeClock.Now().Format(time.RFC3339)))
188189
})
190+
It("should not update termination grace period if set before the current time", func() {
191+
terminationTime := fakeClock.Now().Add(-3 * time.Minute).Format(time.RFC3339)
192+
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: terminationTime})
193+
node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
194+
Type: "BadNode",
195+
Status: corev1.ConditionFalse,
196+
// We expect the last transition for HealthyNode condition to wait 30 minutes
197+
LastTransitionTime: metav1.Time{Time: time.Now()},
198+
})
199+
fakeClock.Step(60 * time.Minute)
200+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
201+
// Determine to delete unhealthy nodes
202+
ExpectObjectReconciled(ctx, env.Client, healthController, node)
203+
204+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
205+
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodeClaimTerminationTimestampAnnotationKey, terminationTime))
206+
})
189207
It("should return the requeue interval for the condition closest to its terminationDuration", func() {
190208
cloudProvider.RepairPolicy = []cloudprovider.RepairPolicy{
191209
{

0 commit comments

Comments
 (0)