Skip to content

feat(nodeclaim): use PodScheduled's status for lastPodEventTime #2054

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/controllers/nodeclaim/disruption/consolidation.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ func (c *Consolidation) Reconcile(ctx context.Context, nodePool *v1.NodePool, no
timeToCheck := lo.Ternary(!nodeClaim.Status.LastPodEventTime.IsZero(), nodeClaim.Status.LastPodEventTime.Time, initialized.LastTransitionTime.Time)

// Consider a node consolidatable by looking at the lastPodEvent status field on the nodeclaim.
// This time is based on the PodScheduled condition's lastTransitionTime, or on the current time when the pod is being removed (terminal or terminating)
if c.clock.Since(timeToCheck) < lo.FromPtr(nodePool.Spec.Disruption.ConsolidateAfter.Duration) {
if hasConsolidatableCondition {
_ = nodeClaim.StatusConditions().Clear(v1.ConditionTypeConsolidatable)
Expand Down
52 changes: 42 additions & 10 deletions pkg/controllers/nodeclaim/podevents/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/samber/lo"

"sigs.k8s.io/karpenter/pkg/cloudprovider"
nodeutils "sigs.k8s.io/karpenter/pkg/utils/node"
nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim"
Expand All @@ -43,7 +45,7 @@ import (
// 15 second validation period, so that we can ensure that we invalidate consolidation commands that are decided while we're de-duping pod events.
const dedupeTimeout = 10 * time.Second

// Podevents is a nodeclaim controller that deletes adds the lastPodEvent status onto the nodeclaim
// Podevents is a nodeclaim controller that updates the lastPodEvent status based on PodScheduled condition
type Controller struct {
clock clock.Clock
kubeClient client.Client
Expand Down Expand Up @@ -81,14 +83,35 @@ func (c *Controller) Reconcile(ctx context.Context, pod *corev1.Pod) (reconcile.
return reconcile.Result{}, nil
}

// If we've set the lastPodEvent before and it hasn't been before the timeout, don't do anything
if !nc.Status.LastPodEventTime.Time.IsZero() && c.clock.Since(nc.Status.LastPodEventTime.Time) < dedupeTimeout {
var eventTime time.Time
// If pod is being removed (terminal or terminating), use current time
if podutils.IsTerminal(pod) || podutils.IsTerminating(pod) {
eventTime = c.clock.Now()
} else {
// Otherwise check for PodScheduled condition
if cond, ok := lo.Find(pod.Status.Conditions, func(c corev1.PodCondition) bool {
return c.Type == corev1.PodScheduled && c.Status == corev1.ConditionTrue
}); ok {
eventTime = cond.LastTransitionTime.Time
}
}

// If we don't have a valid time, skip
if eventTime.IsZero() {
return reconcile.Result{}, nil
}

// If we've set the lastPodEvent before,
// the stored lastPodEventTime is after eventTime (i.e. it is already the max),
// and the dedupe timeout has not yet elapsed, don't do anything
if !nc.Status.LastPodEventTime.Time.IsZero() &&
nc.Status.LastPodEventTime.Time.After(eventTime) &&
c.clock.Since(nc.Status.LastPodEventTime.Time) < dedupeTimeout {
return reconcile.Result{}, nil
}

// otherwise, set the pod event time to now
stored := nc.DeepCopy()
nc.Status.LastPodEventTime.Time = c.clock.Now()
nc.Status.LastPodEventTime.Time = eventTime
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be max otherwise it's going to oscillate around as different pods get reconciled. Also, I think still applying a dedupe timeout makes sense -- basically, if the current stored time is within (say 10s) of the new time, then don't update it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

amended

if !equality.Semantic.DeepEqual(stored, nc) {
if err = c.kubeClient.Status().Patch(ctx, nc, client.MergeFrom(stored)); err != nil {
return reconcile.Result{}, client.IgnoreNotFound(err)
Expand All @@ -97,23 +120,32 @@ func (c *Controller) Reconcile(ctx context.Context, pod *corev1.Pod) (reconcile.
return reconcile.Result{}, nil
}

//nolint:gocyclo
func (c *Controller) Register(ctx context.Context, m manager.Manager) error {
return controllerruntime.NewControllerManagedBy(m).
Named("nodeclaim.podevents").
For(&corev1.Pod{}).
WithEventFilter(predicate.TypedFuncs[client.Object]{
// If a pod is bound to a node or goes terminal
UpdateFunc: func(e event.TypedUpdateEvent[client.Object]) bool {
oldPod := (e.ObjectOld).(*corev1.Pod)
newPod := (e.ObjectNew).(*corev1.Pod)
// if this is a newly bound pod
bound := oldPod.Spec.NodeName == "" && newPod.Spec.NodeName != ""
// Check for pod scheduling changes
oldCond, oldOk := lo.Find(oldPod.Status.Conditions, func(c corev1.PodCondition) bool {
return c.Type == corev1.PodScheduled
})
newCond, newOk := lo.Find(newPod.Status.Conditions, func(c corev1.PodCondition) bool {
return c.Type == corev1.PodScheduled
})
// Trigger on PodScheduled condition changes
if (!oldOk && newOk) || (oldOk && newOk && (oldCond.Status != newCond.Status || !oldCond.LastTransitionTime.Equal(&newCond.LastTransitionTime))) {
return true
}
// if this is a newly terminal pod
terminal := (newPod.Spec.NodeName != "" && !podutils.IsTerminal(oldPod) && podutils.IsTerminal(newPod))
// if this is a newly terminating pod
terminating := (newPod.Spec.NodeName != "" && !podutils.IsTerminating(oldPod) && podutils.IsTerminating(newPod))
// return true if it was bound to a node, went terminal, or went terminating
return bound || terminal || terminating
// return true if it went terminal, or went terminating
return terminal || terminating
},
}).
Complete(reconcile.AsReconciler(m.GetClient(), c))
Expand Down
112 changes: 106 additions & 6 deletions pkg/controllers/nodeclaim/podevents/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,13 @@ var _ = Describe("PodEvents", func() {
})
})
It("should set the nodeclaim lastPodEvent", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node, pod)
timeToCheck := fakeClock.Now().Truncate(time.Second)
pod.Status.Conditions = []corev1.PodCondition{{
Type: corev1.PodScheduled,
Status: corev1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: timeToCheck},
}}
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expand All @@ -113,40 +118,135 @@ var _ = Describe("PodEvents", func() {
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeZero())
})
It("should not set the nodeclaim lastPodEvent when pod has no PodScheduled condition", func() {
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time.IsZero()).To(BeTrue())
})
It("should not set the nodeclaim lastPodEvent when PodScheduled condition is not True", func() {
pod.Status.Conditions = []corev1.PodCondition{{
Type: corev1.PodScheduled,
Status: corev1.ConditionFalse, // PodScheduled false
LastTransitionTime: metav1.Time{Time: fakeClock.Now()},
}}
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time.IsZero()).To(BeTrue())
})
It("should succeed when the nodeclaim lastPodEvent when the nodeclaim does not exist", func() {
ExpectApplied(ctx, env.Client, nodePool, node, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
})
It("should set the nodeclaim lastPodEvent when it's been set before", func() {
nodeClaim.Status.LastPodEventTime.Time = fakeClock.Now().Add(-5 * time.Minute)
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
timeToCheck := fakeClock.Now().Truncate(time.Second)
pod.Status.Conditions = []corev1.PodCondition{{
Type: corev1.PodScheduled,
Status: corev1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: timeToCheck},
}}
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
})
It("should only set the nodeclaim lastPodEvent once within the dedupe timeframe", func() {
It("should only set the nodeclaim lastPodEventTime when the event time is after than", func() {
timeToCheck := fakeClock.Now().Truncate(time.Second)
nodeClaim.Status.LastPodEventTime.Time = timeToCheck
scheduledTimeBeforeLastPodEventTime := timeToCheck.Add(-5 * time.Minute)
pod.Status.Conditions = []corev1.PodCondition{{
Type: corev1.PodScheduled,
Status: corev1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: scheduledTimeBeforeLastPodEventTime},
}}
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
})
It("should only set the nodeclaim lastPodEvent within the dedupe timeframe", func() {
timeToCheck := fakeClock.Now().Truncate(time.Second)
pod.Status.Conditions = []corev1.PodCondition{{
Type: corev1.PodScheduled,
Status: corev1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: timeToCheck},
}}
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

// Expect that the lastPodEventTime is set
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))

// step through half of the dedupe timeout, and re-reconcile, expecting the status to not change
// step through half of the dedupe timeout, and re-reconcile,
// expecting the status to not change
fakeClock.Step(5 * time.Second)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))

// step through rest of the dedupe timeout, and re-reconcile, expecting the status to change
// step through the rest of the dedupe timeout and re-reconcile,
// expecting the status to not change since the PodScheduled condition has not changed and the pod is neither terminal nor terminating
fakeClock.Step(5 * time.Second)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).ToNot(BeEquivalentTo(timeToCheck))
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
})
It("should set the nodeclaim lastPodEvent when pod becomes terminal", func() {
scheduledTime := fakeClock.Now().Truncate(time.Second)
pod.Status.Conditions = []corev1.PodCondition{{
Type: corev1.PodScheduled,
Status: corev1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: scheduledTime},
}}
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

// Make pod terminal now
timeToCheck := fakeClock.Now().Add(time.Minute).Truncate(time.Second)
fakeClock.SetTime(timeToCheck)
pod.Status.Phase = corev1.PodSucceeded // Setting pod as terminal directly
ExpectApplied(ctx, env.Client, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
})
It("should set the nodeclaim lastPodEvent when pod becomes terminating", func() {
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

timeToCheck := fakeClock.Now().Truncate(time.Second)
fakeClock.SetTime(timeToCheck)
// Make pod terminating by deleting it
ExpectDeletionTimestampSet(ctx, env.Client, pod)
// Reconcile for the terminating pod
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
})
It("should set the nodeclaim lastPodEvent when pod is already in a terminal state", func() {
// Setup time
timeToCheck := fakeClock.Now().Truncate(time.Second)
fakeClock.SetTime(timeToCheck)

// Set the pod to a terminal state directly - makes podutils.IsTerminal() return true
pod.Status.Phase = corev1.PodSucceeded

// Apply objects and reconcile
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)

// Verify the last pod event time is set
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
})
})
Loading