Skip to content

Commit ddeaccd

Browse files
committed
feat(nodeclaim): use PodScheduled's lastTransitionTime for lastPodEventTime when pod is bound
Signed-off-by: flavono123 <[email protected]>
1 parent cfc3153 commit ddeaccd

File tree

4 files changed

+120
-39
lines changed

4 files changed

+120
-39
lines changed

pkg/controllers/nodeclaim/disruption/consolidation.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ func (c *Consolidation) Reconcile(ctx context.Context, nodePool *v1.NodePool, no
6060
timeToCheck := lo.Ternary(!nodeClaim.Status.LastPodEventTime.IsZero(), nodeClaim.Status.LastPodEventTime.Time, initialized.LastTransitionTime.Time)
6161

6262
// Consider a node consolidatable by looking at the lastPodEvent status field on the nodeclaim.
63+
// This time is now based on the PodScheduled condition's lastTransitionTime, or on the current time when the pod is being removed (terminal or terminating)
6364
if c.clock.Since(timeToCheck) < lo.FromPtr(nodePool.Spec.Disruption.ConsolidateAfter.Duration) {
6465
if hasConsolidatableCondition {
6566
_ = nodeClaim.StatusConditions().Clear(v1.ConditionTypeConsolidatable)

pkg/controllers/nodeclaim/hydration/suite_test.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@ import (
3535
. "sigs.k8s.io/karpenter/pkg/utils/testing"
3636
)
3737

38-
var ctx context.Context
39-
var hydrationController *hydration.Controller
40-
var env *test.Environment
41-
var cloudProvider *fake.CloudProvider
38+
var (
39+
ctx context.Context
40+
hydrationController *hydration.Controller
41+
env *test.Environment
42+
cloudProvider *fake.CloudProvider
43+
)
4244

4345
func TestAPIs(t *testing.T) {
4446
ctx = TestContextWithLogger(t)

pkg/controllers/nodeclaim/podevents/controller.go

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,15 @@ import (
3232
"sigs.k8s.io/controller-runtime/pkg/predicate"
3333
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3434

35+
"github.com/samber/lo"
36+
3537
"sigs.k8s.io/karpenter/pkg/cloudprovider"
3638
nodeutils "sigs.k8s.io/karpenter/pkg/utils/node"
3739
nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim"
3840
podutils "sigs.k8s.io/karpenter/pkg/utils/pod"
3941
)
4042

41-
// dedupeTimeout is 10 seconds to reduce the number of writes to the APIServer, since pod scheduling and deletion events are very frequent.
42-
// The smaller this value is, the more writes Karpenter will make in a busy cluster. This timeout is intentionally smaller than the consolidation
43-
// 15 second validation period, so that we can ensure that we invalidate consolidation commands that are decided while we're de-duping pod events.
44-
const dedupeTimeout = 10 * time.Second
45-
46-
// Podevents is a nodeclaim controller that deletes adds the lastPodEvent status onto the nodeclaim
43+
// Podevents is a nodeclaim controller that updates the lastPodEvent status based on PodScheduled condition
4744
type Controller struct {
4845
clock clock.Clock
4946
kubeClient client.Client
@@ -81,14 +78,28 @@ func (c *Controller) Reconcile(ctx context.Context, pod *corev1.Pod) (reconcile.
8178
return reconcile.Result{}, nil
8279
}
8380

84-
// If we've set the lastPodEvent before and it hasn't been before the timeout, don't do anything
85-
if !nc.Status.LastPodEventTime.Time.IsZero() && c.clock.Since(nc.Status.LastPodEventTime.Time) < dedupeTimeout {
81+
var eventTime time.Time
82+
// If pod is being removed (terminal or terminating), use current time
83+
if podutils.IsTerminal(pod) || podutils.IsTerminating(pod) {
84+
eventTime = c.clock.Now()
85+
} else {
86+
// Otherwise check for PodScheduled condition
87+
if cond, ok := lo.Find(pod.Status.Conditions, func(c corev1.PodCondition) bool {
88+
return c.Type == corev1.PodScheduled && c.Status == corev1.ConditionTrue
89+
}); ok {
90+
eventTime = cond.LastTransitionTime.Time
91+
}
92+
}
93+
94+
// If we don't have a valid time, skip
95+
if eventTime.IsZero() {
8696
return reconcile.Result{}, nil
8797
}
8898

8999
// otherwise, persist the computed event time onto the nodeclaim status
90100
stored := nc.DeepCopy()
91-
nc.Status.LastPodEventTime.Time = c.clock.Now()
101+
102+
nc.Status.LastPodEventTime.Time = eventTime
92103
if !equality.Semantic.DeepEqual(stored, nc) {
93104
if err = c.kubeClient.Status().Patch(ctx, nc, client.MergeFrom(stored)); err != nil {
94105
return reconcile.Result{}, client.IgnoreNotFound(err)
@@ -102,18 +113,26 @@ func (c *Controller) Register(ctx context.Context, m manager.Manager) error {
102113
Named("nodeclaim.podevents").
103114
For(&corev1.Pod{}).
104115
WithEventFilter(predicate.TypedFuncs[client.Object]{
105-
// If a pod is bound to a node or goes terminal
106116
UpdateFunc: func(e event.TypedUpdateEvent[client.Object]) bool {
107117
oldPod := (e.ObjectOld).(*corev1.Pod)
108118
newPod := (e.ObjectNew).(*corev1.Pod)
109-
// if this is a newly bound pod
110-
bound := oldPod.Spec.NodeName == "" && newPod.Spec.NodeName != ""
119+
// Check for pod scheduling changes
120+
oldCond, oldOk := lo.Find(oldPod.Status.Conditions, func(c corev1.PodCondition) bool {
121+
return c.Type == corev1.PodScheduled
122+
})
123+
newCond, newOk := lo.Find(newPod.Status.Conditions, func(c corev1.PodCondition) bool {
124+
return c.Type == corev1.PodScheduled
125+
})
126+
// Trigger on PodScheduled condition changes
127+
if (!oldOk && newOk) || (oldOk && newOk && (oldCond.Status != newCond.Status || !oldCond.LastTransitionTime.Equal(&newCond.LastTransitionTime))) {
128+
return true
129+
}
111130
// if this is a newly terminal pod
112131
terminal := (newPod.Spec.NodeName != "" && !podutils.IsTerminal(oldPod) && podutils.IsTerminal(newPod))
113132
// if this is a newly terminating pod
114133
terminating := (newPod.Spec.NodeName != "" && !podutils.IsTerminating(oldPod) && podutils.IsTerminating(newPod))
115-
// return true if it was bound to a node, went terminal, or went terminating
116-
return bound || terminal || terminating
134+
// return true if it went terminal, or went terminating
135+
return terminal || terminating
117136
},
118137
}).
119138
Complete(reconcile.AsReconciler(m.GetClient(), c))

pkg/controllers/nodeclaim/podevents/suite_test.go

Lines changed: 80 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ import (
3838
. "sigs.k8s.io/karpenter/pkg/utils/testing"
3939
)
4040

41-
var ctx context.Context
42-
var podEventsController *podevents.Controller
43-
var env *test.Environment
44-
var fakeClock *clock.FakeClock
45-
var cp *fake.CloudProvider
41+
var (
42+
ctx context.Context
43+
podEventsController *podevents.Controller
44+
env *test.Environment
45+
fakeClock *clock.FakeClock
46+
cp *fake.CloudProvider
47+
)
4648

4749
func TestAPIs(t *testing.T) {
4850
ctx = TestContextWithLogger(t)
@@ -75,6 +77,7 @@ var _ = AfterEach(func() {
7577
cp.Reset()
7678
ExpectCleanedUp(ctx, env.Client)
7779
})
80+
7881
var _ = Describe("PodEvents", func() {
7982
var nodePool *v1.NodePool
8083
var nodeClaim *v1.NodeClaim
@@ -98,13 +101,31 @@ var _ = Describe("PodEvents", func() {
98101
NodeName: node.Name,
99102
})
100103
})
101-
It("should set the nodeclaim lastPodEvent", func() {
102-
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node, pod)
103-
timeToCheck := fakeClock.Now().Truncate(time.Second)
104+
It("should set the nodeclaim lastPodEvent based on PodScheduled condition", func() {
105+
scheduledTime := fakeClock.Now().Truncate(time.Second)
106+
pod.Status.Conditions = []corev1.PodCondition{{
107+
Type: corev1.PodScheduled,
108+
Status: corev1.ConditionTrue,
109+
LastTransitionTime: metav1.Time{Time: scheduledTime},
110+
}}
111+
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
104112
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
105113

106114
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
107-
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
115+
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(scheduledTime))
116+
117+
// Update the PodScheduled condition's lastTransitionTime
118+
newScheduledTime := fakeClock.Now().Add(time.Minute).Truncate(time.Second)
119+
pod.Status.Conditions = []corev1.PodCondition{{
120+
Type: corev1.PodScheduled,
121+
Status: corev1.ConditionTrue,
122+
LastTransitionTime: metav1.Time{Time: newScheduledTime},
123+
}}
124+
ExpectApplied(ctx, env.Client, pod)
125+
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
126+
127+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
128+
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(newScheduledTime))
108129
})
109130
It("should not set the nodeclaim lastPodEvent when the node does not exist", func() {
110131
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, pod)
@@ -117,36 +138,74 @@ var _ = Describe("PodEvents", func() {
117138
ExpectApplied(ctx, env.Client, nodePool, node, pod)
118139
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
119140
})
120-
It("should set the nodeclaim lastPodEvent when it's been set before", func() {
121-
nodeClaim.Status.LastPodEventTime.Time = fakeClock.Now().Add(-5 * time.Minute)
141+
It("should set the nodeclaim lastPodEvent when pod becomes terminal", func() {
142+
// First set up a regular pod
143+
scheduledTime := fakeClock.Now().Truncate(time.Second)
144+
pod.Status.Conditions = []corev1.PodCondition{{
145+
Type: corev1.PodScheduled,
146+
Status: corev1.ConditionTrue,
147+
LastTransitionTime: metav1.Time{Time: scheduledTime},
148+
}}
122149
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
123-
timeToCheck := fakeClock.Now().Truncate(time.Second)
150+
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
151+
152+
// Make pod terminal now
153+
timeToCheck := fakeClock.Now().Add(time.Minute).Truncate(time.Second)
154+
fakeClock.SetTime(timeToCheck)
155+
pod.Status.Phase = corev1.PodSucceeded // Setting pod as terminal directly
156+
ExpectApplied(ctx, env.Client, pod)
124157
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
125158

126159
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
127160
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
128161
})
129-
It("should only set the nodeclaim lastPodEvent once within the dedupe timeframe", func() {
162+
It("should set the nodeclaim lastPodEvent when pod becomes terminating", func() {
130163
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
131-
timeToCheck := fakeClock.Now().Truncate(time.Second)
132164
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
133165

134-
// Expect that the lastPodEventTime is set
166+
timeToCheck := fakeClock.Now().Truncate(time.Second)
167+
fakeClock.SetTime(timeToCheck)
168+
// Make pod terminating by deleting it
169+
ExpectDeletionTimestampSet(ctx, env.Client, pod)
170+
// Reconcile for the terminating pod
171+
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
135172
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
136173
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
174+
})
175+
It("should not update lastPodEvent when pod has no PodScheduled condition", func() {
176+
// Pod with no conditions
177+
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
178+
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
137179

138-
// step through half of the dedupe timeout, and re-reconcile, expecting the status to not change
139-
fakeClock.Step(5 * time.Second)
180+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
181+
Expect(nodeClaim.Status.LastPodEventTime.Time.IsZero()).To(BeTrue())
182+
})
183+
It("should not update lastPodEvent when PodScheduled condition is not True", func() {
184+
pod.Status.Conditions = []corev1.PodCondition{{
185+
Type: corev1.PodScheduled,
186+
Status: corev1.ConditionFalse,
187+
LastTransitionTime: metav1.Time{Time: fakeClock.Now()},
188+
}}
189+
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
140190
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
141191

142192
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
143-
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
193+
Expect(nodeClaim.Status.LastPodEventTime.Time.IsZero()).To(BeTrue())
194+
})
195+
It("should set the nodeclaim lastPodEvent when pod is already in a terminal state", func() {
196+
// Setup time
197+
timeToCheck := fakeClock.Now().Truncate(time.Second)
198+
fakeClock.SetTime(timeToCheck)
144199

145-
// step through rest of the dedupe timeout, and re-reconcile, expecting the status to change
146-
fakeClock.Step(5 * time.Second)
200+
// Set the pod to a terminal state directly so that podutils.IsTerminal() returns true
201+
pod.Status.Phase = corev1.PodSucceeded
202+
203+
// Apply objects and reconcile
204+
ExpectApplied(ctx, env.Client, nodePool, node, nodeClaim, pod)
147205
ExpectObjectReconciled(ctx, env.Client, podEventsController, pod)
148206

207+
// Verify the last pod event time is set
149208
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
150-
Expect(nodeClaim.Status.LastPodEventTime.Time).ToNot(BeEquivalentTo(timeToCheck))
209+
Expect(nodeClaim.Status.LastPodEventTime.Time).To(BeEquivalentTo(timeToCheck))
151210
})
152211
})

0 commit comments

Comments
 (0)