
Commit 0be1ea7

Add testing for consolidation budgets e2e (#5525)
1 parent daeb5da commit 0be1ea7
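For context on what the new tests exercise: disruption budgets are configured on the NodePool's disruption spec. Below is a minimal sketch (not part of the diff) combining the two budget shapes used in the suite; it assumes a nodePool value already in scope and the same imports the consolidation suite uses (fmt, time, lo, metav1, corev1beta1).

	// Sketch only: combine a percentage budget with a scheduled fully-blocking budget.
	windowStart := time.Now().UTC()
	nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{
		// Allow at most 40% of nodes to be disrupted at any one time.
		{Nodes: "40%"},
		// Block all disruption ("0" nodes) for 30 minutes starting at windowStart ("minute hour * * *" cron form).
		{
			Nodes:    "0",
			Schedule: lo.ToPtr(fmt.Sprintf("%d %d * * *", windowStart.Minute(), windowStart.Hour())),
			Duration: &metav1.Duration{Duration: 30 * time.Minute},
		},
	}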


5 files changed: +332 -239 lines changed


test/pkg/environment/common/expectations.go

Lines changed: 89 additions & 10 deletions
@@ -19,6 +19,7 @@ import (
 	"encoding/base64"
 	"fmt"
 	"io"
+	"math"
 	"strings"
 	"time"
 
@@ -468,25 +469,44 @@ func (env *Environment) ExpectCreatedNodeCount(comparator string, count int) []*
 	return createdNodes
 }
 
+func (env *Environment) ExpectNodeCount(comparator string, count int) {
+	GinkgoHelper()
+
+	nodeList := &v1.NodeList{}
+	Expect(env.Client.List(env, nodeList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
+	Expect(len(nodeList.Items)).To(BeNumerically(comparator, count))
+}
+
+func (env *Environment) ExpectNodeClaimCount(comparator string, count int) {
+	GinkgoHelper()
+
+	nodeClaimList := &corev1beta1.NodeClaimList{}
+	Expect(env.Client.List(env, nodeClaimList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
+	Expect(len(nodeClaimList.Items)).To(BeNumerically(comparator, count))
+}
+
 func NodeNames(nodes []*v1.Node) []string {
 	return lo.Map(nodes, func(n *v1.Node, index int) string {
 		return n.Name
 	})
 }
 
-func (env *Environment) ConsistentlyExpectNodeCount(comparator string, count int, duration string) []*v1.Node {
+func (env *Environment) ConsistentlyExpectNodeCount(comparator string, count int, duration time.Duration) []*v1.Node {
 	GinkgoHelper()
 	By(fmt.Sprintf("expecting nodes to be %s to %d for %s", comparator, count, duration))
 	nodeList := &v1.NodeList{}
 	Consistently(func(g Gomega) {
 		g.Expect(env.Client.List(env, nodeList, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
 		g.Expect(len(nodeList.Items)).To(BeNumerically(comparator, count),
 			fmt.Sprintf("expected %d nodes, had %d (%v) for %s", count, len(nodeList.Items), NodeNames(lo.ToSlicePtr(nodeList.Items)), duration))
-	}, duration).Should(Succeed())
+	}, duration.String()).Should(Succeed())
 	return lo.ToSlicePtr(nodeList.Items)
 }
 
-func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration string) {
+// ConsistentlyExpectNoDisruptions ensures that the state of the cluster is not changed within a passed duration
+// Specifically, we check if the cluster size in terms of nodes is the same as the passed-in size and we validate
+// that no disrupting taints are added throughout the window
+func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration time.Duration) {
 	GinkgoHelper()
 	Consistently(func(g Gomega) {
 		// Ensure we don't change our NodeClaims
@@ -504,7 +524,20 @@ func (env *Environment) ConsistentlyExpectNoDisruptions(nodeCount int, duration
 			})
 			g.Expect(ok).To(BeFalse())
 		}
-	}, duration).Should(Succeed())
+	}, duration.String()).Should(Succeed())
+}
+
+func (env *Environment) ConsistentlyExpectTaintedNodeCount(comparator string, count int, duration time.Duration) []*v1.Node {
+	GinkgoHelper()
+
+	By(fmt.Sprintf("checking for tainted nodes to be %s to %d for %s", comparator, count, duration))
+	nodeList := &v1.NodeList{}
+	Consistently(func(g Gomega) {
+		g.Expect(env.Client.List(env, nodeList, client.MatchingFields{"spec.taints[*].karpenter.sh/disruption": "disrupting"})).To(Succeed())
+		g.Expect(len(nodeList.Items)).To(BeNumerically(comparator, count),
+			fmt.Sprintf("expected %d tainted nodes, had %d (%v)", count, len(nodeList.Items), NodeNames(lo.ToSlicePtr(nodeList.Items))))
+	}, duration.String()).Should(Succeed())
+	return lo.ToSlicePtr(nodeList.Items)
 }
 
 func (env *Environment) EventuallyExpectTaintedNodeCount(comparator string, count int) []*v1.Node {
@@ -751,17 +784,63 @@ func (env *Environment) ExpectDaemonSetEnvironmentVariableUpdated(obj client.Obj
 	Expect(env.Client.Patch(env.Context, ds, patch)).To(Succeed())
 }
 
-func (env *Environment) ExpectHealthyPodsForNode(nodeName string) []*v1.Pod {
+// ForcePodsToSpread ensures that currently scheduled pods get spread evenly across all passed nodes by deleting pods off of existing
+// nodes and waiting for them to reschedule. This is useful for scenarios where you want to force the nodes to be underutilized
+// but you want to keep a consistent count of nodes rather than leaving around empty ones.
+func (env *Environment) ForcePodsToSpread(nodes ...*v1.Node) {
+	GinkgoHelper()
+
+	// Get the total count of pods across the passed nodes
+	podCount := 0
+	for _, n := range nodes {
+		podCount += len(env.ExpectActivePodsForNode(n.Name))
+	}
+	maxPodsPerNode := int(math.Ceil(float64(podCount) / float64(len(nodes))))
+
+	By(fmt.Sprintf("forcing %d pods to spread across %d nodes", podCount, len(nodes)))
+	start := time.Now()
+	for {
+		var nodePods []*v1.Pod
+		node, found := lo.Find(nodes, func(n *v1.Node) bool {
+			nodePods = env.ExpectActivePodsForNode(n.Name)
+			return len(nodePods) > maxPodsPerNode
+		})
+		if !found {
+			break
+		}
+		// Set the nodes to unschedulable so that the pods won't reschedule.
+		Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed())
+		stored := node.DeepCopy()
+		node.Spec.Unschedulable = true
+		Expect(env.Client.Patch(env.Context, node, client.MergeFrom(stored))).To(Succeed())
+		for _, pod := range nodePods[maxPodsPerNode:] {
+			env.ExpectDeleted(pod)
+		}
+		Eventually(func(g Gomega) {
+			g.Expect(len(env.ExpectActivePodsForNode(node.Name))).To(Or(Equal(maxPodsPerNode), Equal(maxPodsPerNode-1)))
+		}).WithTimeout(5 * time.Second).Should(Succeed())
+
+		// TODO: Consider moving this time check to an Eventually poll. This gets a little trickier with helper functions
+		// since you need to make sure that your Expectation helper functions are scoped to your "g Gomega" scope
+		// so that you don't fail the first time you get a failure on your expectation
+		if time.Since(start) > time.Minute*15 {
+			Fail("forcing pods to spread failed due to a timeout")
+		}
+	}
+	for _, n := range nodes {
+		stored := n.DeepCopy()
+		n.Spec.Unschedulable = false
+		Expect(env.Client.Patch(env.Context, n, client.MergeFrom(stored))).To(Succeed())
+	}
+}
+
+func (env *Environment) ExpectActivePodsForNode(nodeName string) []*v1.Pod {
 	GinkgoHelper()
 	podList := &v1.PodList{}
 	Expect(env.Client.List(env, podList, client.MatchingFields{"spec.nodeName": nodeName}, client.HasLabels{test.DiscoveryLabel})).To(Succeed())
 
-	// Return the healthy pods
 	return lo.Filter(lo.ToSlicePtr(podList.Items), func(p *v1.Pod, _ int) bool {
-		_, found := lo.Find(p.Status.Conditions, func(cond v1.PodCondition) bool {
-			return cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue
-		})
-		return found
+		return p.DeletionTimestamp.IsZero()
 	})
 }
 

test/suites/consolidation/suite_test.go

Lines changed: 220 additions & 0 deletions
@@ -22,10 +22,12 @@ import (
 
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/samber/lo"
+	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
 	"sigs.k8s.io/karpenter/pkg/test"
@@ -63,6 +65,224 @@ var _ = AfterEach(func() { env.Cleanup() })
 var _ = AfterEach(func() { env.AfterEach() })
 
 var _ = Describe("Consolidation", func() {
+	Context("Budgets", func() {
+		var nodePool *corev1beta1.NodePool
+		var dep *appsv1.Deployment
+		var selector labels.Selector
+		var numPods int32
+		BeforeEach(func() {
+			nodePool = env.DefaultNodePool(nodeClass)
+			nodePool.Spec.Disruption.ConsolidateAfter = nil
+
+			numPods = 5
+			dep = test.Deployment(test.DeploymentOptions{
+				Replicas: numPods,
+				PodOptions: test.PodOptions{
+					ObjectMeta: metav1.ObjectMeta{
+						Labels: map[string]string{"app": "regular-app"},
+					},
+					ResourceRequirements: v1.ResourceRequirements{
+						Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1")},
+					},
+				},
+			})
+			selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
+		})
+		It("should respect budgets for empty delete consolidation", func() {
+			nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{
+				{
+					Nodes: "40%",
+				},
+			}
+
+			// Hostname anti-affinity to require one pod on each node
+			dep.Spec.Template.Spec.Affinity = &v1.Affinity{
+				PodAntiAffinity: &v1.PodAntiAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
+						{
+							LabelSelector: dep.Spec.Selector,
+							TopologyKey:   v1.LabelHostname,
+						},
+					},
+				},
+			}
+			env.ExpectCreated(nodeClass, nodePool, dep)
+
+			env.EventuallyExpectCreatedNodeClaimCount("==", 5)
+			nodes := env.EventuallyExpectCreatedNodeCount("==", 5)
+			env.EventuallyExpectHealthyPodCount(selector, int(numPods))
+
+			By("adding finalizers to the nodes to prevent termination")
+			for _, node := range nodes {
+				Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed())
+				node.Finalizers = append(node.Finalizers, common.TestingFinalizer)
+				env.ExpectUpdated(node)
+			}
+
+			dep.Spec.Replicas = lo.ToPtr[int32](1)
+			By("making the nodes empty")
+			// Update the deployment to only contain 1 replica.
+			env.ExpectUpdated(dep)
+
+			// Ensure that we get two nodes tainted, and that they overlap during the disruption
+			env.EventuallyExpectTaintedNodeCount("==", 2)
+			nodes = env.ConsistentlyExpectTaintedNodeCount("==", 2, time.Second*5)
+
+			// Remove the finalizer from each node so that we can terminate
+			for _, node := range nodes {
+				Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
+			}
+
+			// After the deletion timestamp is set and all pods are drained
+			// the node should be gone
+			env.EventuallyExpectNotFound(nodes[0], nodes[1])
+
+			// This check ensures that we are consolidating nodes at the same time
+			env.EventuallyExpectTaintedNodeCount("==", 2)
+			nodes = env.ConsistentlyExpectTaintedNodeCount("==", 2, time.Second*5)
+
+			for _, node := range nodes {
+				Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
+			}
+			env.EventuallyExpectNotFound(nodes[0], nodes[1])
+
+			// Expect there to only be one node remaining for the last replica
+			env.ExpectNodeCount("==", 1)
+		})
+		It("should respect budgets for non-empty delete consolidation", func() {
+			// This test will hold consolidation until we are ready to execute it
+			nodePool.Spec.Disruption.ConsolidateAfter = &corev1beta1.NillableDuration{}
+
+			nodePool = test.ReplaceRequirements(nodePool,
+				v1.NodeSelectorRequirement{
+					Key:      v1beta1.LabelInstanceSize,
+					Operator: v1.NodeSelectorOpIn,
+					Values:   []string{"2xlarge"},
+				},
+			)
+			// We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time.
+			nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
+				Nodes: "50%",
+			}}
+			numPods = 9
+			dep = test.Deployment(test.DeploymentOptions{
+				Replicas: numPods,
+				PodOptions: test.PodOptions{
+					ObjectMeta: metav1.ObjectMeta{
+						Labels: map[string]string{"app": "large-app"},
+					},
+					// Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods.
+					ResourceRequirements: v1.ResourceRequirements{
+						Requests: v1.ResourceList{
+							v1.ResourceCPU: resource.MustParse("2100m"),
+						},
+					},
+				},
+			})
+			selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
+			env.ExpectCreated(nodeClass, nodePool, dep)
+
+			env.EventuallyExpectCreatedNodeClaimCount("==", 3)
+			nodes := env.EventuallyExpectCreatedNodeCount("==", 3)
+			env.EventuallyExpectHealthyPodCount(selector, int(numPods))
+
+			By("scaling down the deployment")
+			// Update the deployment to a third of the replicas.
+			dep.Spec.Replicas = lo.ToPtr[int32](3)
+			env.ExpectUpdated(dep)
+
+			env.ForcePodsToSpread(nodes...)
+			env.EventuallyExpectHealthyPodCount(selector, 3)
+
+			By("cordoning and adding finalizer to the nodes")
+			// Add a finalizer to each node so that we can stop termination disruptions
+			for _, node := range nodes {
+				Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed())
+				node.Finalizers = append(node.Finalizers, common.TestingFinalizer)
+				env.ExpectUpdated(node)
+			}
+
+			By("enabling consolidation")
+			nodePool.Spec.Disruption.ConsolidateAfter = nil
+			env.ExpectUpdated(nodePool)
+
+			// Ensure that we get two nodes tainted, and that they overlap during the disruption
+			env.EventuallyExpectTaintedNodeCount("==", 2)
+			nodes = env.ConsistentlyExpectTaintedNodeCount("==", 2, time.Second*5)
+
+			for _, node := range nodes {
+				Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
+			}
+			env.EventuallyExpectNotFound(nodes[0], nodes[1])
+			env.ExpectNodeCount("==", 1)
+		})
+		It("should not allow consolidation if the budget is fully blocking", func() {
+			// We're going to define a budget that doesn't allow any consolidation to happen
+			nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
+				Nodes: "0",
+			}}
+
+			// Hostname anti-affinity to require one pod on each node
+			dep.Spec.Template.Spec.Affinity = &v1.Affinity{
+				PodAntiAffinity: &v1.PodAntiAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
+						{
+							LabelSelector: dep.Spec.Selector,
+							TopologyKey:   v1.LabelHostname,
+						},
+					},
+				},
+			}
+			env.ExpectCreated(nodeClass, nodePool, dep)
+
+			env.EventuallyExpectCreatedNodeClaimCount("==", 5)
+			env.EventuallyExpectCreatedNodeCount("==", 5)
+			env.EventuallyExpectHealthyPodCount(selector, int(numPods))
+
+			dep.Spec.Replicas = lo.ToPtr[int32](1)
+			By("making the nodes empty")
+			// Update the deployment to only contain 1 replica.
+			env.ExpectUpdated(dep)
+
+			env.ConsistentlyExpectNoDisruptions(5, time.Minute)
+		})
+		It("should not allow consolidation if the budget is fully blocking during a scheduled time", func() {
+			// We're going to define a budget that doesn't allow any consolidation to happen
+			// This is going to be on a schedule that only lasts 30 minutes, whose window starts 15 minutes before
+			// the current time and extends 15 minutes past the current time
+			// Times need to be in UTC since the karpenter containers were built in UTC time
+			windowStart := time.Now().Add(-time.Minute * 15).UTC()
+			nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
+				Nodes:    "0",
+				Schedule: lo.ToPtr(fmt.Sprintf("%d %d * * *", windowStart.Minute(), windowStart.Hour())),
+				Duration: &metav1.Duration{Duration: time.Minute * 30},
+			}}
+
+			// Hostname anti-affinity to require one pod on each node
+			dep.Spec.Template.Spec.Affinity = &v1.Affinity{
+				PodAntiAffinity: &v1.PodAntiAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
+						{
+							LabelSelector: dep.Spec.Selector,
+							TopologyKey:   v1.LabelHostname,
+						},
+					},
+				},
+			}
+			env.ExpectCreated(nodeClass, nodePool, dep)
+
+			env.EventuallyExpectCreatedNodeClaimCount("==", 5)
+			env.EventuallyExpectCreatedNodeCount("==", 5)
+			env.EventuallyExpectHealthyPodCount(selector, int(numPods))
+
+			dep.Spec.Replicas = lo.ToPtr[int32](1)
+			By("making the nodes empty")
+			// Update the deployment to only contain 1 replica.
+			env.ExpectUpdated(dep)
+
+			env.ConsistentlyExpectNoDisruptions(5, time.Minute)
+		})
+	})
 	DescribeTable("should consolidate nodes (delete)", Label(debug.NoWatch), Label(debug.NoEvents),
 		func(spotToSpot bool) {
 			nodePool := test.NodePool(corev1beta1.NodePool{
