@@ -22,10 +22,12 @@ import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/samber/lo"
+	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
+	"sigs.k8s.io/controller-runtime/pkg/client"

	corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
	"sigs.k8s.io/karpenter/pkg/test"
@@ -63,6 +65,224 @@ var _ = AfterEach(func() { env.Cleanup() })
var _ = AfterEach(func() { env.AfterEach() })

var _ = Describe("Consolidation", func() {
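+	// Disruption budgets bound how many nodes Karpenter may disrupt at once: "Nodes" accepts either a count
+	// or a percentage of the NodePool's nodes, and a budget can optionally be limited to a window defined by
+	// a cron schedule plus a duration.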
+	Context("Budgets", func() {
+		var nodePool *corev1beta1.NodePool
+		var dep *appsv1.Deployment
+		var selector labels.Selector
+		var numPods int32
+		BeforeEach(func() {
+			nodePool = env.DefaultNodePool(nodeClass)
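+			// ConsolidateAfter is cleared so consolidation is not held back (the non-empty test below sets it
+			// explicitly when it needs to hold consolidation until the test is ready).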
+			nodePool.Spec.Disruption.ConsolidateAfter = nil
+
+			numPods = 5
+			dep = test.Deployment(test.DeploymentOptions{
+				Replicas:   numPods,
+				PodOptions: test.PodOptions{
+					ObjectMeta: metav1.ObjectMeta{
+						Labels: map[string]string{"app": "regular-app"},
+					},
+					ResourceRequirements: v1.ResourceRequirements{
+						Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("1")},
+					},
+				},
+			})
+			selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
+		})
+		It("should respect budgets for empty delete consolidation", func() {
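+			// We're expecting to create 5 nodes, so the 40% budget should allow at most 2 nodes to be deleted at one time.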
+			nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{
+				{
+					Nodes: "40%",
+				},
+			}
+
+			// Hostname anti-affinity to require one pod on each node
+			dep.Spec.Template.Spec.Affinity = &v1.Affinity{
+				PodAntiAffinity: &v1.PodAntiAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
+						{
+							LabelSelector: dep.Spec.Selector,
+							TopologyKey:   v1.LabelHostname,
+						},
+					},
+				},
+			}
+			env.ExpectCreated(nodeClass, nodePool, dep)
+
+			env.EventuallyExpectCreatedNodeClaimCount("==", 5)
+			nodes := env.EventuallyExpectCreatedNodeCount("==", 5)
+			env.EventuallyExpectHealthyPodCount(selector, int(numPods))
+
+			By("adding finalizers to the nodes to prevent termination")
+			for _, node := range nodes {
+				Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed())
+				node.Finalizers = append(node.Finalizers, common.TestingFinalizer)
+				env.ExpectUpdated(node)
+			}
+
+			dep.Spec.Replicas = lo.ToPtr[int32](1)
+			By("making the nodes empty")
+			// Update the deployment to only contain 1 replica.
+			env.ExpectUpdated(dep)
+
+			// Ensure that we get two nodes tainted, and that they overlap during the consolidation
+			env.EventuallyExpectTaintedNodeCount("==", 2)
+			nodes = env.ConsistentlyExpectTaintedNodeCount("==", 2, time.Second*5)
+
+			// Remove the finalizer from each node so that the nodes can be terminated
+			for _, node := range nodes {
+				Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
+			}
+
+			// After the deletion timestamp is set and all pods are drained,
+			// the nodes should be gone.
+			env.EventuallyExpectNotFound(nodes[0], nodes[1])
+
+			// This check ensures that we are consolidating nodes at the same time
+			env.EventuallyExpectTaintedNodeCount("==", 2)
+			nodes = env.ConsistentlyExpectTaintedNodeCount("==", 2, time.Second*5)
+
+			for _, node := range nodes {
+				Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
+			}
+			env.EventuallyExpectNotFound(nodes[0], nodes[1])
+
+			// Expect there to only be one node remaining for the last replica
+			env.ExpectNodeCount("==", 1)
+		})
+ It ("should respect budgets for non-empty delete consolidation" , func () {
153
+ // This test will hold consolidation until we are ready to execute it
154
+ nodePool .Spec .Disruption .ConsolidateAfter = & corev1beta1.NillableDuration {}
155
+
156
+ nodePool = test .ReplaceRequirements (nodePool ,
157
+ v1.NodeSelectorRequirement {
158
+ Key : v1beta1 .LabelInstanceSize ,
159
+ Operator : v1 .NodeSelectorOpIn ,
160
+ Values : []string {"2xlarge" },
161
+ },
162
+ )
163
+ // We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time.
164
+ nodePool .Spec .Disruption .Budgets = []corev1beta1.Budget {{
165
+ Nodes : "50%" ,
166
+ }}
167
+ numPods = 9
168
+ dep = test .Deployment (test.DeploymentOptions {
169
+ Replicas : numPods ,
170
+ PodOptions : test.PodOptions {
171
+ ObjectMeta : metav1.ObjectMeta {
172
+ Labels : map [string ]string {"app" : "large-app" },
173
+ },
174
+ // Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods.
175
+ ResourceRequirements : v1.ResourceRequirements {
176
+ Requests : v1.ResourceList {
177
+ v1 .ResourceCPU : resource .MustParse ("2100m" ),
178
+ },
179
+ },
180
+ },
181
+ })
182
+ selector = labels .SelectorFromSet (dep .Spec .Selector .MatchLabels )
183
+ env .ExpectCreated (nodeClass , nodePool , dep )
184
+
185
+ env .EventuallyExpectCreatedNodeClaimCount ("==" , 3 )
186
+ nodes := env .EventuallyExpectCreatedNodeCount ("==" , 3 )
187
+ env .EventuallyExpectHealthyPodCount (selector , int (numPods ))
188
+
189
+ By ("scaling down the deployment" )
190
+ // Update the deployment to a third of the replicas.
191
+ dep .Spec .Replicas = lo.ToPtr [int32 ](3 )
192
+ env .ExpectUpdated (dep )
193
+
194
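+			// Spread the remaining pods one per node so that every node is non-empty when consolidation starts.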
+			env.ForcePodsToSpread(nodes...)
+			env.EventuallyExpectHealthyPodCount(selector, 3)
+
+			By("cordoning and adding finalizers to the nodes")
+			// Add a finalizer to each node so that we can stop termination disruptions
+			for _, node := range nodes {
+				Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed())
+				node.Finalizers = append(node.Finalizers, common.TestingFinalizer)
+				env.ExpectUpdated(node)
+			}
+
+			By("enabling consolidation")
+			nodePool.Spec.Disruption.ConsolidateAfter = nil
+			env.ExpectUpdated(nodePool)
+
+			// Ensure that we get two nodes tainted, and that they overlap during the consolidation
+			env.EventuallyExpectTaintedNodeCount("==", 2)
+			nodes = env.ConsistentlyExpectTaintedNodeCount("==", 2, time.Second*5)
+
+			for _, node := range nodes {
+				Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed())
+			}
+			env.EventuallyExpectNotFound(nodes[0], nodes[1])
+			env.ExpectNodeCount("==", 1)
+		})
+ It ("should not allow consolidation if the budget is fully blocking" , func () {
220
+ // We're going to define a budget that doesn't allow any consolidation to happen
221
+ nodePool .Spec .Disruption .Budgets = []corev1beta1.Budget {{
222
+ Nodes : "0" ,
223
+ }}
224
+
225
+ // Hostname anti-affinity to require one pod on each node
226
+ dep .Spec .Template .Spec .Affinity = & v1.Affinity {
227
+ PodAntiAffinity : & v1.PodAntiAffinity {
228
+ RequiredDuringSchedulingIgnoredDuringExecution : []v1.PodAffinityTerm {
229
+ {
230
+ LabelSelector : dep .Spec .Selector ,
231
+ TopologyKey : v1 .LabelHostname ,
232
+ },
233
+ },
234
+ },
235
+ }
236
+ env .ExpectCreated (nodeClass , nodePool , dep )
237
+
238
+ env .EventuallyExpectCreatedNodeClaimCount ("==" , 5 )
239
+ env .EventuallyExpectCreatedNodeCount ("==" , 5 )
240
+ env .EventuallyExpectHealthyPodCount (selector , int (numPods ))
241
+
242
+ dep .Spec .Replicas = lo.ToPtr [int32 ](1 )
243
+ By ("making the nodes empty" )
244
+ // Update the deployment to only contain 1 replica.
245
+ env .ExpectUpdated (dep )
246
+
247
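+			// None of the 5 nodes should be disrupted for at least a minute, since the budget blocks all consolidation.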
+			env.ConsistentlyExpectNoDisruptions(5, time.Minute)
+		})
+		It("should not allow consolidation if the budget is fully blocking during a scheduled time", func() {
+			// We're going to define a budget that doesn't allow any consolidation to happen
+			// This is going to be on a schedule that only lasts 30 minutes, whose window starts 15 minutes before
+			// the current time and extends 15 minutes past the current time
+			// Times need to be in UTC since the karpenter controllers evaluate budget schedules in UTC
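+			// The schedule below is standard cron syntax (minute hour day-of-month month day-of-week),
+			// so the blocking window recurs daily starting at windowStart and lasts 30 minutes.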
+			windowStart := time.Now().Add(-time.Minute * 15).UTC()
+			nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{
+				Nodes:    "0",
+				Schedule: lo.ToPtr(fmt.Sprintf("%d %d * * *", windowStart.Minute(), windowStart.Hour())),
+				Duration: &metav1.Duration{Duration: time.Minute * 30},
+			}}
+
+			// Hostname anti-affinity to require one pod on each node
+			dep.Spec.Template.Spec.Affinity = &v1.Affinity{
+				PodAntiAffinity: &v1.PodAntiAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
+						{
+							LabelSelector: dep.Spec.Selector,
+							TopologyKey:   v1.LabelHostname,
+						},
+					},
+				},
+			}
+			env.ExpectCreated(nodeClass, nodePool, dep)
+
+			env.EventuallyExpectCreatedNodeClaimCount("==", 5)
+			env.EventuallyExpectCreatedNodeCount("==", 5)
+			env.EventuallyExpectHealthyPodCount(selector, int(numPods))
+
+			dep.Spec.Replicas = lo.ToPtr[int32](1)
+			By("making the nodes empty")
+			// Update the deployment to only contain 1 replica.
+			env.ExpectUpdated(dep)
+
+			env.ConsistentlyExpectNoDisruptions(5, time.Minute)
+		})
+	})
	DescribeTable("should consolidate nodes (delete)", Label(debug.NoWatch), Label(debug.NoEvents),
		func(spotToSpot bool) {
			nodePool := test.NodePool(corev1beta1.NodePool{