@@ -28,7 +28,8 @@ import (
28
28
"sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling"
29
29
)
30
30
31
- const SingleNodeConsolidationTimeoutDuration = 3 * time .Minute
31
+ var SingleNodeConsolidationTimeoutDuration = 3 * time .Minute
32
+
32
33
const SingleNodeConsolidationType = "single"
33
34
34
35
// SingleNodeConsolidation is the consolidation controller that performs single-node consolidation.
@@ -45,85 +46,6 @@ func NewSingleNodeConsolidation(consolidation consolidation) *SingleNodeConsolid
45
46
}
46
47
}
47
48
48
- func (s * SingleNodeConsolidation ) groupCandidatesByNodePool (candidates []* Candidate ) (map [string ][]* Candidate , []string ) {
49
- nodePoolCandidates := make (map [string ][]* Candidate )
50
- nodePoolNames := []string {}
51
-
52
- for _ , candidate := range candidates {
53
- nodePoolName := candidate .nodePool .Name
54
- if _ , exists := nodePoolCandidates [nodePoolName ]; ! exists {
55
- nodePoolNames = append (nodePoolNames , nodePoolName )
56
- }
57
- nodePoolCandidates [nodePoolName ] = append (nodePoolCandidates [nodePoolName ], candidate )
58
- }
59
- return nodePoolCandidates , nodePoolNames
60
- }
61
-
62
- func (s * SingleNodeConsolidation ) sortNodePoolsByTimeout (ctx context.Context , nodePoolNames []string ) {
63
- // Log the timed out nodepools that we're prioritizing
64
- timedOutNodePools := []string {}
65
- for np := range s .nodePoolsTimedOut {
66
- timedOutNodePools = append (timedOutNodePools , np )
67
- }
68
- if len (timedOutNodePools ) > 0 {
69
- log .FromContext (ctx ).V (1 ).Info ("prioritizing nodepools that have not yet been considered due to timeouts in previous runs: %v" , timedOutNodePools )
70
- }
71
-
72
- // Prioritize nodepools that timed out in previous runs
73
- sort .Slice (nodePoolNames , func (i , j int ) bool {
74
- // If nodepool i timed out but j didn't, i comes first
75
- if s.nodePoolsTimedOut [nodePoolNames [i ]] && ! s.nodePoolsTimedOut [nodePoolNames [j ]] {
76
- return true
77
- }
78
- // If nodepool j timed out but i didn't, j comes first
79
- if ! s.nodePoolsTimedOut [nodePoolNames [i ]] && s.nodePoolsTimedOut [nodePoolNames [j ]] {
80
- return false
81
- }
82
- // If both or neither timed out, keep original order
83
- return i < j
84
- })
85
- }
86
-
87
- func (s * SingleNodeConsolidation ) shuffleCandidates (nodePoolCandidates map [string ][]* Candidate , nodePoolNames []string ) []* Candidate {
88
- result := make ([]* Candidate , 0 )
89
- maxCandidatesPerNodePool := 0
90
-
91
- // Find the maximum number of candidates in any nodepool
92
- for _ , nodePoolName := range nodePoolNames {
93
- if len (nodePoolCandidates [nodePoolName ]) > maxCandidatesPerNodePool {
94
- maxCandidatesPerNodePool = len (nodePoolCandidates [nodePoolName ])
95
- }
96
- }
97
-
98
- // Interweave candidates from different nodepools
99
- for i := range maxCandidatesPerNodePool {
100
- for _ , nodePoolName := range nodePoolNames {
101
- if i < len (nodePoolCandidates [nodePoolName ]) {
102
- result = append (result , nodePoolCandidates [nodePoolName ][i ])
103
- }
104
- }
105
- }
106
-
107
- return result
108
- }
109
-
110
- // sortCandidates interweaves candidates from different nodepools and prioritizes nodepools
111
- // that timed out in previous runs
112
- func (s * SingleNodeConsolidation ) sortCandidates (candidates []* Candidate ) []* Candidate {
113
- ctx := context .Background ()
114
-
115
- // First sort by disruption cost as the base ordering
116
- sort .Slice (candidates , func (i int , j int ) bool {
117
- return candidates [i ].disruptionCost < candidates [j ].disruptionCost
118
- })
119
-
120
- nodePoolCandidates , nodePoolNames := s .groupCandidatesByNodePool (candidates )
121
-
122
- s .sortNodePoolsByTimeout (ctx , nodePoolNames )
123
-
124
- return s .shuffleCandidates (nodePoolCandidates , nodePoolNames )
125
- }
126
-
127
49
// ComputeCommand generates a disruption command given candidates
128
50
// nolint:gocyclo
129
51
func (s * SingleNodeConsolidation ) ComputeCommand (ctx context.Context , disruptionBudgetMapping map [string ]int , candidates ... * Candidate ) (Command , scheduling.Results , error ) {
@@ -146,6 +68,19 @@ func (s *SingleNodeConsolidation) ComputeCommand(ctx context.Context, disruption
146
68
}
147
69
148
70
for i , candidate := range candidates {
71
+ if s .clock .Now ().After (timeout ) {
72
+ ConsolidationTimeoutsTotal .Inc (map [string ]string {consolidationTypeLabel : s .ConsolidationType ()})
73
+ log .FromContext (ctx ).V (1 ).Info (fmt .Sprintf ("abandoning single-node consolidation due to timeout after evaluating %d candidates" , i ))
74
+
75
+ // Mark all nodepools that we haven't seen yet as timed out
76
+ for _ , c := range candidates [i :] {
77
+ if ! nodePoolsSeen [c .nodePool .Name ] {
78
+ s .nodePoolsTimedOut [c .nodePool .Name ] = true
79
+ }
80
+ }
81
+
82
+ return Command {}, scheduling.Results {}, nil
83
+ }
149
84
// Track that we've considered this nodepool
150
85
nodePoolsSeen [candidate .nodePool .Name ] = true
151
86
@@ -162,19 +97,6 @@ func (s *SingleNodeConsolidation) ComputeCommand(ctx context.Context, disruption
162
97
if len (candidate .reschedulablePods ) == 0 {
163
98
continue
164
99
}
165
- if s .clock .Now ().After (timeout ) {
166
- ConsolidationTimeoutsTotal .Inc (map [string ]string {consolidationTypeLabel : s .ConsolidationType ()})
167
- log .FromContext (ctx ).V (1 ).Info (fmt .Sprintf ("abandoning single-node consolidation due to timeout after evaluating %d candidates" , i ))
168
-
169
- // Mark all nodepools that we haven't seen yet as timed out
170
- for _ , c := range candidates [i :] {
171
- if ! nodePoolsSeen [c .nodePool .Name ] {
172
- s .nodePoolsTimedOut [c .nodePool .Name ] = true
173
- }
174
- }
175
-
176
- return Command {}, scheduling.Results {}, nil
177
- }
178
100
// compute a possible consolidation option
179
101
cmd , results , err := s .computeConsolidation (ctx , candidate )
180
102
if err != nil {
@@ -229,3 +151,97 @@ func (s *SingleNodeConsolidation) Class() string {
229
151
func (s * SingleNodeConsolidation ) ConsolidationType () string {
230
152
return SingleNodeConsolidationType
231
153
}
154
+
155
+ func (s * SingleNodeConsolidation ) groupCandidatesByNodePool (candidates []* Candidate ) (map [string ][]* Candidate , []string ) {
156
+ nodePoolCandidates := make (map [string ][]* Candidate )
157
+ nodePoolNames := []string {}
158
+
159
+ for _ , candidate := range candidates {
160
+ nodePoolName := candidate .nodePool .Name
161
+ if _ , exists := nodePoolCandidates [nodePoolName ]; ! exists {
162
+ nodePoolNames = append (nodePoolNames , nodePoolName )
163
+ }
164
+ nodePoolCandidates [nodePoolName ] = append (nodePoolCandidates [nodePoolName ], candidate )
165
+ }
166
+ return nodePoolCandidates , nodePoolNames
167
+ }
168
+
169
+ func (s * SingleNodeConsolidation ) sortNodePoolsByTimeout (ctx context.Context , nodePoolNames []string ) {
170
+ // Log the timed out nodepools that we're prioritizing
171
+ timedOutNodePools := []string {}
172
+ for np := range s .nodePoolsTimedOut {
173
+ timedOutNodePools = append (timedOutNodePools , np )
174
+ }
175
+ if len (timedOutNodePools ) > 0 {
176
+ log .FromContext (ctx ).V (1 ).Info ("prioritizing nodepools that have not yet been considered due to timeouts in previous runs: %v" , timedOutNodePools )
177
+ }
178
+
179
+ // Prioritize nodepools that timed out in previous runs
180
+ sort .Slice (nodePoolNames , func (i , j int ) bool {
181
+ // If nodepool i timed out but j didn't, i comes first
182
+ if s.nodePoolsTimedOut [nodePoolNames [i ]] && ! s.nodePoolsTimedOut [nodePoolNames [j ]] {
183
+ return true
184
+ }
185
+ // If nodepool j timed out but i didn't, j comes first
186
+ if ! s.nodePoolsTimedOut [nodePoolNames [i ]] && s.nodePoolsTimedOut [nodePoolNames [j ]] {
187
+ return false
188
+ }
189
+ // If both or neither timed out, keep original order
190
+ return i < j
191
+ })
192
+ }
193
+
194
+ func (s * SingleNodeConsolidation ) shuffleCandidates (nodePoolCandidates map [string ][]* Candidate , nodePoolNames []string ) []* Candidate {
195
+ result := make ([]* Candidate , 0 )
196
+ maxCandidatesPerNodePool := 0
197
+
198
+ // Find the maximum number of candidates in any nodepool
199
+ for _ , nodePoolName := range nodePoolNames {
200
+ if len (nodePoolCandidates [nodePoolName ]) > maxCandidatesPerNodePool {
201
+ maxCandidatesPerNodePool = len (nodePoolCandidates [nodePoolName ])
202
+ }
203
+ }
204
+
205
+ // Interweave candidates from different nodepools
206
+ for i := range maxCandidatesPerNodePool {
207
+ for _ , nodePoolName := range nodePoolNames {
208
+ if i < len (nodePoolCandidates [nodePoolName ]) {
209
+ result = append (result , nodePoolCandidates [nodePoolName ][i ])
210
+ }
211
+ }
212
+ }
213
+
214
+ return result
215
+ }
216
+
217
+ // sortCandidates interweaves candidates from different nodepools and prioritizes nodepools
218
+ // that timed out in previous runs
219
+ func (s * SingleNodeConsolidation ) sortCandidates (candidates []* Candidate ) []* Candidate {
220
+ ctx := context .Background ()
221
+
222
+ // First sort by disruption cost as the base ordering
223
+ sort .Slice (candidates , func (i int , j int ) bool {
224
+ return candidates [i ].disruptionCost < candidates [j ].disruptionCost
225
+ })
226
+
227
+ nodePoolCandidates , nodePoolNames := s .groupCandidatesByNodePool (candidates )
228
+
229
+ s .sortNodePoolsByTimeout (ctx , nodePoolNames )
230
+
231
+ return s .shuffleCandidates (nodePoolCandidates , nodePoolNames )
232
+ }
233
+
234
// SortCandidates is a public wrapper around sortCandidates for testing.
func (s *SingleNodeConsolidation) SortCandidates(candidates []*Candidate) []*Candidate {
	return s.sortCandidates(candidates)
}

// MarkNodePoolTimedOut marks a nodepool as timed out for testing.
// It records the nodepool in s.nodePoolsTimedOut so subsequent sorts
// prioritize it.
func (s *SingleNodeConsolidation) MarkNodePoolTimedOut(nodePoolName string) {
	s.nodePoolsTimedOut[nodePoolName] = true
}

// IsNodePoolTimedOut checks if a nodepool is marked as timed out for testing.
func (s *SingleNodeConsolidation) IsNodePoolTimedOut(nodePoolName string) bool {
	return s.nodePoolsTimedOut[nodePoolName]
}
0 commit comments