Skip to content

Commit 319d1e4

Browse files
committed
refactor: unit maxComponentSets calculation
Signed-off-by: zhzhuang-zju <[email protected]>
1 parent 22d9c36 commit 319d1e4

File tree

4 files changed

+216
-281
lines changed

4 files changed

+216
-281
lines changed

pkg/estimator/client/general.go

Lines changed: 38 additions & 194 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package client
1919
import (
2020
"context"
2121
"fmt"
22-
"maps"
2322
"math"
2423
"sort"
2524

@@ -29,7 +28,11 @@ import (
2928

3029
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
3130
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
31+
"github.com/karmada-io/karmada/pkg/estimator"
32+
"github.com/karmada-io/karmada/pkg/estimator/pb"
3233
"github.com/karmada-io/karmada/pkg/features"
34+
"github.com/karmada-io/karmada/pkg/util"
35+
schedulerframework "github.com/karmada-io/karmada/pkg/util/lifted/scheduler/framework"
3336
)
3437

3538
// GeneralEstimator is the default replica estimator.
@@ -127,9 +130,9 @@ func (ge *GeneralEstimator) maxAvailableComponentSets(cluster *clusterv1alpha1.C
127130
return int32(allowedPods) // #nosec G115: integer overflow conversion int64 -> int32
128131
}
129132

130-
podBound := allowedPods / podsPerSet
133+
podBound := int32(allowedPods / podsPerSet) // #nosec G115: integer overflow conversion int64 -> int32
131134
if len(perSet) == 0 || allZero(perSet) {
132-
return int32(podBound) // #nosec G115: integer overflow conversion int64 -> int32
135+
return podBound
133136
}
134137

135138
// Find limiting resource requirement, which will bound maxSet calculation
@@ -144,116 +147,57 @@ func (ge *GeneralEstimator) maxAvailableComponentSets(cluster *clusterv1alpha1.C
144147
return 0 // no capacity for this resource
145148
}
146149

147-
resBound := resAvail / req
150+
resBound := int32(resAvail / req) // #nosec G115: integer overflow conversion int64 -> int32
148151
if resBound < maxSets {
149152
maxSets = resBound
150153
}
151154
}
152155

153156
if features.FeatureGate.Enabled(features.CustomizedClusterResourceModeling) && len(cluster.Status.ResourceSummary.AllocatableModelings) > 0 {
154-
if num, err := getMaximumSetsBasedOnResourceModels(cluster, components, podBound); err != nil {
157+
if num, err := getMaximumSetsBasedOnResourceModels(cluster, components, maxSets); err != nil {
155158
klog.Warningf("Failed to get maximum sets based on resource models, skipping: %v", err)
156159
} else if num < maxSets {
157160
maxSets = num
158161
}
159162
}
160163

161-
return int32(maxSets) // #nosec G115: integer overflow conversion int64 -> int32
164+
return maxSets
162165
}
163166

164167
// getMaximumSetsBasedOnResourceModels computes the maximum number of full sets that can be
165168
// placed on a cluster using the cluster's ResourceModels. It expands one set into
166169
// replica kinds (demand + count) and performs a first-fit-decreasing placement onto model-grade nodes.
167170
// `upperSets` caps the search. The caller passes the maxSets value it has computed so far.
168-
func getMaximumSetsBasedOnResourceModels(
169-
cluster *clusterv1alpha1.Cluster,
170-
components []workv1alpha2.Component,
171-
upperBound int64,
172-
) (int64, error) {
173-
if upperBound <= 0 {
174-
return 0, nil
175-
}
176-
177-
// Compressed one-set: per-kind (identical replicas grouped)
178-
oneSetKinds := expandKindsOneSet(components)
179-
if len(oneSetKinds) == 0 {
180-
// If there are no pods to schedule, just return upperBound
181-
return upperBound, nil
182-
}
183-
184-
// Use cluster "available" totals (allocatable - allocated - allocating) for normalized scoring
185-
// This reflects what the cluster can actually accept now
186-
totals := availableResourceMap(cluster.Status.ResourceSummary)
187-
188-
for i := range oneSetKinds {
189-
oneSetKinds[i].score = demandScoreNormalized(oneSetKinds[i].dem, totals)
190-
}
191-
sort.Slice(oneSetKinds, func(i, j int) bool {
192-
if oneSetKinds[i].score == oneSetKinds[j].score {
193-
return demandSum(oneSetKinds[i].dem) > demandSum(oneSetKinds[j].dem)
194-
}
195-
return oneSetKinds[i].score > oneSetKinds[j].score
196-
})
197-
198-
//Build model nodes from Spec.ResourceModels and Status.AllocatableModelings
199-
nodes, err := buildModelNodes(cluster)
171+
func getMaximumSetsBasedOnResourceModels(cluster *clusterv1alpha1.Cluster, components []workv1alpha2.Component, upperSets int32) (int32, error) {
172+
nodes, err := getNodesAvailableResources(cluster)
200173
if err != nil {
201174
return -1, err
202175
}
203-
if len(nodes) == 0 {
204-
return 0, nil
205-
}
206176

207-
var sets int64
208-
for sets < upperBound {
209-
if !placeOneSet(oneSetKinds, nodes) {
210-
break
177+
pbComponents := make([]pb.Component, 0, len(components))
178+
for _, comp := range components {
179+
// Deep-copy so that pointer is not shared between goroutines
180+
var cr *workv1alpha2.ComponentReplicaRequirements
181+
if comp.ReplicaRequirements != nil {
182+
cr = comp.ReplicaRequirements.DeepCopy()
211183
}
212-
sets++
213-
}
214-
return sets, nil
215-
}
216184

217-
// placeOneSet attempts to place exactly ONE full set (all kinds with their per-set replica counts)
218-
// onto the provided working node capacities (in-place)
219-
// Returns true if successful
220-
func placeOneSet(orderedKinds []replicaKind, work []modelNode) bool {
221-
for _, k := range orderedKinds {
222-
remaining := k.count
223-
if remaining <= 0 {
224-
continue
225-
}
226-
// first-fit across nodes
227-
for n := range work {
228-
if remaining <= 0 {
229-
break
230-
}
231-
fit := maxFit(work[n].cap, k.dem)
232-
if fit <= 0 {
233-
continue
234-
}
235-
place := fit
236-
if place > remaining {
237-
place = remaining
238-
}
239-
consumeMul(work[n].cap, k.dem, place)
240-
remaining -= place
241-
}
242-
if remaining > 0 {
243-
return false
244-
}
185+
pbComponents = append(pbComponents, pb.Component{
186+
Name: comp.Name,
187+
Replicas: comp.Replicas,
188+
ReplicaRequirements: toPBReplicaRequirements(cr),
189+
})
245190
}
246-
return true
247-
}
248191

249-
// modelNode holds remaining capacity for a given node across all resource types
250-
type modelNode struct {
251-
cap map[corev1.ResourceName]int64
192+
matchNode := func(nodeClaim *pb.NodeClaim, node *schedulerframework.NodeInfo) bool {
193+
return true
194+
}
195+
return estimator.NewSchedulingSimulator(nodes, matchNode).SimulateSchedulingFFD(pbComponents, upperSets), nil
252196
}
253197

254198
// getNodesAvailableResources constructs identical nodes for each model grade using its Min vector,
255199
// repeated AllocatableModelings[grade].Count times. Grades are indexed directly.
256-
func buildModelNodes(cluster *clusterv1alpha1.Cluster) ([]modelNode, error) {
200+
func getNodesAvailableResources(cluster *clusterv1alpha1.Cluster) ([]*schedulerframework.NodeInfo, error) {
257201
if cluster == nil {
258202
return nil, fmt.Errorf("nil cluster")
259203
}
@@ -267,11 +211,11 @@ func buildModelNodes(cluster *clusterv1alpha1.Cluster) ([]modelNode, error) {
267211
}
268212

269213
// Build capacity template per grade
270-
capsByGrade := make(map[uint]map[corev1.ResourceName]int64, len(spec))
214+
capsByGrade := make(map[uint]corev1.ResourceList, len(spec))
271215
for _, m := range spec {
272-
tmpl := make(map[corev1.ResourceName]int64, len(m.Ranges))
216+
tmpl := make(corev1.ResourceList, len(m.Ranges))
273217
for _, r := range m.Ranges {
274-
tmpl[r.Name] = quantityAsInt64(r.Min)
218+
tmpl[r.Name] = r.Min
275219
}
276220
capsByGrade[m.Grade] = tmpl
277221
}
@@ -293,122 +237,22 @@ func buildModelNodes(cluster *clusterv1alpha1.Cluster) ([]modelNode, error) {
293237
sort.Ints(grades)
294238

295239
// Emit nodes for grades present in both spec & status.
296-
var nodes []modelNode
240+
var nodes []*schedulerframework.NodeInfo
297241
for _, grade := range grades {
298242
tmpl, cnt := capsByGrade[uint(grade)], countByGrade[uint(grade)] // #nosec G115: integer overflow conversion int -> uint
299243
if tmpl == nil || cnt == 0 {
300244
continue
301245
}
302-
for range cnt {
303-
capCopy := maps.Clone(tmpl)
304-
nodes = append(nodes, modelNode{cap: capCopy})
305-
}
306-
}
307-
return nodes, nil
308-
}
309246

310-
// replicaKind represents a single type of component, including replica demand and count
311-
type replicaKind struct {
312-
dem map[corev1.ResourceName]int64 // per-replica demand
313-
count int64 // how many replicas
314-
score float64 // ordering heuristic (higher first)
315-
}
316-
317-
// expandKindsOneSet flattens components into a slice of unique replica kinds.
318-
// Each entry holds the per-replica demand and how many replicas of that kind a set needs.
319-
func expandKindsOneSet(components []workv1alpha2.Component) []replicaKind {
320-
kinds := make([]replicaKind, 0, len(components))
321-
for _, c := range components {
322-
if c.ReplicaRequirements == nil || c.ReplicaRequirements.ResourceRequest == nil {
323-
continue
324-
}
325-
// normalize per-replica demand
326-
base := make(map[corev1.ResourceName]int64, len(c.ReplicaRequirements.ResourceRequest))
327-
for name, qty := range c.ReplicaRequirements.ResourceRequest {
328-
base[name] = quantityAsInt64(qty)
329-
}
330-
// skip zero-demand or non-positive replica count
331-
if allZero(base) || c.Replicas <= 0 {
332-
continue
333-
}
334-
335-
k := replicaKind{
336-
dem: base,
337-
count: int64(c.Replicas),
338-
// score is filled later once we know cluster-wide totals
339-
}
340-
kinds = append(kinds, k)
341-
}
342-
return kinds
343-
}
344-
345-
// demandScoreNormalized returns the "max utilization ratio" of a demand vector against total capacities
346-
// If a resource is missing/zero in total, treat it as maximally constrained
347-
func demandScoreNormalized(
348-
demand map[corev1.ResourceName]int64,
349-
total map[corev1.ResourceName]int64,
350-
) float64 {
351-
var maxRatio float64
352-
for res, req := range demand {
353-
if req <= 0 {
354-
continue
355-
}
356-
totalCap := float64(total[res])
357-
if totalCap <= 0 {
358-
return math.MaxFloat64
359-
}
360-
ratio := float64(req) / totalCap
361-
if ratio > maxRatio {
362-
maxRatio = ratio
363-
}
364-
}
365-
return maxRatio
366-
}
367-
368-
// demandSum is used as a tie-breaker when initial scores are equal
369-
func demandSum(m map[corev1.ResourceName]int64) int64 {
370-
var s int64
371-
for _, v := range m {
372-
if v > 0 {
373-
s += v
374-
}
375-
}
376-
return s
377-
}
378-
379-
// maxFit returns how many copies of `dem` fit in `cap` simultaneously
380-
func maxFit(capacity map[corev1.ResourceName]int64, dem map[corev1.ResourceName]int64) int64 {
381-
var limit int64 = math.MaxInt64
382-
for k, req := range dem {
383-
if req <= 0 {
384-
continue
385-
}
386-
avail := capacity[k]
387-
if avail <= 0 {
388-
return 0
389-
}
390-
bound := avail / req
391-
if bound < limit {
392-
limit = bound
393-
}
394-
}
395-
if limit == math.MaxInt64 {
396-
return 0
397-
}
398-
return limit
399-
}
400-
401-
// consumeMul subtracts mult * dem from cap
402-
func consumeMul(capacity map[corev1.ResourceName]int64, dem map[corev1.ResourceName]int64, mult int64) {
403-
if mult <= 0 {
404-
return
405-
}
406-
for k, req := range dem {
407-
if req <= 0 {
408-
continue
247+
tmpl[corev1.ResourcePods] = *resource.NewQuantity(math.MaxInt64, resource.DecimalSI)
248+
for i := 0; i < cnt; i++ {
249+
node := &schedulerframework.NodeInfo{
250+
Allocatable: util.NewResource(tmpl),
251+
}
252+
nodes = append(nodes, node)
409253
}
410-
capacity[k] -= req * mult
411254
}
255+
return nodes, nil
412256
}
413257

414258
// podsInSet computes the total number of pods in the CRD

pkg/estimator/client/general_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,16 +244,16 @@ func comp(name string, replicas int32, rl corev1.ResourceList) workv1alpha2.Comp
244244
func TestGetMaximumSetsBasedOnResourceModels(t *testing.T) {
245245
const (
246246
GPU corev1.ResourceName = "nvidia.com/gpu"
247-
BIGU int64 = 100 // define a large upper bound so we can test model decision algo
247+
BIGU int32 = 100 // define a large upper bound so we can test model decision algo
248248
)
249249

250250
tests := []struct {
251251
name string
252252
cluster clusterv1alpha1.Cluster
253253
components []workv1alpha2.Component
254-
upperBound int64
254+
upperBound int32
255255
expectError bool
256-
expectedSets int64
256+
expectedSets int32
257257
}{
258258
{
259259
name: "No grades defined → error",

0 commit comments

Comments
 (0)