Skip to content

Commit 3f02b07

Browse files
committed
guard preemptive reclaim by realCapability
Signed-off-by: zhoujinyu <2319109590@qq.com>
1 parent f4db07d commit 3f02b07

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

pkg/scheduler/plugins/capacity/capacity.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,13 @@ func (cp *capacityPlugin) OnSessionOpen(ssn *framework.Session) {
180180
attr := cp.queueOpts[queue.UID]
181181
futureUsed := attr.allocated.Clone().Add(task.Resreq)
182182

183+
if allocatable, _ := futureUsed.LessEqualWithDimensionAndResourcesName(attr.realCapability, task.Resreq); !allocatable {
184+
klog.V(3).Infof("Queue <%v> cannot reclaim for <%s/%s> because futureUsed <%v> exceeds realCapability <%v>.",
185+
queue.Name, task.Namespace, task.Name, futureUsed, attr.realCapability)
186+
metrics.UpdateQueueOverused(attr.name, true)
187+
return false
188+
}
189+
183190
// If there is a single dimension whose deserved is greater than allocated, current task can reclaim by preempt others.
184191
isPreemptive, resourceNames := futureUsed.LessEqualPartlyWithDimensionZeroFiltered(attr.deserved, task.Resreq)
185192
if isPreemptive {

pkg/scheduler/plugins/capacity/capacity_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,3 +958,53 @@ func Test_updateQueueAttrShare(t *testing.T) {
958958
})
959959
}
960960
}
961+
962+
func TestPreemptiveFn_RespectsCapability(t *testing.T) {
963+
// Queue has deserved and capability both {cpu: 8, mem: 100Gi}, allocated {cpu: 7, mem: 90Gi}.
964+
// A task requesting {cpu: 2, mem: 5Gi} would keep memory below deserved but push CPU above capability.
965+
// preemptiveFn should reject preemption in this case.
966+
967+
deserved := api.NewResource(api.BuildResourceList("8", "100Gi"))
968+
allocated := api.NewResource(api.BuildResourceList("7", "90Gi"))
969+
capability := api.NewResource(api.BuildResourceList("8", "100Gi"))
970+
971+
attr := &queueAttr{
972+
queueID: "q-test",
973+
name: "q-test",
974+
deserved: deserved,
975+
allocated: allocated,
976+
capability: capability,
977+
realCapability: capability.Clone(),
978+
}
979+
980+
taskRes := api.NewResource(api.BuildResourceList("2", "5Gi"))
981+
task := &api.TaskInfo{
982+
Resreq: taskRes,
983+
InitResreq: taskRes,
984+
}
985+
986+
queue := &api.QueueInfo{
987+
UID: "q-test",
988+
Name: "q-test",
989+
}
990+
991+
futureUsed := attr.allocated.Clone().Add(task.Resreq)
992+
if allocatable, _ := futureUsed.LessEqualWithDimensionAndResourcesName(attr.realCapability, task.Resreq); allocatable {
993+
t.Fatalf("precondition error: futureUsed %v should exceed capability %v", futureUsed, attr.realCapability)
994+
}
995+
996+
// Simulate the logic in preemptiveFn after our fix: first capability, then deserved.
997+
allocatable, _ := futureUsed.LessEqualWithDimensionAndResourcesName(attr.realCapability, task.Resreq)
998+
if allocatable {
999+
t.Fatalf("expected allocatable=false when futureUsed exceeds capability, got true")
1000+
}
1001+
1002+
// For completeness, check that even though memory is below deserved, we still don't allow reclaim
1003+
// because capability check fails.
1004+
isPreemptive, _ := futureUsed.LessEqualPartlyWithDimensionZeroFiltered(attr.deserved, task.Resreq)
1005+
if isPreemptive {
1006+
t.Logf("deserved check alone would allow reclaim, but capability check must block it")
1007+
}
1008+
1009+
_ = queue // queue is only needed to mirror function signature; logic above is independent of it.
1010+
}

0 commit comments

Comments
 (0)