Skip to content

Commit 7de278a

Browse files
committed
feat(scheduler): add built-in scenario generators
Signed-off-by: Erez Freiberger <enoodle@gmail.com>
1 parent 4fc65f5 commit 7de278a

6 files changed

Lines changed: 807 additions & 8 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
77
## [Unreleased]
88

99
### Added
10+
- Added built-in `NodeLocalGreedy` and `MultiNodeGang` scenario generator implementations for bounded reclaim, preempt, and consolidation search.
1011
- Added an opt-in `deviceaccess` admission plugin (`--block-nvidia-visible-devices`, config field `admission.blockNvidiaVisibleDevices`, default disabled) that (1) rejects pods overriding the `NVIDIA_VISIBLE_DEVICES` environment variable with values other than `void`/`none` (or via a `valueFrom` reference), and (2) injects `NVIDIA_VISIBLE_DEVICES=void` into containers that do not request a GPU, blocking their access to GPUs on the node.
1112
- Added support for configuring admission Pod Disruption Budget via Helm values (`admission.podDisruptionBudget`) [#1490](https://github.com/kai-scheduler/KAI-Scheduler/pull/1490) [dttung2905](https://github.com/dttung2905)
1213
- Added an opt-in `hamicore` binder plugin (depends on `gpusharing`) to write the HAMI-core GPU memory limit (`CUDA_DEVICE_MEMORY_LIMIT`) for fractional GPU pods.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package solvers
5+
6+
import (
7+
"github.com/kai-scheduler/KAI-scheduler/pkg/common/constants"
8+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/api"
9+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/framework"
10+
)
11+
12+
type multiNodeGangGenerator struct {
13+
builder *PodAccumulatedScenarioBuilder
14+
first bool
15+
}
16+
17+
func NewMultiNodeGangGenerator(ctx framework.ScenarioGeneratorContext) framework.ScenarioGenerator {
18+
solveCtx, generateVictimsQueue, ok := validateScenarioGeneratorContext(ctx)
19+
if !ok {
20+
return nil
21+
}
22+
victimsQueue := generateVictimsQueue()
23+
if victimsQueue == nil {
24+
return nil
25+
}
26+
27+
return &multiNodeGangGenerator{
28+
builder: NewPodAccumulatedScenarioBuilder(
29+
solveCtx.Session,
30+
solveCtx.PartialPendingJob,
31+
solveCtx.RecordedVictimsJobs,
32+
victimsQueue,
33+
solveCtx.FeasibleNodes,
34+
),
35+
first: true,
36+
}
37+
}
38+
39+
func (g *multiNodeGangGenerator) Name() string {
40+
return constants.GeneratorMultiNodeGang
41+
}
42+
43+
func (g *multiNodeGangGenerator) Next() api.ScenarioInfo {
44+
if g.first {
45+
g.first = false
46+
return g.builder.GetValidScenario()
47+
}
48+
return g.builder.GetNextScenario()
49+
}
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package solvers
5+
6+
import (
7+
"sort"
8+
"strings"
9+
10+
"github.com/kai-scheduler/KAI-scheduler/pkg/common/constants"
11+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/actions/common/solvers/scenario"
12+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/api"
13+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/api/common_info"
14+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/api/pod_info"
15+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/api/podgroup_info"
16+
"github.com/kai-scheduler/KAI-scheduler/pkg/scheduler/framework"
17+
)
18+
19+
type nodeLocalGreedyGenerator struct {
20+
solveCtx *SolveContext
21+
generateVictimsQueue GenerateVictimsQueue
22+
builder *PodAccumulatedScenarioBuilder
23+
scenarios []*scenario.ByNodeScenario
24+
advanceNext bool
25+
}
26+
27+
func NewNodeLocalGreedyGenerator(ctx framework.ScenarioGeneratorContext) framework.ScenarioGenerator {
28+
solveCtx, generateVictimsQueue, ok := validateScenarioGeneratorContext(ctx)
29+
if !ok {
30+
return nil
31+
}
32+
return &nodeLocalGreedyGenerator{
33+
solveCtx: solveCtx,
34+
generateVictimsQueue: generateVictimsQueue,
35+
}
36+
}
37+
38+
func (g *nodeLocalGreedyGenerator) Name() string {
39+
return constants.GeneratorNodeLocalGreedy
40+
}
41+
42+
func (g *nodeLocalGreedyGenerator) Next() api.ScenarioInfo {
43+
if !g.ensureBuilder() {
44+
return nil
45+
}
46+
for {
47+
if sn := g.popScenario(); sn != nil {
48+
return sn
49+
}
50+
accumulated := g.nextAccumulatedScenario()
51+
if accumulated == nil {
52+
return nil
53+
}
54+
g.scenarios = nodeLocalScenarios(g.builder.session, accumulated)
55+
}
56+
}
57+
58+
func (g *nodeLocalGreedyGenerator) ensureBuilder() bool {
59+
if g.builder != nil {
60+
return true
61+
}
62+
victimsQueue := g.generateVictimsQueue()
63+
if victimsQueue == nil {
64+
return false
65+
}
66+
g.builder = NewPodAccumulatedScenarioBuilder(
67+
g.solveCtx.Session,
68+
g.solveCtx.PartialPendingJob,
69+
g.solveCtx.RecordedVictimsJobs,
70+
victimsQueue,
71+
g.solveCtx.FeasibleNodes,
72+
)
73+
return true
74+
}
75+
76+
func addPotentialVictimsGroupedByJob(sn *scenario.ByNodeScenario, tasks []*pod_info.PodInfo) {
77+
groupedTasks := map[common_info.PodGroupID][]*pod_info.PodInfo{}
78+
var jobOrder []common_info.PodGroupID
79+
for _, task := range tasks {
80+
if _, found := groupedTasks[task.Job]; !found {
81+
jobOrder = append(jobOrder, task.Job)
82+
}
83+
groupedTasks[task.Job] = append(groupedTasks[task.Job], task)
84+
}
85+
for _, jobID := range jobOrder {
86+
sn.AddPotentialVictimsTasks(groupedTasks[jobID])
87+
}
88+
}
89+
90+
func (g *nodeLocalGreedyGenerator) popScenario() *scenario.ByNodeScenario {
91+
if len(g.scenarios) == 0 {
92+
return nil
93+
}
94+
sn := g.scenarios[0]
95+
g.scenarios = g.scenarios[1:]
96+
return sn
97+
}
98+
99+
func (g *nodeLocalGreedyGenerator) nextAccumulatedScenario() *scenario.ByNodeScenario {
100+
for {
101+
if g.advanceNext {
102+
if g.builder.victimsJobsQueue.IsEmpty() {
103+
return nil
104+
}
105+
if !g.builder.addNextPotentialVictims() {
106+
continue
107+
}
108+
}
109+
g.advanceNext = true
110+
111+
if g.builder.lastScenario == nil {
112+
return nil
113+
}
114+
if !g.builder.outerScenarioValid() {
115+
continue
116+
}
117+
return g.builder.lastScenario
118+
}
119+
}
120+
121+
func nodeLocalScenarios(session *framework.Session, base *scenario.ByNodeScenario) []*scenario.ByNodeScenario {
122+
if base == nil {
123+
return nil
124+
}
125+
if len(base.PotentialVictimsTasks()) == 0 {
126+
if len(base.RecordedVictimsTasks()) == 0 {
127+
return nil
128+
}
129+
return []*scenario.ByNodeScenario{base}
130+
}
131+
132+
var scenarios []*scenario.ByNodeScenario
133+
seen := map[string]struct{}{}
134+
for _, nodeName := range nodeNamesOfJob(base.LatestPotentialVictim()) {
135+
victimTasks := base.VictimsTasksFromNodes([]string{nodeName})
136+
if len(victimTasks) == 0 {
137+
continue
138+
}
139+
key := victimUIDSetKey(victimTasks)
140+
if _, found := seen[key]; found {
141+
continue
142+
}
143+
seen[key] = struct{}{}
144+
sn := scenario.NewByNodeScenario(
145+
session,
146+
base.GetPreemptor(),
147+
base.PendingTasks(),
148+
nil,
149+
base.RecordedVictimsJobs(),
150+
)
151+
addPotentialVictimsGroupedByJob(sn, victimTasks)
152+
scenarios = append(scenarios, sn)
153+
}
154+
return scenarios
155+
}
156+
157+
func nodeNamesOfJob(job *podgroup_info.PodGroupInfo) []string {
158+
if job == nil {
159+
return nil
160+
}
161+
seen := map[string]struct{}{}
162+
for _, task := range job.GetAllPodsMap() {
163+
if task.NodeName == "" {
164+
continue
165+
}
166+
seen[task.NodeName] = struct{}{}
167+
}
168+
nodeNames := make([]string, 0, len(seen))
169+
for nodeName := range seen {
170+
nodeNames = append(nodeNames, nodeName)
171+
}
172+
sort.Strings(nodeNames)
173+
return nodeNames
174+
}
175+
176+
func victimUIDSetKey(tasks []*pod_info.PodInfo) string {
177+
uids := make([]string, 0, len(tasks))
178+
for _, task := range tasks {
179+
uids = append(uids, string(task.UID))
180+
}
181+
sort.Strings(uids)
182+
return strings.Join(uids, "\x00")
183+
}

0 commit comments

Comments
 (0)