Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
### Fixed
- Fixed default node-scale-adjuster image name (`node-scale-adjuster` → `nodescaleadjuster`) so it matches the image published to GHCR
- Account for native sidecar containers (initContainers with `restartPolicy: Always`, KEP-753) in pod resource accounting, matching kubelet's `AggregateContainerRequests`. Previously, native sidecar requests were combined with regular container requests by taking the maximum instead of being summed with them, causing the scheduler to bind pods that kubelet then rejected at admission with `OutOfCpu`/`OutOfGpu`. [#1556](https://github.com/kai-scheduler/KAI-Scheduler/pull/1556)
- Fixed scheduler nil-pointer panic in the preempt scenario builder when a (partial) job has no tasks to allocate (`NewIdleGpusFilter` dereferenced a nil scenario); added the missing nil-guard matching the sibling filters [#1664](https://github.com/kai-scheduler/KAI-Scheduler/issues/1664) [sam-huang1223](https://github.com/sam-huang1223)

## [v0.15.2] - 2026-06-10

Expand All @@ -22,6 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

### Changed
- Updated Go toolchain and base build images to v1.26.3.
- Fixed Helm chart not wiring `podgrouper.queueLabelKey` into `spec.global.queueLabelKey` on the Config CR, so custom queue label keys were ignored at install time [#1655](https://github.com/kai-scheduler/KAI-Scheduler/pull/1655) [dttung2905](https://github.com/dttung2905)

## [v0.15.0] - 2026-05-20

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ type AccumulatedIdleGpus struct {

func NewIdleGpusFilter(
scenario *scenario.ByNodeScenario, nodeInfosMap map[string]*node_info.NodeInfo) *AccumulatedIdleGpus {
if scenario == nil {
return nil
}
idleGpusMap, relevantNodesSorted := createGpuMap(nodeInfosMap, len(scenario.PendingTasks()))

filter := &AccumulatedIdleGpus{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1378,3 +1378,9 @@ func TestAccumulatedIdleGpus_Filter(t *testing.T) {
})
}
}

func Test_NewIdleGpusFilter_NilScenario(t *testing.T) {
if filter := NewIdleGpusFilter(nil, map[string]*node_info.NodeInfo{}); filter != nil {
t.Fatalf("expected nil filter for nil scenario, got %#v", filter)
}
}
3 changes: 3 additions & 0 deletions pkg/scheduler/actions/common/solvers/pod_scenario_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ func (asb *PodAccumulatedScenarioBuilder) GetNextScenario() *solverscenario.ByNo
// outer state. advanceFirst controls whether the first pass starts by popping a
// victim or by evaluating the current state as-is.
func (asb *PodAccumulatedScenarioBuilder) iterate(advanceFirst bool) *solverscenario.ByNodeScenario {
if asb.lastScenario == nil {
return nil
}
needAdvance := advanceFirst
for {
if sub := asb.nextFromSubEmitter(); sub != nil {
Expand Down
18 changes: 18 additions & 0 deletions pkg/scheduler/actions/common/solvers/pod_scenario_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,24 @@ var _ = Describe("PodAccumulatedScenarioBuilder", func() {
Expect(numberOfGeneratedScenarios).To(Equal(len(potentialVictimsPerScenario)))
})
})

// Regression case: the preemptor job is created only with tasks in the
// Running phase, and the builder is expected to offer no scenario for it.
Context("preemptor with no tasks to allocate - nil scenario", func() {
BeforeEach(func() {
// Session is initialized with zero jobs and zero tasks per job.
ssn, _ = initializeSession(0, 0)
submitQueue := createQueue("team-a")
ssn.ClusterInfo.Queues[submitQueue.UID] = submitQueue
// Running tasks are not allocatable, so the job has no tasks to allocate.
reclaimerJob, _ = createJobWithTasks(1, 1, "team-a", v1.PodRunning, []v1.ResourceRequirements{})
// No previously recorded victim jobs are passed to the builder.
recordedVictimsJobs := []*podgroup_info.PodGroupInfo{}
victimsQueue := utils.GetVictimsQueue(ssn, nil)

scenarioBuilder = NewPodAccumulatedScenarioBuilder(ssn, reclaimerJob, recordedVictimsJobs, victimsQueue, ssn.ClusterInfo.Nodes)
})
// Both accessors are asserted to return nil; a panic in either call would
// fail the spec.
It("does not panic and yields no scenario", func() {
Expect(scenarioBuilder.GetValidScenario()).To(BeNil())
Expect(scenarioBuilder.GetNextScenario()).To(BeNil())
})
})
})

func initializeSession(jobsCount, tasksPerJob int) (*framework.Session, []*pod_info.PodInfo) {
Expand Down
Loading