Skip to content

Commit db28c0f

Browse files
authored
Take SubGroups into consideration when checking if job is ready for scheduling (#347)
1 parent d59ab5e commit db28c0f

File tree

8 files changed

+553
-19
lines changed

8 files changed

+553
-19
lines changed

pkg/scheduler/api/cluster_info.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import (
2424

2525
v1 "k8s.io/api/core/v1"
2626

27+
kueue "sigs.k8s.io/kueue/apis/kueue/v1alpha1"
28+
2729
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/bindrequest_info"
2830
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/common_info"
2931
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/configmap_info"
@@ -34,7 +36,6 @@ import (
3436
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/storagecapacity_info"
3537
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/storageclaim_info"
3638
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/storageclass_info"
37-
kueue "sigs.k8s.io/kueue/apis/kueue/v1alpha1"
3839
)
3940

4041
// ClusterInfo is a snapshot of cluster by cache.
@@ -93,9 +94,14 @@ func (ci ClusterInfo) String() string {
9394
str = str + fmt.Sprintf("\t Job(%s) name(%s) minAvailable(%v)\n",
9495
job.UID, job.Name, job.MinAvailable)
9596

97+
for _, subGroup := range job.SubGroups {
98+
str = str + fmt.Sprintf("\t\t subGroup(%s), minAvailable(%v)\n",
99+
subGroup.Name, subGroup.MinAvailable)
100+
}
101+
96102
i := 0
97103
for _, task := range job.PodInfos {
98-
str = str + fmt.Sprintf("\t\t %d: %v\n", i, task)
104+
str = str + fmt.Sprintf("\t\t task %d: %v\n", i, task)
99105
i++
100106
}
101107
}

pkg/scheduler/api/podgroup_info/job_info.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,15 @@ func (pgi *PodGroupInfo) GetTasksActiveAllocatedReqResource() *resource_info.Res
351351

352352
func (pgi *PodGroupInfo) IsReadyForScheduling() bool {
353353
validTasks := pgi.GetNumAliveTasks() - pgi.GetNumGatedTasks()
354-
return int32(validTasks) >= pgi.MinAvailable
354+
if int32(validTasks) < pgi.MinAvailable {
355+
return false
356+
}
357+
for _, subGroup := range pgi.SubGroups {
358+
if !subGroup.IsReadyForScheduling() {
359+
return false
360+
}
361+
}
362+
return true
355363
}
356364

357365
func (pgi *PodGroupInfo) IsElastic() bool {
@@ -432,9 +440,14 @@ func (pgi *PodGroupInfo) CloneWithTasks(tasks []*pod_info.PodInfo) *PodGroupInfo
432440
func (pgi *PodGroupInfo) String() string {
433441
res := ""
434442

443+
for _, subGroup := range pgi.SubGroups {
444+
res = res + fmt.Sprintf("\t\t subGroup %s: minAvailable(%v)\n",
445+
subGroup.Name, subGroup.MinAvailable)
446+
}
447+
435448
i := 0
436449
for _, task := range pgi.PodInfos {
437-
res = res + fmt.Sprintf("\n\t %d: %v", i, task)
450+
res = res + fmt.Sprintf("\n\t task %d: %v", i, task)
438451
i++
439452
}
440453

0 commit comments

Comments
 (0)