Skip to content

Commit 574f404

Browse files
committed
refactor: simplify the grouping of device affinity
1 parent 8bc5535 commit 574f404

File tree

2 files changed

+23
-82
lines changed

2 files changed

+23
-82
lines changed

pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1129,7 +1129,7 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) {
11291129
},
11301130
},
11311131
},
1132-
sortedDevices: []string{"gpu-1", "gpu-2", "gpu-5", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12"},
1132+
sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12"},
11331133
// Allocate gpu-1 and gpu-5 first because they are reusable devices
11341134
// Allocate gpu-2 next because it has affinity with gpu-1 at the highest affinity priority (level 0)
11351135
// Allocate gpu-6 and gpu-8 next because they have affinity with gpu-5 at the next highest affinity priority (level 1)

pkg/util/machine/device.go

Lines changed: 22 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package machine
1919
import (
2020
"fmt"
2121
"sort"
22-
"strings"
2322
"sync"
2423

2524
"k8s.io/apimachinery/pkg/util/sets"
@@ -149,98 +148,40 @@ type DeviceTopology struct {
149148
Devices map[string]DeviceInfo
150149
}
151150

152-
// GroupDeviceAffinity forms a topology graph such that all groups of DeviceIDs are within a certain affinity priority level
153-
// It preserves sub-group boundaries and eliminates duplicates
154-
// E.g. if priority 0 has groups [1, 3, 5, 6] and [0, 2, 4, 7], priority 1 will be either [1, 3, 5, 6, 0, 2, 4, 7] or [0, 2, 4, 7, 1, 3, 5, 6] and not any other permutation
155-
// This is to ensure that the higher priority affinity groups keep its permutation when it is in lower priority affinity group.
151+
// GroupDeviceAffinity forms a topology graph that collects, for each affinity priority level, all unique groups of DeviceIDs at that level
156152
func (t *DeviceTopology) GroupDeviceAffinity() map[AffinityPriority][]DeviceIDs {
157153
deviceAffinityGroup := make(map[AffinityPriority][]DeviceIDs)
158-
159-
// Collect unique groups per priority
160-
uniqueGroups := make(map[AffinityPriority]map[string]DeviceIDs)
161-
162-
for id, deviceInfo := range t.Devices {
163-
for priority, group := range deviceInfo.DeviceAffinity {
164-
// Ensure the device itself is included
165-
if !slices.Contains(group, id) {
166-
group = append(group, id)
154+
for deviceId, deviceInfo := range t.Devices {
155+
for priority, affinityDeviceIDs := range deviceInfo.DeviceAffinity {
156+
// Add the device itself to the group if it is not already included
157+
if !slices.Contains(affinityDeviceIDs, deviceId) {
158+
affinityDeviceIDs = append(affinityDeviceIDs, deviceId)
167159
}
168-
169-
// Sort for consistent deduplication key
170-
sortedGroup := make([]string, len(group))
171-
copy(sortedGroup, group)
172-
sort.Strings(sortedGroup)
173-
174-
key := strings.Join(sortedGroup, ",")
175-
if _, ok := uniqueGroups[priority]; !ok {
176-
uniqueGroups[priority] = make(map[string]DeviceIDs)
160+
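// Sort the IDs so identical groups compare equal during deduplication. NOTE(review): sort.Strings sorts in place, so this may reorder the slice held in t.Devices (the previous code sorted a copy) — confirm this is intended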
161+
sort.Strings(affinityDeviceIDs)
162+
if _, ok := deviceAffinityGroup[priority]; !ok {
163+
deviceAffinityGroup[priority] = make([]DeviceIDs, 0)
177164
}
178-
uniqueGroups[priority][key] = group
179-
}
180-
}
181-
182-
// Iterate priorities in order
183-
for priority := 0; ; priority++ {
184-
groupsMap, ok := uniqueGroups[AffinityPriority(priority)]
185-
if !ok || len(groupsMap) == 0 {
186-
break // no more groups at this priority
187-
}
188165

189-
// Build lower-group map for merging (priority > 0)
190-
lowerGroupMap := make(map[string]int)
191-
if priority > 0 {
192-
for idx, g := range deviceAffinityGroup[AffinityPriority(priority-1)] {
193-
for _, d := range g {
194-
lowerGroupMap[d] = idx
195-
}
166+
// Add the affinityDeviceIDs to the priority level if it is not already there
167+
if !containsGroup(deviceAffinityGroup[priority], affinityDeviceIDs) {
168+
deviceAffinityGroup[priority] = append(deviceAffinityGroup[priority], affinityDeviceIDs)
196169
}
197-
}
198170

199-
for _, group := range groupsMap {
200-
if priority > 0 {
201-
// Merge according to lower-priority group boundaries
202-
lowerGroups := make(map[int][]string)
203-
for _, d := range group {
204-
if idx, ok := lowerGroupMap[d]; ok {
205-
lowerGroups[idx] = append(lowerGroups[idx], d)
206-
} else {
207-
lowerGroups[-1] = append(lowerGroups[-1], d)
208-
}
209-
}
210-
211-
merged := []string{}
212-
for idx := 0; idx < len(deviceAffinityGroup[AffinityPriority(priority-1)]); idx++ {
213-
if devs, ok := lowerGroups[idx]; ok {
214-
merged = append(merged, devs...)
215-
}
216-
}
217-
if devs, ok := lowerGroups[-1]; ok {
218-
merged = append(merged, devs...)
219-
}
220-
group = merged
221-
}
222-
223-
// Deduplicate final groups
224-
key := strings.Join(group, ",")
225-
if _, ok := deviceAffinityGroup[AffinityPriority(priority)]; !ok {
226-
deviceAffinityGroup[AffinityPriority(priority)] = []DeviceIDs{}
227-
}
228-
alreadyExists := false
229-
for _, g := range deviceAffinityGroup[AffinityPriority(priority)] {
230-
if strings.Join(g, ",") == key {
231-
alreadyExists = true
232-
break
233-
}
234-
}
235-
if !alreadyExists {
236-
deviceAffinityGroup[AffinityPriority(priority)] = append(deviceAffinityGroup[AffinityPriority(priority)], group)
237-
}
238171
}
239172
}
240-
241173
return deviceAffinityGroup
242174
}
243175

176+
func containsGroup(groups []DeviceIDs, candidate DeviceIDs) bool {
177+
for _, g := range groups {
178+
if slices.Equal(g, candidate) {
179+
return true
180+
}
181+
}
182+
return false
183+
}
184+
244185
func (t *DeviceTopology) GetDeviceAffinityMap(deviceId string) (map[AffinityPriority]DeviceIDs, error) {
245186
info, ok := t.Devices[deviceId]
246187
if !ok {

0 commit comments

Comments
 (0)