Commit b917052

fix: remove need for numa information for rdma

1 parent efe914c commit b917052

9 files changed

Lines changed: 207 additions & 86 deletions


pkg/agent/qrm-plugins/gpu/baseplugin/reporter/reporter.go

Lines changed: 2 additions & 2 deletions
@@ -310,8 +310,8 @@ func (p *gpuReporterPlugin) getResourcePropertyReportField(latestDeviceTopology

 // getGPUResourceProperty returns the different dimensions to differentiate affinity priority of gpu devices.
 func (p *gpuReporterPlugin) getGPUResourceProperty(deviceTopology *machine.DeviceTopology) []*nodev1alpha1.Property {
-	if deviceTopology == nil || deviceTopology.PriorityDimensions == nil {
-		return nil
+	if deviceTopology == nil || len(deviceTopology.PriorityDimensions) == 0 {
+		return []*nodev1alpha1.Property{}
 	}

 	return []*nodev1alpha1.Property{
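
The new guard leans on two Go semantics worth spelling out: `len` is defined on nil slices and maps, so a single `len(x) == 0` check covers both the nil and the empty case, and an empty slice serializes differently from a nil one, which is presumably why the function now returns `[]*nodev1alpha1.Property{}` instead of `nil`. A minimal, self-contained sketch (standard library only, illustrative names):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// len() is defined on nil slices, so one len(x) == 0 guard
	// covers both "nil" and "empty".
	var dims []string // nil slice
	fmt.Println(dims == nil, len(dims) == 0) // true true

	// An empty slice also serializes as [] rather than null, so
	// downstream consumers see an empty list instead of an absent one.
	empty := []string{}
	a, _ := json.Marshal(dims)  // null
	b, _ := json.Marshal(empty) // []
	fmt.Println(string(a), string(b))
}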

pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu.go

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ func (p *GPUDevicePlugin) AllocateAssociatedDevice(
 		p.MetaServer,
 		p.GetState().GetMachineState(),
 		qosLevel,
+		"",
 	)
 	if err != nil {
 		return nil, fmt.Errorf("GPU allocation using strategy failed: %v", err)

pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma.go

Lines changed: 12 additions & 39 deletions
@@ -30,7 +30,6 @@ import (
 	gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state"
-	gpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/util"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
 	"github.com/kubewharf/katalyst-core/pkg/util/general"
 	"github.com/kubewharf/katalyst-core/pkg/util/machine"
@@ -113,13 +112,10 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(
 		}, nil
 	}

-	rdmaTopology, numaTopologyReady, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.RDMADeviceType)
+	rdmaTopology, _, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.RDMADeviceType)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get gpu device topology: %v", err)
 	}
-	if !numaTopologyReady {
-		return nil, fmt.Errorf("gpu device topology is not ready")
-	}

 	hintNodes, err := machine.NewCPUSetUint64(deviceReq.GetHint().GetNodes()...)
 	if err != nil {
@@ -131,7 +127,7 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(

 	// No accompany resource name
 	if accompanyResourceName == "" {
-		allocatedRdmaDevices, err = p.allocateWithNoAccompanyResource(deviceReq, rdmaTopology, hintNodes)
+		allocatedRdmaDevices, err = p.allocateWithNoAccompanyResource(deviceReq)
 		if err != nil {
 			return nil, fmt.Errorf("failed to allocate with no accompany resource: %v", err)
 		}
@@ -159,7 +155,7 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(
 	allocationInfo := &state.AllocationInfo{
 		AllocationMeta: commonstate.GenerateGenericContainerAllocationMeta(resReq, commonstate.EmptyOwnerPoolName, qosLevel),
 		AllocatedAllocation: state.Allocation{
-			Quantity:  1,
+			Quantity:  float64(len(allocatedRdmaDevices)),
 			NUMANodes: hintNodes.ToSliceInt(),
 		},
 	}
@@ -189,9 +185,7 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(

 // allocateWithNoAccompanyResource allocates the rdma devices by best effort basis on the by making sure that
 // it fits the hint nodes.
-func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(
-	deviceReq *pluginapi.DeviceRequest, rdmaTopology *machine.DeviceTopology, hintNodes machine.CPUSet,
-) ([]string, error) {
+func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(deviceReq *pluginapi.DeviceRequest) ([]string, error) {
 	reqQuantity := deviceReq.GetDeviceRequest()

 	machineState, ok := p.GetState().GetMachineState()[gpuconsts.RDMADeviceType]
@@ -220,10 +214,6 @@ func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(
 	}

 	for _, device := range availableDevices {
-		if !gpuutil.IsNUMAAffinityDevice(device, rdmaTopology, hintNodes) {
-			continue
-		}
-
 		if !machineState.IsRequestSatisfied(device, 1, 1) {
 			general.Infof("available numa affinity rdma %s is already allocated", device)
 			continue
@@ -242,8 +232,6 @@ func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(
 func (p *RDMADevicePlugin) allocateWithAccompanyResource(
 	deviceReq *pluginapi.DeviceRequest, resReq *pluginapi.ResourceRequest, accompanyResourceName string,
 ) ([]string, error) {
-	var err error
-
 	// Find out the accompany devices that are allocated to the container and allocate RDMA devices that correspond to the numa nodes of accompany device
 	accompanyDeviceType := p.ResolveResourceName(accompanyResourceName, false)
 	if accompanyDeviceType == "" {
@@ -258,17 +246,9 @@ func (p *RDMADevicePlugin) allocateWithAccompanyResource(

 	// Allocate target device according to ratio of accompany resource to target device
 	podResourceEntries := p.GetState().GetPodResourceEntries()
-	totalAllocated, accompanyResourceIds := podResourceEntries.GetTotalAllocatedResourceOfContainer(v1.ResourceName(accompanyDeviceType), resReq.PodUid, resReq.ContainerName)
-
-	rdmaToBeAllocated := int(math.Ceil(float64(totalAllocated) * accompanyResourceToTargetDeviceRatio))
+	totalAllocated := podResourceEntries.GetTotalAllocatedResourceOfContainer(v1.ResourceName(accompanyDeviceType), resReq.PodUid, resReq.ContainerName)

-	// For every gpu that is allocated to the container, find out the rdma devices that have affinity to the same
-	// numa nodes as the gpu and allocate them
-	accompanyResourceToRdmaAffinityMap, err := p.DeviceTopologyRegistry.GetDeviceNUMAAffinity(accompanyDeviceType, gpuconsts.RDMADeviceType)
-	if err != nil {
-		general.Warningf("failed to get gpu to rdma affinity map: %v", err)
-		return nil, err
-	}
+	rdmaToBeAllocated := int(math.Ceil(float64(totalAllocated) / accompanyResourceToTargetDeviceRatio))

 	machineState := p.GetState().GetMachineState()[v1.ResourceName(gpuconsts.RDMADeviceType)]

@@ -285,22 +265,15 @@ func (p *RDMADevicePlugin) allocateWithAccompanyResource(
 		return false
 	}

-	for accompanyResourceId := range accompanyResourceIds {
-		rdmaDevices, ok := accompanyResourceToRdmaAffinityMap[accompanyResourceId]
-		if !ok {
-			general.Warningf("failed to get rdma device with accompany device id: %s", accompanyResourceId)
+	// Allocate the rest of the available rdma devices in best-effort manner
+	for _, deviceId := range deviceReq.AvailableDevices {
+		// Skip rdma devices that are already allocated to other containers
+		if !machineState.IsRequestSatisfied(deviceId, 1, 1) {
 			continue
 		}

-		// Iterate through the rdma devices and check if they are already allocated
-		for _, rdmaDevice := range rdmaDevices {
-			if !machineState.IsRequestSatisfied(rdmaDevice, 1, 1) {
-				continue
-			}
-
-			if allocateDevices(rdmaDevice) {
-				return allocatedDevices.UnsortedList(), nil
-			}
+		if allocateDevices(deviceId) {
+			return allocatedDevices.UnsortedList(), nil
 		}
 	}
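
Taken together, the rdma.go changes drop the NUMA-affinity filter in favor of a best-effort pass over `deviceReq.AvailableDevices`, and flip the ratio arithmetic from multiplication to division. A standalone sketch of the resulting logic follows; the helper names are illustrative, `isFree` stands in for `machineState.IsRequestSatisfied(device, 1, 1)`, and the ratio is assumed to mean accompany devices per RDMA device, as the test's "1 rdma device per 2 gpu devices" comment suggests:

package main

import (
	"fmt"
	"math"
)

// rdmaToAllocate mirrors the new arithmetic in allocateWithAccompanyResource:
// divide by the ratio instead of multiplying. With 2 accompany GPUs per RDMA
// device, 6 allocated GPUs need ceil(6/2) = 3 RDMA devices.
func rdmaToAllocate(totalAccompany int, ratio float64) int {
	return int(math.Ceil(float64(totalAccompany) / ratio))
}

// pickBestEffort stands in for the new best-effort loop: walk the available
// devices in order and take the first free ones, with no NUMA filtering.
func pickBestEffort(available []string, isFree func(string) bool, want int) []string {
	picked := make([]string, 0, want)
	for _, d := range available {
		if !isFree(d) {
			continue // already allocated to another container
		}
		picked = append(picked, d)
		if len(picked) == want {
			break
		}
	}
	return picked
}

func main() {
	want := rdmaToAllocate(6, 2) // 3
	free := map[string]bool{"test-rdma-0": true, "test-rdma-1": true, "test-rdma-2": true, "test-rdma-3": true}
	got := pickBestEffort(
		[]string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"},
		func(d string) bool { return free[d] },
		want,
	)
	fmt.Println(want, got) // 3 [test-rdma-0 test-rdma-1 test-rdma-2]
}

With 6 allocated GPUs and a ratio of 2 this yields 3 RDMA devices, which matches the new test case's expectedNumAllocated of 3 below.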

pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma_test.go

Lines changed: 165 additions & 9 deletions
@@ -194,6 +194,9 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 		machineState  state.AllocationResourcesMap
 		expectedErr   bool
 		expectedResp  *pluginapi.AssociatedDeviceAllocationResponse
+		// isRandomAllocation is true if there is more than one possible allocation
+		isRandomAllocation   bool
+		expectedNumAllocated int
 	}{
 		{
 			name: "Allocation already exists",
@@ -304,11 +307,8 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 					Nodes: []uint64{0},
 				},
 			},
-			expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{
-				AllocationResult: &pluginapi.AssociatedDeviceAllocation{
-					AllocatedDevices: []string{"test-rdma-0", "test-rdma-2"},
-				},
-			},
+			isRandomAllocation:   true,
+			expectedNumAllocated: 2,
 		},
 		{
 			name: "No accompany resource allocates by best effort, no reusable devices, skip devices that are already allocated",
@@ -509,11 +509,162 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 					Nodes: []uint64{0, 1},
 				},
 			},
-			expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{
-				AllocationResult: &pluginapi.AssociatedDeviceAllocation{
-					AllocatedDevices: []string{"test-rdma-0", "test-rdma-2"},
+			isRandomAllocation:   true,
+			expectedNumAllocated: 2,
+		},
+		{
+			name:          "Accompany resource allocated, no NUMA info for rdma, still able to allocate",
+			podUID:        "test-pod",
+			containerName: "containerName",
+			deviceTopology: &machine.DeviceTopology{
+				Devices: map[string]machine.DeviceInfo{
+					"test-rdma-0": {},
+					"test-rdma-1": {},
+					"test-rdma-2": {},
+					"test-rdma-3": {},
+				},
+			},
+			// Ratio of 1 rdma device per 2 gpu devices
+			accompanyDeviceTopology: &machine.DeviceTopology{
+				Devices: map[string]machine.DeviceInfo{
+					"test-gpu-0": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-1": {
+						NumaNodes: []int{1},
+					},
+					"test-gpu-2": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-3": {
+						NumaNodes: []int{1},
+					},
+					"test-gpu-4": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-5": {
+						NumaNodes: []int{1},
+					},
+					"test-gpu-6": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-7": {
+						NumaNodes: []int{1},
+					},
+				},
+			},
+			accompanyResourceName: "test-gpu",
+			accompanyResourceAllocationInfo: &state.AllocationInfo{
+				AllocatedAllocation: state.Allocation{
+					Quantity: 6,
+				},
+				TopologyAwareAllocations: map[string]state.Allocation{
+					"test-gpu-0": {
+						Quantity: 1,
+					},
+					"test-gpu-1": {
+						Quantity: 1,
+					},
+					"test-gpu-2": {
+						Quantity: 1,
+					},
+					"test-gpu-4": {
+						Quantity: 1,
+					},
+					"test-gpu-5": {
+						Quantity: 1,
+					},
+					"test-gpu-6": {
+						Quantity: 1,
+					},
+				},
+			},
+			machineState: state.AllocationResourcesMap{
+				gpuconsts.RDMADeviceType: {
+					"test-rdma-0": {},
+					"test-rdma-1": {},
+					"test-rdma-2": {},
+					"test-rdma-3": {},
+				},
+				gpuconsts.GPUDeviceType: {
+					"test-gpu-0": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-1": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-2": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-3": {},
+					"test-gpu-4": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-5": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-6": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-7": {},
 				},
 			},
+			deviceReq: &pluginapi.DeviceRequest{
+				DeviceName:       "test-rdma",
+				ReusableDevices:  nil,
+				AvailableDevices: []string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"},
+				DeviceRequest:    0,
+			},
+			isRandomAllocation:   true,
+			expectedNumAllocated: 3,
 		},
 	}
@@ -561,11 +712,16 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 				assert.Error(t, err)
 			} else {
 				assert.NoError(t, err)
-				evaluateAllocatedDevicesResult(t, tt.expectedResp, resp)
+				if tt.isRandomAllocation {
+					assert.Equal(t, tt.expectedNumAllocated, len(resp.AllocationResult.AllocatedDevices))
+				} else {
+					evaluateAllocatedDevicesResult(t, tt.expectedResp, resp)
+				}

 				// Verify state is updated
 				allocationInfo := basePlugin.GetState().GetAllocationInfo(gpuconsts.RDMADeviceType, tt.podUID, tt.containerName)
 				assert.NotNil(t, allocationInfo)
+				assert.Equal(t, float64(len(resp.AllocationResult.AllocatedDevices)), allocationInfo.AllocatedAllocation.Quantity)
 			}
 		})
 	}
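
Because the allocator now picks from `AvailableDevices` without NUMA filtering, any free device is a valid choice and the exact IDs depend on iteration order, so these tests assert the count rather than a fixed device list. A hypothetical helper sketching the same idea, plus an order-insensitive variant for cases where membership is fixed but ordering is not (assumes testify, whose `assert.ElementsMatch` compares slices as multisets):

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// assertAllocation is a hypothetical helper: always assert the allocation
// size; assert exact membership only when it is deterministic.
func assertAllocation(t *testing.T, allocated []string, wantNum int, wantSet []string) {
	assert.Equal(t, wantNum, len(allocated))
	if wantSet != nil {
		// Order-insensitive comparison of the allocated device IDs.
		assert.ElementsMatch(t, wantSet, allocated)
	}
}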

pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go

Lines changed: 1 addition & 0 deletions
@@ -533,6 +533,7 @@ func (p *GPUMemPlugin) Allocate(
 		p.MetaServer,
 		p.GetState().GetMachineState(),
 		qosLevel,
+		"",
 	)
 	if err != nil {
 		return nil, fmt.Errorf("GPU allocation using strategy failed: %v", err)

pkg/agent/qrm-plugins/gpu/state/state.go

Lines changed: 4 additions & 10 deletions
@@ -21,7 +21,6 @@ import (
 	"sync"

 	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/klog/v2"

 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate"
@@ -190,22 +189,17 @@ func (pre PodResourceEntries) RemovePod(podUID string) {
 	}
 }

-// GetTotalAllocatedResourceOfContainer returns the total allocated resource quantity of a container together with
-// the specific resource IDs that are allocated.
+// GetTotalAllocatedResourceOfContainer returns the total allocated resource quantity of a container.
 func (pre PodResourceEntries) GetTotalAllocatedResourceOfContainer(
 	resourceName v1.ResourceName, podUID, containerName string,
-) (int, sets.String) {
+) int {
 	if podEntries, ok := pre[resourceName]; ok {
 		if allocationInfo := podEntries.GetAllocationInfo(podUID, containerName); allocationInfo != nil {
 			totalAllocationQuantity := int(allocationInfo.AllocatedAllocation.Quantity)
-			allocationIDs := sets.NewString()
-			for id := range allocationInfo.TopologyAwareAllocations {
-				allocationIDs.Insert(id)
-			}
-			return totalAllocationQuantity, allocationIDs
+			return totalAllocationQuantity
 		}
 	}
-	return 0, nil
+	return 0
 }

 func (as *AllocationState) String() string {
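
This narrows the accessor to just the quantity: the device-ID set it used to build was consumed only by the NUMA-affinity path that rdma.go dropped. A minimal self-contained analog of the new shape (illustrative types, not the repo's):

package main

import "fmt"

// allocation and entries stand in for the repo's AllocationInfo and
// PodResourceEntries; only the lookup shape is mirrored here.
type allocation struct{ quantity float64 }

type entries map[string]map[string]allocation // podUID -> containerName -> allocation

// totalAllocated mirrors the simplified semantics: return the container's
// allocated quantity if present, else 0 — a plain int now, not (int, sets.String).
func (e entries) totalAllocated(podUID, containerName string) int {
	if containers, ok := e[podUID]; ok {
		if alloc, ok := containers[containerName]; ok {
			return int(alloc.quantity)
		}
	}
	return 0
}

func main() {
	e := entries{"test-pod": {"test-container": {quantity: 6}}}
	fmt.Println(e.totalAllocated("test-pod", "test-container")) // 6
	fmt.Println(e.totalAllocated("other-pod", "c"))             // 0
}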
