Commit b917052

fix: remove need for numa information for rdma

1 parent efe914c commit b917052

9 files changed

Lines changed: 207 additions & 86 deletions


pkg/agent/qrm-plugins/gpu/baseplugin/reporter/reporter.go

Lines changed: 2 additions & 2 deletions
@@ -310,8 +310,8 @@ func (p *gpuReporterPlugin) getResourcePropertyReportField(latestDeviceTopology

 // getGPUResourceProperty returns the different dimensions to differentiate affinity priority of gpu devices.
 func (p *gpuReporterPlugin) getGPUResourceProperty(deviceTopology *machine.DeviceTopology) []*nodev1alpha1.Property {
-	if deviceTopology == nil || deviceTopology.PriorityDimensions == nil {
-		return nil
+	if deviceTopology == nil || len(deviceTopology.PriorityDimensions) == 0 {
+		return []*nodev1alpha1.Property{}
 	}

 	return []*nodev1alpha1.Property{
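
The new guard leans on two Go semantics worth spelling out: `len` is defined on nil slices and maps, so a single `len(x) == 0` check covers both the nil and the empty case, and an empty slice serializes differently from a nil one, which is presumably why the function now returns `[]*nodev1alpha1.Property{}` instead of `nil`. A minimal, self-contained sketch (standard library only, illustrative names):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// len() is defined on nil slices, so one len(x) == 0 guard
	// covers both "nil" and "empty".
	var dims []string // nil slice
	fmt.Println(dims == nil, len(dims) == 0) // true true

	// An empty slice also serializes as [] rather than null, so
	// downstream consumers see an empty list instead of an absent one.
	empty := []string{}
	a, _ := json.Marshal(dims)  // null
	b, _ := json.Marshal(empty) // []
	fmt.Println(string(a), string(b))
}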

pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu.go

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ func (p *GPUDevicePlugin) AllocateAssociatedDevice(
 		p.MetaServer,
 		p.GetState().GetMachineState(),
 		qosLevel,
+		"",
 	)
 	if err != nil {
 		return nil, fmt.Errorf("GPU allocation using strategy failed: %v", err)

pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma.go

Lines changed: 12 additions & 39 deletions
@@ -30,7 +30,6 @@ import (
 	gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state"
-	gpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/util"
 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
 	"github.com/kubewharf/katalyst-core/pkg/util/general"
 	"github.com/kubewharf/katalyst-core/pkg/util/machine"
@@ -113,13 +112,10 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(
 		}, nil
 	}

-	rdmaTopology, numaTopologyReady, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.RDMADeviceType)
+	rdmaTopology, _, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.RDMADeviceType)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get gpu device topology: %v", err)
 	}
-	if !numaTopologyReady {
-		return nil, fmt.Errorf("gpu device topology is not ready")
-	}

 	hintNodes, err := machine.NewCPUSetUint64(deviceReq.GetHint().GetNodes()...)
 	if err != nil {
@@ -131,7 +127,7 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(

 	// No accompany resource name
 	if accompanyResourceName == "" {
-		allocatedRdmaDevices, err = p.allocateWithNoAccompanyResource(deviceReq, rdmaTopology, hintNodes)
+		allocatedRdmaDevices, err = p.allocateWithNoAccompanyResource(deviceReq)
 		if err != nil {
 			return nil, fmt.Errorf("failed to allocate with no accompany resource: %v", err)
 		}
@@ -159,7 +155,7 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(
 	allocationInfo := &state.AllocationInfo{
 		AllocationMeta: commonstate.GenerateGenericContainerAllocationMeta(resReq, commonstate.EmptyOwnerPoolName, qosLevel),
 		AllocatedAllocation: state.Allocation{
-			Quantity:  1,
+			Quantity:  float64(len(allocatedRdmaDevices)),
 			NUMANodes: hintNodes.ToSliceInt(),
 		},
 	}
@@ -189,9 +185,7 @@ func (p *RDMADevicePlugin) AllocateAssociatedDevice(

 // allocateWithNoAccompanyResource allocates the rdma devices by best effort basis on the by making sure that
 // it fits the hint nodes.
-func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(
-	deviceReq *pluginapi.DeviceRequest, rdmaTopology *machine.DeviceTopology, hintNodes machine.CPUSet,
-) ([]string, error) {
+func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(deviceReq *pluginapi.DeviceRequest) ([]string, error) {
 	reqQuantity := deviceReq.GetDeviceRequest()

 	machineState, ok := p.GetState().GetMachineState()[gpuconsts.RDMADeviceType]
@@ -220,10 +214,6 @@ func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(
 	}

 	for _, device := range availableDevices {
-		if !gpuutil.IsNUMAAffinityDevice(device, rdmaTopology, hintNodes) {
-			continue
-		}
-
 		if !machineState.IsRequestSatisfied(device, 1, 1) {
 			general.Infof("available numa affinity rdma %s is already allocated", device)
 			continue
@@ -242,8 +232,6 @@ func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(
 func (p *RDMADevicePlugin) allocateWithAccompanyResource(
 	deviceReq *pluginapi.DeviceRequest, resReq *pluginapi.ResourceRequest, accompanyResourceName string,
 ) ([]string, error) {
-	var err error
-
 	// Find out the accompany devices that are allocated to the container and allocate RDMA devices that correspond to the numa nodes of accompany device
 	accompanyDeviceType := p.ResolveResourceName(accompanyResourceName, false)
 	if accompanyDeviceType == "" {
@@ -258,17 +246,9 @@ func (p *RDMADevicePlugin) allocateWithAccompanyResource(

 	// Allocate target device according to ratio of accompany resource to target device
 	podResourceEntries := p.GetState().GetPodResourceEntries()
-	totalAllocated, accompanyResourceIds := podResourceEntries.GetTotalAllocatedResourceOfContainer(v1.ResourceName(accompanyDeviceType), resReq.PodUid, resReq.ContainerName)
-
-	rdmaToBeAllocated := int(math.Ceil(float64(totalAllocated) * accompanyResourceToTargetDeviceRatio))
+	totalAllocated := podResourceEntries.GetTotalAllocatedResourceOfContainer(v1.ResourceName(accompanyDeviceType), resReq.PodUid, resReq.ContainerName)

-	// For every gpu that is allocated to the container, find out the rdma devices that have affinity to the same
-	// numa nodes as the gpu and allocate them
-	accompanyResourceToRdmaAffinityMap, err := p.DeviceTopologyRegistry.GetDeviceNUMAAffinity(accompanyDeviceType, gpuconsts.RDMADeviceType)
-	if err != nil {
-		general.Warningf("failed to get gpu to rdma affinity map: %v", err)
-		return nil, err
-	}
+	rdmaToBeAllocated := int(math.Ceil(float64(totalAllocated) / accompanyResourceToTargetDeviceRatio))

 	machineState := p.GetState().GetMachineState()[v1.ResourceName(gpuconsts.RDMADeviceType)]

@@ -285,22 +265,15 @@ func (p *RDMADevicePlugin) allocateWithAccompanyResource(
 		return false
 	}

-	for accompanyResourceId := range accompanyResourceIds {
-		rdmaDevices, ok := accompanyResourceToRdmaAffinityMap[accompanyResourceId]
-		if !ok {
-			general.Warningf("failed to get rdma device with accompany device id: %s", accompanyResourceId)
+	// Allocate the rest of the available rdma devices in best-effort manner
+	for _, deviceId := range deviceReq.AvailableDevices {
+		// Skip rdma devices that are already allocated to other containers
+		if !machineState.IsRequestSatisfied(deviceId, 1, 1) {
 			continue
 		}

-		// Iterate through the rdma devices and check if they are already allocated
-		for _, rdmaDevice := range rdmaDevices {
-			if !machineState.IsRequestSatisfied(rdmaDevice, 1, 1) {
-				continue
-			}
-
-			if allocateDevices(rdmaDevice) {
-				return allocatedDevices.UnsortedList(), nil
-			}
+		if allocateDevices(deviceId) {
+			return allocatedDevices.UnsortedList(), nil
 		}
 	}
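
Taken together, the rdma.go changes drop the NUMA-affinity filter in favor of a best-effort pass over `deviceReq.AvailableDevices`, and flip the ratio arithmetic from multiplication to division. A standalone sketch of the resulting logic follows; the helper names are illustrative, `isFree` stands in for `machineState.IsRequestSatisfied(device, 1, 1)`, and the ratio is assumed to mean accompany devices per RDMA device, as the test's "1 rdma device per 2 gpu devices" comment suggests:

package main

import (
	"fmt"
	"math"
)

// rdmaToAllocate mirrors the new arithmetic in allocateWithAccompanyResource:
// divide by the ratio instead of multiplying. With 2 accompany GPUs per RDMA
// device, 6 allocated GPUs need ceil(6/2) = 3 RDMA devices.
func rdmaToAllocate(totalAccompany int, ratio float64) int {
	return int(math.Ceil(float64(totalAccompany) / ratio))
}

// pickBestEffort stands in for the new best-effort loop: walk the available
// devices in order and take the first free ones, with no NUMA filtering.
func pickBestEffort(available []string, isFree func(string) bool, want int) []string {
	picked := make([]string, 0, want)
	for _, d := range available {
		if !isFree(d) {
			continue // already allocated to another container
		}
		picked = append(picked, d)
		if len(picked) == want {
			break
		}
	}
	return picked
}

func main() {
	want := rdmaToAllocate(6, 2) // 3
	free := map[string]bool{"test-rdma-0": true, "test-rdma-1": true, "test-rdma-2": true, "test-rdma-3": true}
	got := pickBestEffort(
		[]string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"},
		func(d string) bool { return free[d] },
		want,
	)
	fmt.Println(want, got) // 3 [test-rdma-0 test-rdma-1 test-rdma-2]
}

With 6 allocated GPUs and a ratio of 2 this yields 3 RDMA devices, which matches the new test case's expectedNumAllocated of 3 below.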

pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma_test.go

Lines changed: 165 additions & 9 deletions
@@ -194,6 +194,9 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 		machineState  state.AllocationResourcesMap
 		expectedErr   bool
 		expectedResp  *pluginapi.AssociatedDeviceAllocationResponse
+		// isRandomAllocation is true if there is more than one possible allocation
+		isRandomAllocation   bool
+		expectedNumAllocated int
 	}{
 		{
 			name: "Allocation already exists",
@@ -304,11 +307,8 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 					Nodes: []uint64{0},
 				},
 			},
-			expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{
-				AllocationResult: &pluginapi.AssociatedDeviceAllocation{
-					AllocatedDevices: []string{"test-rdma-0", "test-rdma-2"},
-				},
-			},
+			isRandomAllocation:   true,
+			expectedNumAllocated: 2,
 		},
 		{
 			name: "No accompany resource allocates by best effort, no reusable devices, skip devices that are already allocated",
@@ -509,11 +509,162 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 					Nodes: []uint64{0, 1},
 				},
 			},
-			expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{
-				AllocationResult: &pluginapi.AssociatedDeviceAllocation{
-					AllocatedDevices: []string{"test-rdma-0", "test-rdma-2"},
+			isRandomAllocation:   true,
+			expectedNumAllocated: 2,
+		},
+		{
+			name:          "Accompany resource allocated, no NUMA info for rdma, still able to allocate",
+			podUID:        "test-pod",
+			containerName: "containerName",
+			deviceTopology: &machine.DeviceTopology{
+				Devices: map[string]machine.DeviceInfo{
+					"test-rdma-0": {},
+					"test-rdma-1": {},
+					"test-rdma-2": {},
+					"test-rdma-3": {},
+				},
+			},
+			// Ratio of 1 rdma device per 2 gpu devices
+			accompanyDeviceTopology: &machine.DeviceTopology{
+				Devices: map[string]machine.DeviceInfo{
+					"test-gpu-0": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-1": {
+						NumaNodes: []int{1},
+					},
+					"test-gpu-2": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-3": {
+						NumaNodes: []int{1},
+					},
+					"test-gpu-4": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-5": {
+						NumaNodes: []int{1},
+					},
+					"test-gpu-6": {
+						NumaNodes: []int{0},
+					},
+					"test-gpu-7": {
+						NumaNodes: []int{1},
+					},
+				},
+			},
+			accompanyResourceName: "test-gpu",
+			accompanyResourceAllocationInfo: &state.AllocationInfo{
+				AllocatedAllocation: state.Allocation{
+					Quantity: 6,
+				},
+				TopologyAwareAllocations: map[string]state.Allocation{
+					"test-gpu-0": {
+						Quantity: 1,
+					},
+					"test-gpu-1": {
+						Quantity: 1,
+					},
+					"test-gpu-2": {
+						Quantity: 1,
+					},
+					"test-gpu-4": {
+						Quantity: 1,
+					},
+					"test-gpu-5": {
+						Quantity: 1,
+					},
+					"test-gpu-6": {
+						Quantity: 1,
+					},
+				},
+			},
+			machineState: state.AllocationResourcesMap{
+				gpuconsts.RDMADeviceType: {
+					"test-rdma-0": {},
+					"test-rdma-1": {},
+					"test-rdma-2": {},
+					"test-rdma-3": {},
+				},
+				gpuconsts.GPUDeviceType: {
+					"test-gpu-0": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-1": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-2": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-3": {},
+					"test-gpu-4": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-5": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-6": {
+						PodEntries: map[string]state.ContainerEntries{
+							"test-pod": {
+								"test-container": {
+									AllocatedAllocation: state.Allocation{
+										Quantity: 1,
+									},
+								},
+							},
+						},
+					},
+					"test-gpu-7": {},
 				},
 			},
+			deviceReq: &pluginapi.DeviceRequest{
+				DeviceName:       "test-rdma",
+				ReusableDevices:  nil,
+				AvailableDevices: []string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"},
+				DeviceRequest:    0,
+			},
+			isRandomAllocation:   true,
+			expectedNumAllocated: 3,
 		},
 	}
@@ -561,11 +712,16 @@ func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) {
 				assert.Error(t, err)
 			} else {
 				assert.NoError(t, err)
-				evaluateAllocatedDevicesResult(t, tt.expectedResp, resp)
+				if tt.isRandomAllocation {
+					assert.Equal(t, tt.expectedNumAllocated, len(resp.AllocationResult.AllocatedDevices))
+				} else {
+					evaluateAllocatedDevicesResult(t, tt.expectedResp, resp)
+				}

 				// Verify state is updated
 				allocationInfo := basePlugin.GetState().GetAllocationInfo(gpuconsts.RDMADeviceType, tt.podUID, tt.containerName)
 				assert.NotNil(t, allocationInfo)
+				assert.Equal(t, float64(len(resp.AllocationResult.AllocatedDevices)), allocationInfo.AllocatedAllocation.Quantity)
 			}
 		})
 	}
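
Because the allocator now picks from `AvailableDevices` without NUMA filtering, any free device is a valid choice and the exact IDs depend on iteration order, so these tests assert the count rather than a fixed device list. A hypothetical helper sketching the same idea, plus an order-insensitive variant for cases where membership is fixed but ordering is not (assumes testify, whose `assert.ElementsMatch` compares slices as multisets):

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// assertAllocation is a hypothetical helper: always assert the allocation
// size; assert exact membership only when it is deterministic.
func assertAllocation(t *testing.T, allocated []string, wantNum int, wantSet []string) {
	assert.Equal(t, wantNum, len(allocated))
	if wantSet != nil {
		// Order-insensitive comparison of the allocated device IDs.
		assert.ElementsMatch(t, wantSet, allocated)
	}
}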

pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go

Lines changed: 1 addition & 0 deletions
@@ -533,6 +533,7 @@ func (p *GPUMemPlugin) Allocate(
 		p.MetaServer,
 		p.GetState().GetMachineState(),
 		qosLevel,
+		"",
 	)
 	if err != nil {
 		return nil, fmt.Errorf("GPU allocation using strategy failed: %v", err)

pkg/agent/qrm-plugins/gpu/state/state.go

Lines changed: 4 additions & 10 deletions
@@ -21,7 +21,6 @@ import (
 	"sync"

 	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/klog/v2"

 	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate"
@@ -190,22 +189,17 @@ func (pre PodResourceEntries) RemovePod(podUID string) {
 	}
 }

-// GetTotalAllocatedResourceOfContainer returns the total allocated resource quantity of a container together with
-// the specific resource IDs that are allocated.
+// GetTotalAllocatedResourceOfContainer returns the total allocated resource quantity of a container.
 func (pre PodResourceEntries) GetTotalAllocatedResourceOfContainer(
 	resourceName v1.ResourceName, podUID, containerName string,
-) (int, sets.String) {
+) int {
 	if podEntries, ok := pre[resourceName]; ok {
 		if allocationInfo := podEntries.GetAllocationInfo(podUID, containerName); allocationInfo != nil {
 			totalAllocationQuantity := int(allocationInfo.AllocatedAllocation.Quantity)
-			allocationIDs := sets.NewString()
-			for id := range allocationInfo.TopologyAwareAllocations {
-				allocationIDs.Insert(id)
-			}
-			return totalAllocationQuantity, allocationIDs
+			return totalAllocationQuantity
 		}
 	}
-	return 0, nil
+	return 0
 }

 func (as *AllocationState) String() string {
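
This narrows the accessor to just the quantity: the device-ID set it used to build was consumed only by the NUMA-affinity path that rdma.go dropped. A minimal self-contained analog of the new shape (illustrative types, not the repo's):

package main

import "fmt"

// allocation and entries stand in for the repo's AllocationInfo and
// PodResourceEntries; only the lookup shape is mirrored here.
type allocation struct{ quantity float64 }

type entries map[string]map[string]allocation // podUID -> containerName -> allocation

// totalAllocated mirrors the simplified semantics: return the container's
// allocated quantity if present, else 0 — a plain int now, not (int, sets.String).
func (e entries) totalAllocated(podUID, containerName string) int {
	if containers, ok := e[podUID]; ok {
		if alloc, ok := containers[containerName]; ok {
			return int(alloc.quantity)
		}
	}
	return 0
}

func main() {
	e := entries{"test-pod": {"test-container": {quantity: 6}}}
	fmt.Println(e.totalAllocated("test-pod", "test-container")) // 6
	fmt.Println(e.totalAllocated("other-pod", "c"))             // 0
}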
