diff --git a/cmd/katalyst-agent/app/options/qrm/gpu_plugin.go b/cmd/katalyst-agent/app/options/qrm/gpu_plugin.go index 28b09fdec1..96e9efde59 100644 --- a/cmd/katalyst-agent/app/options/qrm/gpu_plugin.go +++ b/cmd/katalyst-agent/app/options/qrm/gpu_plugin.go @@ -30,6 +30,7 @@ type GPUOptions struct { GPUMemoryAllocatablePerGPU string SkipGPUStateCorruption bool RDMADeviceNames []string + RequiredDeviceAffinity bool GPUStrategyOptions *gpustrategy.GPUStrategyOptions } @@ -41,6 +42,7 @@ func NewGPUOptions() *GPUOptions { GPUMemoryAllocatablePerGPU: "100", RDMADeviceNames: []string{}, GPUStrategyOptions: gpustrategy.NewGPUStrategyOptions(), + RequiredDeviceAffinity: true, } } @@ -55,6 +57,8 @@ func (o *GPUOptions) AddFlags(fss *cliflag.NamedFlagSets) { fs.BoolVar(&o.SkipGPUStateCorruption, "skip-gpu-state-corruption", o.SkipGPUStateCorruption, "skip gpu state corruption, and it will be used after updating state properties") fs.StringSliceVar(&o.RDMADeviceNames, "rdma-resource-names", o.RDMADeviceNames, "The name of the RDMA resource") + fs.BoolVar(&o.RequiredDeviceAffinity, "gpu-required-device-affinity", o.RequiredDeviceAffinity, + "required device affinity, and when true it will cause pods to admit fail if unable to meet device affinity") o.GPUStrategyOptions.AddFlags(fss) } @@ -71,5 +75,6 @@ func (o *GPUOptions) ApplyTo(conf *qrmconfig.GPUQRMPluginConfig) error { if err := o.GPUStrategyOptions.ApplyTo(conf.GPUStrategyConfig); err != nil { return err } + conf.RequiredDeviceAffinity = o.RequiredDeviceAffinity return nil } diff --git a/pkg/agent/qrm-plugins/gpu/baseplugin/base.go b/pkg/agent/qrm-plugins/gpu/baseplugin/base.go index f4dad44c49..b55db10cdd 100644 --- a/pkg/agent/qrm-plugins/gpu/baseplugin/base.go +++ b/pkg/agent/qrm-plugins/gpu/baseplugin/base.go @@ -246,7 +246,7 @@ func (p *BasePlugin) UpdateAllocatableAssociatedDevices( deviceTopology.Devices[device.ID] = machine.DeviceInfo{ Health: device.Health, NumaNodes: numaNode, - DeviceAffinity: 
make(map[machine.AffinityPriority]machine.DeviceIDs), + DeviceAffinity: make(map[machine.Dimension]machine.DeviceIDs), } } diff --git a/pkg/agent/qrm-plugins/gpu/baseplugin/reporter/reporter_test.go b/pkg/agent/qrm-plugins/gpu/baseplugin/reporter/reporter_test.go index 93ae5deaec..3c7aea8baf 100644 --- a/pkg/agent/qrm-plugins/gpu/baseplugin/reporter/reporter_test.go +++ b/pkg/agent/qrm-plugins/gpu/baseplugin/reporter/reporter_test.go @@ -335,104 +335,80 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { "gpu-0": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-1": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-2", "gpu-3"}, }, }, "gpu-2": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-3"}, }, }, "gpu-3": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-2"}, }, }, "gpu-4": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-5": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-6", "gpu-7"}, }, }, "gpu-6": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", "gpu-7"}, }, }, "gpu-7": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", "gpu-6"}, }, }, @@ -650,160 +626,112 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { "gpu-0": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"gpu-1"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-1": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: 
map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"gpu-0"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-2", "gpu-3"}, }, }, "gpu-2": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"gpu-3"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-3"}, }, }, "gpu-3": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"gpu-2"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-2"}, }, }, "gpu-4": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "2", - }, + Name: "pcie", + Value: "2", }: {"gpu-5"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-5": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: 
"pcie", - Value: "2", - }, + Name: "pcie", + Value: "2", }: {"gpu-4"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-6", "gpu-7"}, }, }, "gpu-6": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "3", - }, + Name: "pcie", + Value: "3", }: {"gpu-7"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", "gpu-7"}, }, }, "gpu-7": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "3", - }, + Name: "pcie", + Value: "3", }: {"gpu-6"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", "gpu-6"}, }, }, @@ -1046,104 +974,80 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { "gpu-0": { Health: pluginapi.Unhealthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-1": { Health: pluginapi.Unhealthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-2", "gpu-3"}, }, }, "gpu-2": { Health: pluginapi.Unhealthy, NumaNodes: 
[]int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-3"}, }, }, "gpu-3": { Health: pluginapi.Unhealthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-2"}, }, }, "gpu-4": { Health: pluginapi.Unhealthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-5": { Health: pluginapi.Unhealthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-6", "gpu-7"}, }, }, "gpu-6": { Health: pluginapi.Unhealthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", "gpu-7"}, }, }, "gpu-7": { Health: pluginapi.Unhealthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", 
"gpu-6"}, }, }, @@ -1353,153 +1257,105 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { Devices: map[string]machine.DeviceInfo{ "gpu-0": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"gpu-1"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-1": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"gpu-0"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-2", "gpu-3"}, }, }, "gpu-2": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"gpu-3"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-3"}, }, }, "gpu-3": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"gpu-2"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"gpu-0", "gpu-1", "gpu-2"}, }, }, "gpu-4": { NumaNodes: []int{1}, - DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "2", - }, + Name: "pcie", + Value: "2", }: {"gpu-5"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-5": { NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "2", - }, + Name: "pcie", + Value: "2", }: {"gpu-4"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-6", "gpu-7"}, }, }, "gpu-6": { NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "3", - }, + Name: "pcie", + Value: "3", }: {"gpu-7"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", "gpu-7"}, }, }, "gpu-7": { NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "pcie", - Value: "3", - }, + Name: "pcie", + Value: "3", }: {"gpu-6"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"gpu-4", "gpu-5", "gpu-6"}, }, }, @@ -1737,106 +1593,58 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { Devices: map[string]machine.DeviceInfo{ "gpu-0": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: 
machine.Dimension{}, - }: {"gpu-1"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, - }: {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-1"}, + {}: {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-1": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{}, - }: {"gpu-0"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, - }: {"gpu-0", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-0"}, + {}: {"gpu-0", "gpu-2", "gpu-3"}, }, }, "gpu-2": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{}, - }: {"gpu-3"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, - }: {"gpu-0", "gpu-1", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-3"}, + {}: {"gpu-0", "gpu-1", "gpu-3"}, }, }, "gpu-3": { NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{}, - }: {"gpu-2"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, - }: {"gpu-0", "gpu-1", "gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-2"}, + {}: {"gpu-0", "gpu-1", "gpu-2"}, }, }, "gpu-4": { NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{}, - }: {"gpu-5"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, - }: {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-5"}, + {}: {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-5": { NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{}, - }: {"gpu-4"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, 
- }: {"gpu-4", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-4"}, + {}: {"gpu-4", "gpu-6", "gpu-7"}, }, }, "gpu-6": { NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{}, - }: {"gpu-7"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, - }: {"gpu-4", "gpu-5", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-7"}, + {}: {"gpu-4", "gpu-5", "gpu-7"}, }, }, "gpu-7": { NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{}, - }: {"gpu-6"}, - { - PriorityLevel: 1, - Dimension: machine.Dimension{}, - }: {"gpu-4", "gpu-5", "gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {}: {"gpu-6"}, + {}: {"gpu-4", "gpu-5", "gpu-6"}, }, }, }, @@ -1999,13 +1807,10 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { "gpu-0": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {}, }, }, @@ -2089,20 +1894,18 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { "gpu-0": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{Name: "numa", Value: "0"}, + Name: "numa", Value: "0", }: {}, }, }, "gpu-1": { Health: pluginapi.Healthy, NumaNodes: []int{1}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{Name: "numa", Value: "1"}, + Name: "numa", Value: "1", }: 
{}, }, }, @@ -2167,7 +1970,7 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { "gpu-0": { Health: pluginapi.Healthy, NumaNodes: []int{0}, - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{{PriorityLevel: 0, Dimension: machine.Dimension{Name: "numa", Value: "0"}}: {}}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{{Name: "numa", Value: "0"}: {}}, }, }, }, @@ -2224,14 +2027,14 @@ func TestGpuReporterPlugin_GetReportContent(t *testing.T) { PriorityDimensions: []string{"numa"}, UpdateTime: 100, Devices: map[string]machine.DeviceInfo{ - "gpu-0": {Health: pluginapi.Healthy, NumaNodes: []int{0}, DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{{PriorityLevel: 0, Dimension: machine.Dimension{Name: "numa", Value: "0"}}: {}}}, + "gpu-0": {Health: pluginapi.Healthy, NumaNodes: []int{0}, DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{{Name: "numa", Value: "0"}: {}}}, }, }, "test-gpu-b": { PriorityDimensions: []string{"numa"}, UpdateTime: 200, Devices: map[string]machine.DeviceInfo{ - "gpu-0": {Health: pluginapi.Unhealthy, NumaNodes: []int{0}, DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{{PriorityLevel: 0, Dimension: machine.Dimension{Name: "numa", Value: "0"}}: {}}}, + "gpu-0": {Health: pluginapi.Unhealthy, NumaNodes: []int{0}, DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{{Name: "numa", Value: "0"}: {}}}, }, }, }, diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu_test.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu_test.go index 96cba8751c..39077afea9 100644 --- a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu_test.go +++ b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu_test.go @@ -160,11 +160,11 @@ func TestGPUDevicePlugin_UpdateAllocatableAssociatedDevices(t *testing.T) { Devices: map[string]machine.DeviceInfo{ "test-gpu-0": { NumaNodes: []int{0}, - DeviceAffinity: make(map[machine.AffinityPriority]machine.DeviceIDs), + DeviceAffinity: 
make(map[machine.Dimension]machine.DeviceIDs), }, "test-gpu-1": { NumaNodes: []int{1}, - DeviceAffinity: make(map[machine.AffinityPriority]machine.DeviceIDs), + DeviceAffinity: make(map[machine.Dimension]machine.DeviceIDs), }, }, } diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind.go index b8b314793f..a2d4b265e1 100644 --- a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind.go +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind.go @@ -40,6 +40,7 @@ const ( type affinityGroup struct { id string unallocatedDevices sets.String + totalDevicesNum int } // Bind binds the sorted devices to the allocation context by searching for the devices that have affinity to each other. @@ -60,14 +61,24 @@ func (s *DeviceAffinityStrategy) Bind( devicesToAllocate := int(ctx.DeviceReq.DeviceRequest) reusableDevicesSet := sets.NewString(ctx.DeviceReq.ReusableDevices...) + requiredDeviceAffinity := ctx.GPUQRMPluginConfig.RequiredDeviceAffinity + // All devices that are passed into the strategy are unallocated devices unallocatedDevicesSet := sets.NewString(sortedDevices...) - // Get a map of affinity groups that is grouped by priority - affinityMap := ctx.DeviceTopology.GroupDeviceAffinity() + // Get affinity groups ordered from the highest priority to the lowest priority. 
+ affinityLevels := ctx.DeviceTopology.GroupDeviceAffinity() // If there is no topology affinity, fallback to generic canonical strategy - if len(affinityMap) == 0 { + if len(affinityLevels) == 0 { + // return error if device affinity is required but no topology affinity is found + if requiredDeviceAffinity { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: "no topology affinity found but device affinity is required", + }, fmt.Errorf("no topology affinity found but device affinity is required") + } + general.Warningf("no topology affinity found, fallback to canonical strategy") tags := metrics.ConvertMapToTags(map[string]string{ "podNamespace": ctx.ResourceReq.PodNamespace, @@ -79,12 +90,12 @@ func (s *DeviceAffinityStrategy) Bind( return s.CanonicalStrategy.Bind(ctx, sortedDevices) } - // Get affinity groups organized by priority level - affinityGroupsMap := s.getAffinityGroupsByPriority(affinityMap, unallocatedDevicesSet) + // Get affinity groups organized by priority level and trimmed to the lowest priority we need to consider. 
+ affinityGroupsByPriority := s.getAffinityGroupsSortedByPriority(affinityLevels, unallocatedDevicesSet, devicesToAllocate, requiredDeviceAffinity) // Allocate reusable devices first - allocatedDevices, err := s.allocateCandidateDevices(affinityGroupsMap, - reusableDevicesSet.Intersection(unallocatedDevicesSet), devicesToAllocate, sets.NewString()) + allocatedDevices, err := s.allocateCandidateDevices(affinityGroupsByPriority, + reusableDevicesSet.Intersection(unallocatedDevicesSet), sets.NewString(), devicesToAllocate, requiredDeviceAffinity) if err != nil { return &allocate.AllocationResult{ Success: false, @@ -101,8 +112,8 @@ func (s *DeviceAffinityStrategy) Bind( // Next, allocate left available devices availableDevices := unallocatedDevicesSet.Difference(allocatedDevices) - allocatedDevices, err = s.allocateCandidateDevices(affinityGroupsMap, - availableDevices, devicesToAllocate, allocatedDevices) + allocatedDevices, err = s.allocateCandidateDevices(affinityGroupsByPriority, + availableDevices, allocatedDevices, devicesToAllocate, requiredDeviceAffinity) if err != nil { return &allocate.AllocationResult{ Success: false, @@ -124,20 +135,28 @@ func (s *DeviceAffinityStrategy) Bind( }, fmt.Errorf("not enough devices to allocate: need %d, have %d", devicesToAllocate, len(allocatedDevices)) } -// getAffinityGroupsByPriority forms a map of affinityGroup by priority level. -func (s *DeviceAffinityStrategy) getAffinityGroupsByPriority( - affinityMap map[int][]machine.DeviceIDs, unallocatedDevicesSet sets.String, -) map[int][]affinityGroup { - affinityGroupsMap := make(map[int][]affinityGroup) - for priority, affinityDevices := range affinityMap { - if affinityGroupsMap[priority] == nil { - affinityGroupsMap[priority] = make([]affinityGroup, 0) +// getAffinityGroupsSortedByPriority forms affinity groups sorted from the highest priority to the lowest priority. +// When device affinity is required, it only keeps levels up to the first one that can satisfy the request. 
+func (s *DeviceAffinityStrategy) getAffinityGroupsSortedByPriority( + affinityMap [][]machine.DeviceIDs, unallocatedDevicesSet sets.String, deviceReq int, requiredDeviceAffinity bool, +) [][]affinityGroup { + affinityGroupsSortedByPriority := make([][]affinityGroup, 0, len(affinityMap)) + for _, affinityDevices := range affinityMap { + affinityGroups := s.getAffinityGroups(affinityDevices, unallocatedDevicesSet) + affinityGroupsSortedByPriority = append(affinityGroupsSortedByPriority, affinityGroups) + + if !requiredDeviceAffinity { + continue + } + + for _, group := range affinityGroups { + if deviceReq <= group.totalDevicesNum { + return affinityGroupsSortedByPriority + } } - affinityGroupsMap[priority] = append(affinityGroupsMap[priority], - s.getAffinityGroups(affinityDevices, unallocatedDevicesSet)...) } - return affinityGroupsMap + return affinityGroupsSortedByPriority } // getAffinityGroups forms a list of affinityGroup with unallocated devices. @@ -157,6 +176,7 @@ func (s *DeviceAffinityStrategy) getAffinityGroups( affinityGroups = append(affinityGroups, affinityGroup{ unallocatedDevices: unallocatedDevices, id: uuid.NewString(), + totalDevicesNum: len(devices), }) } @@ -170,7 +190,7 @@ func (s *DeviceAffinityStrategy) getAffinityGroups( // 3. 
Balances between fulfilling exact requirements and maintaining optimal groupings // // Parameters: -// - affinityGroupsMap: Mapping of affinity priorities to device groups with those priorities +// - affinityGroupsByPriority: Device groups ordered from highest priority to lowest priority // - candidateDevicesSet: Set of available devices that can be allocated // - devicesToAllocate: Total number of devices that need to be allocated // - allocatedDevices: Set of devices that have already been allocated in previous iterations @@ -179,10 +199,10 @@ func (s *DeviceAffinityStrategy) getAffinityGroups( // - sets.String: The complete set of allocated devices after this allocation round // - error: Any error encountered during the allocation process func (s *DeviceAffinityStrategy) allocateCandidateDevices( - affinityGroupsMap map[int][]affinityGroup, - candidateDevicesSet sets.String, + affinityGroupsByPriority [][]affinityGroup, + candidateDevicesSet, allocatedDevices sets.String, devicesToAllocate int, - allocatedDevices sets.String, + requiredDeviceAffinity bool, ) (sets.String, error) { // Early termination conditions if len(allocatedDevices) == devicesToAllocate || len(candidateDevicesSet) == 0 { @@ -192,23 +212,15 @@ func (s *DeviceAffinityStrategy) allocateCandidateDevices( // Calculate remaining devices needed remainingDevicesToAllocate := devicesToAllocate - len(allocatedDevices) - // Fast path: If we need all remaining candidates, allocate them all - if remainingDevicesToAllocate >= len(candidateDevicesSet) { + // Fast path: If we need all remaining candidates, allocate them all. + // With RequiredDeviceAffinity, don't blindly union candidates across affinity groups. + if !requiredDeviceAffinity && remainingDevicesToAllocate >= len(candidateDevicesSet) { allocatedDevices = allocatedDevices.Union(candidateDevicesSet) return allocatedDevices, nil } - // Process affinity groups from highest to lowest priority. 
- // Do not assume priorities are consecutive (e.g., keys can be 0,2,3,5). - priorityLevels := make([]int, 0, len(affinityGroupsMap)) - for p := range affinityGroupsMap { - priorityLevels = append(priorityLevels, p) - } - // Sort priorities ascending so stronger affinity (lower number) is processed first - sort.Ints(priorityLevels) - - for idx, priority := range priorityLevels { - affinityGroups := affinityGroupsMap[priority] + // Process affinity groups from highest to lowest priority + for priority, affinityGroups := range affinityGroupsByPriority { if len(affinityGroups) == 0 { continue } @@ -229,11 +241,15 @@ func (s *DeviceAffinityStrategy) allocateCandidateDevices( return result, nil } - // For the lowest priority (last in the sorted list), use more flexible allocation strategies - if idx == len(priorityLevels)-1 { + // For the lowest considered priority, use more flexible allocation strategies. + // With RequiredDeviceAffinity, do not mix devices across different affinity groups. + if priority == len(affinityGroupsByPriority)-1 { + if requiredDeviceAffinity { + return allocatedDevices, nil + } return s.handleLowestPriorityAllocation( - groupInfos, affinityGroupsMap, candidateDevicesSet, - devicesToAllocate, allocatedDevices, remainingDevicesToAllocate, + groupInfos, affinityGroupsByPriority, candidateDevicesSet, + devicesToAllocate, allocatedDevices, remainingDevicesToAllocate, requiredDeviceAffinity, ) } } @@ -343,11 +359,12 @@ func (s *DeviceAffinityStrategy) tryAllocateFromGroups( // This method is more permissive in its allocation strategy to ensure device requirements are met. 
func (s *DeviceAffinityStrategy) handleLowestPriorityAllocation( groupInfos []groupInfo, - affinityGroupsMap map[int][]affinityGroup, + affinityGroupsByPriority [][]affinityGroup, candidateDevicesSet sets.String, devicesToAllocate int, allocatedDevices sets.String, remainingDevicesToAllocate int, + requiredDeviceAffinity bool, ) (sets.String, error) { // First try to allocate entire groups that fit within the remaining requirement and // ensure affinity allocation if there are already allocated devices @@ -360,10 +377,11 @@ func (s *DeviceAffinityStrategy) handleLowestPriorityAllocation( // Recursively allocate the remaining devices return s.allocateCandidateDevices( - affinityGroupsMap, + affinityGroupsByPriority, candidateDevicesSet.Difference(group.candidates), - devicesToAllocate, allocatedDevices, + devicesToAllocate, + requiredDeviceAffinity, ) } } @@ -376,18 +394,20 @@ func (s *DeviceAffinityStrategy) handleLowestPriorityAllocation( allocatedDevices = allocatedDevices.Union(group.candidates) return s.allocateCandidateDevices( - affinityGroupsMap, + affinityGroupsByPriority, candidateDevicesSet.Difference(group.candidates), - devicesToAllocate, allocatedDevices, + devicesToAllocate, + requiredDeviceAffinity, ) } else { // Recursively allocate a subset of devices from this group devices, err := s.allocateCandidateDevices( - affinityGroupsMap, + affinityGroupsByPriority, group.candidates, - remainingDevicesToAllocate, group.allocated, + remainingDevicesToAllocate, + requiredDeviceAffinity, ) if err != nil { return nil, err diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go index 5597f012f7..8f74ce50c4 100644 --- a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go @@ -17,8 +17,7 @@ limitations under the License. 
package deviceaffinity import ( - "reflect" - "sort" + "strconv" "testing" "github.com/google/go-cmp/cmp" @@ -28,13 +27,18 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + qrmconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/machine" ) -func convertIntToAffinityPriority(priorityLevel int) machine.AffinityPriority { - return machine.AffinityPriority{ - PriorityLevel: priorityLevel, +func convertIntToAffinityDimension(priorityLevel int) machine.Dimension { + // Bind tests only use priority levels 0 and 1. + // Convert them into stable synthetic dimensions; ordering is derived from + // DeviceTopology.PriorityDimensions. + return machine.Dimension{ + Name: strconv.Itoa(priorityLevel), + Value: strconv.Itoa(priorityLevel), } } @@ -63,45 +67,46 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 1, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): 
{"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, }, }, }, @@ -124,45 +129,46 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 2, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, 
"gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, }, }, }, @@ -185,25 +191,26 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 3, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + 
DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, }, @@ -226,25 +233,26 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 4, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, }, @@ -269,25 +277,26 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 2, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, 
}, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, }, }, }, @@ -310,25 +319,26 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 2, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, }, @@ -353,25 
+363,26 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 2, // should allocate gpu-3 and gpu-4 }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, }, @@ -397,25 +408,26 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { }, // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - 
convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, }, @@ -442,45 +454,46 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { }, // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + 
convertIntToAffinityDimension(0): {"gpu-5"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, }, }, }, @@ -506,45 +519,46 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { }, // Level 0: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): 
{"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, @@ -570,65 +584,66 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { }, // Level 0: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8], [gpu-9, gpu-10, gpu-11, gpu-12] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: 
map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-10", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-10", "gpu-11", "gpu-12"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", 
"gpu-11", "gpu-12"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-12"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11"}, }, }, }, @@ -656,137 +671,66 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { }, // Level 0: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8], [gpu-9, gpu-10, gpu-11, gpu-12] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"numa"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, - }: {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-0"}: {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, - }: {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-0"}: {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, - }: {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-0"}: {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - 
Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, - }: {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-0"}: {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, - }: {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-1"}: {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, - }: {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-1"}: {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, - }: {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-1"}: {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, - }: {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-1"}: {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, - }: {"gpu-10", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-2"}: {"gpu-10", "gpu-11", "gpu-12"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - 
{ - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, - }: {"gpu-9", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-2"}: {"gpu-9", "gpu-11", "gpu-12"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, - }: {"gpu-9", "gpu-10", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-2"}: {"gpu-9", "gpu-10", "gpu-12"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, - }: {"gpu-9", "gpu-10", "gpu-11"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + {Name: "numa", Value: "numa-2"}: {"gpu-9", "gpu-10", "gpu-11"}, }, }, }, @@ -813,53 +757,54 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", 
"gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(1): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(1): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - 
convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, @@ -890,51 +835,51 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceTopology: &machine.DeviceTopology{ Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(2): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(2): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(2): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(2): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(2): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(2): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(2): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(2): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - 
convertIntToAffinityPriority(2): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(2): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(2): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(2): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(2): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + convertIntToAffinityDimension(2): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(2): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(2): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, @@ -961,45 +906,46 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 4, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + 
convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, }, }, }, @@ -1038,45 +984,46 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 6, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - 
convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, }, }, }, @@ -1105,65 +1052,66 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { }, // 1 level: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8], [gpu-9, gpu-10, gpu-11, gpu-12] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2", 
"gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + 
convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-10", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-10", "gpu-11", "gpu-12"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-11", "gpu-12"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-12"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11"}, }, }, }, @@ -1190,65 +1138,66 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 8, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-10", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-10", "gpu-11", "gpu-12"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", 
"gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-11", "gpu-12"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-12"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11"}, }, }, }, @@ -1279,77 +1228,78 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8], [gpu-9, gpu-10], [gpu-11, gpu-12] // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8], [gpu-9, gpu-10, gpu-11, gpu-12] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - 
convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(1): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(1): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + 
convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-10"}, - convertIntToAffinityPriority(1): {"gpu-10", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-10"}, + convertIntToAffinityDimension(1): {"gpu-10", "gpu-11", "gpu-12"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-11", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-11", "gpu-12"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-12"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-12"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-11"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-11"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11"}, }, }, }, @@ -1391,53 +1341,54 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - 
convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(1): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(1): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + 
convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, @@ -1464,29 +1415,30 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 2, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4"}, + 
DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, }, }, }, @@ -1515,53 +1467,54 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - 
DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(1): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(1): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, @@ -1593,148 +1546,101 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] // Level 1: [gpu-1, gpu-2, gpu-3, 
gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"numa", "socket"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, + Name: "numa", + Value: "numa-0", }: {"gpu-2"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, + Name: "numa", + Value: "numa-0", }: {"gpu-1"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, + Name: "numa", + Value: "numa-1", }: {"gpu-4"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, + Name: "numa", + Value: "numa-1", }: {"gpu-3"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - 
DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, + Name: "numa", + Value: "numa-2", }: {"gpu-6"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, + Name: "numa", + Value: "numa-2", }: {"gpu-5"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-3", - }, + Name: "numa", + Value: "numa-3", }: {"gpu-8"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-3", - }, + Name: "numa", + Value: "numa-3", }: {"gpu-7"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-5", "gpu-6", "gpu-7"}, }, }, @@ -1761,45 +1667,46 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { }, // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] DeviceTopology: 
&machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, }, }, }, @@ 
-1827,25 +1734,26 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 4, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, }, }, }, @@ -1874,53 +1782,54 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - 
convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(1): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(1): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + 
convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, @@ -1957,101 +1866,102 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { DeviceRequest: 2, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "0": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + 
convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: 
map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", 
"gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-13": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-14": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-15": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ 
- convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-16": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, }, @@ -2076,53 +1986,54 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-0, gpu-1], [gpu-4, gpu-5], [gpu-2, gpu-3], [gpu-6, gpu-7] // Level 1: [gpu-0, gpu-1, gpu-4, gpu-5], [gpu-2, gpu-3, gpu-6, gpu-7] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-0": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-4", "gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-4", "gpu-5"}, }, }, "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-0"}, - convertIntToAffinityPriority(1): {"gpu-0", "gpu-4", "gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-0"}, + 
convertIntToAffinityDimension(1): {"gpu-0", "gpu-4", "gpu-5"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-3", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-3", "gpu-6", "gpu-7"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-6", "gpu-7"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(1): {"gpu-0", "gpu-1", "gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(1): {"gpu-0", "gpu-1", "gpu-5"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-0", "gpu-1", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-0", "gpu-1", "gpu-4"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-7"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - 
convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-6"}, }, }, }, @@ -2149,148 +2060,101 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { // Level 0: [gpu-0, gpu-1], [gpu-4, gpu-5], [gpu-2, gpu-3], [gpu-6, gpu-7] // Level 1: [gpu-0, gpu-1, gpu-4, gpu-5], [gpu-2, gpu-3, gpu-6, gpu-7] DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"numa", "socket"}, Devices: map[string]machine.DeviceInfo{ "gpu-0": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, + Name: "numa", + Value: "numa-0", }: {"gpu-1"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-1", "gpu-4", "gpu-5"}, }, }, "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-0", - }, + Name: "numa", + Value: "numa-0", }: {"gpu-0"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-0", "gpu-4", "gpu-5"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, + Name: "numa", + Value: "numa-2", }: {"gpu-3"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-3", "gpu-6", 
"gpu-7"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-2", - }, + Name: "numa", + Value: "numa-2", }: {"gpu-2"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-2", "gpu-6", "gpu-7"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, + Name: "numa", + Value: "numa-1", }: {"gpu-5"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-0", "gpu-1", "gpu-5"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-1", - }, + Name: "numa", + Value: "numa-1", }: {"gpu-4"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-0", - }, + Name: "socket", + Value: "socket-0", }: {"gpu-0", "gpu-1", "gpu-4"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-3", - }, + Name: "numa", + Value: "numa-3", }: {"gpu-7"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-2", "gpu-3", "gpu-7"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ { - PriorityLevel: 0, - 
Dimension: machine.Dimension{ - Name: "numa", - Value: "numa-3", - }, + Name: "numa", + Value: "numa-3", }: {"gpu-6"}, { - PriorityLevel: 1, - Dimension: machine.Dimension{ - Name: "socket", - Value: "socket-1", - }, + Name: "socket", + Value: "socket-1", }: {"gpu-2", "gpu-3", "gpu-6"}, }, }, @@ -2364,6 +2228,11 @@ func TestBind_NumberOfDevicesAllocated(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() deviceBindingStrategy := NewDeviceAffinityStrategy() + if tt.ctx.GPUQRMPluginConfig == nil { + tt.ctx.GPUQRMPluginConfig = qrmconfig.NewGPUQRMPluginConfig() + // Keep tests focused on affinity logic rather than admission strictness. + tt.ctx.GPUQRMPluginConfig.RequiredDeviceAffinity = false + } tt.ctx.Emitter = metrics.DummyMetrics{} result, err := deviceBindingStrategy.Bind(tt.ctx, tt.sortedDevices) if (err != nil) != tt.expectedErr { @@ -2397,45 +2266,46 @@ func TestBind_DeviceAffinity(t *testing.T) { DeviceRequest: 2, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): 
{"gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, }, }, }, @@ -2457,53 +2327,54 @@ func TestBind_DeviceAffinity(t *testing.T) { DeviceRequest: 4, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - 
convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(1): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(1): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + 
convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, @@ -2525,60 +2396,63 @@ func TestBind_DeviceAffinity(t *testing.T) { DeviceRequest: 4, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "2"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2"}, - convertIntToAffinityPriority(2): {"gpu-2", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(2): {"gpu-2", "gpu-3", "gpu-4"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1"}, - convertIntToAffinityPriority(2): {"gpu-1", "gpu-3", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(2): {"gpu-1", "gpu-3", "gpu-4"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-4"}, - convertIntToAffinityPriority(2): {"gpu-1", "gpu-2", "gpu-4"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(2): {"gpu-1", "gpu-2", "gpu-4"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-3"}, - convertIntToAffinityPriority(2): {"gpu-1", "gpu-2", "gpu-3"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(2): {"gpu-1", "gpu-2", "gpu-3"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6"}, - convertIntToAffinityPriority(2): {"gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: 
map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6"}, + convertIntToAffinityDimension(2): {"gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5"}, - convertIntToAffinityPriority(2): {"gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5"}, + convertIntToAffinityDimension(2): {"gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-8"}, - convertIntToAffinityPriority(2): {"gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-8"}, + convertIntToAffinityDimension(2): {"gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-7"}, - convertIntToAffinityPriority(2): {"gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-7"}, + convertIntToAffinityDimension(2): {"gpu-5", "gpu-6", "gpu-7"}, }, }, }, }, }, - sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, - expectedAffinityPriorityLevel: 2, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + // Priority levels are positional indices in DeviceTopology.PriorityDimensions. + // Here, dimension name "2" is the second (index=1) priority dimension. 
+ expectedAffinityPriorityLevel: 1, }, { name: "4 devices in affinity priority 0, 8 devices in affinity priority 1, allocate 4 devices", @@ -2593,101 +2467,102 @@ func TestBind_DeviceAffinity(t *testing.T) { DeviceRequest: 4, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", 
"gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): 
{"gpu-5", "gpu-6", "gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-10", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-10", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11"}, + 
convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-13": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-14", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-14", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-14": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-15", "gpu-16"}, }, }, "gpu-15": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-16"}, }, }, "gpu-16": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-15"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-15"}, + convertIntToAffinityDimension(1): 
{"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15"}, }, }, }, @@ -2709,101 +2584,102 @@ func TestBind_DeviceAffinity(t *testing.T) { DeviceRequest: 8, }, DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, Devices: map[string]machine.DeviceInfo{ "gpu-1": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-2", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-2": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-3", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-3": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-4"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-4": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-1", "gpu-2", "gpu-3"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: 
map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-5": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-6", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, }, }, "gpu-6": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-7", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, }, }, "gpu-7": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-8"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, }, }, "gpu-8": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-5", "gpu-6", "gpu-7"}, - convertIntToAffinityPriority(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7"}, + 
convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, }, }, "gpu-9": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-10", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-10", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-10": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-11", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-11", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-11": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-12"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-12"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-12": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-9", "gpu-10", "gpu-11"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-9", "gpu-10", "gpu-11"}, + convertIntToAffinityDimension(1): {"gpu-9", 
"gpu-10", "gpu-11", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-13": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-14", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-14", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-14", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-14", "gpu-15", "gpu-16"}, }, }, "gpu-14": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-15", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-15", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-15", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-15", "gpu-16"}, }, }, "gpu-15": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-16"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-16"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-16"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-16"}, }, }, "gpu-16": { - DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ - convertIntToAffinityPriority(0): {"gpu-13", "gpu-14", "gpu-15"}, - convertIntToAffinityPriority(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15"}, + DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-13", "gpu-14", "gpu-15"}, + convertIntToAffinityDimension(1): {"gpu-9", "gpu-10", "gpu-11", "gpu-12", 
"gpu-13", "gpu-14", "gpu-15"}, }, }, }, @@ -2819,6 +2695,11 @@ func TestBind_DeviceAffinity(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() deviceBindingStrategy := NewDeviceAffinityStrategy() + if tt.ctx.GPUQRMPluginConfig == nil { + tt.ctx.GPUQRMPluginConfig = qrmconfig.NewGPUQRMPluginConfig() + // Keep tests focused on affinity logic rather than admission strictness. + tt.ctx.GPUQRMPluginConfig.RequiredDeviceAffinity = false + } tt.ctx.Emitter = metrics.DummyMetrics{} result, err := deviceBindingStrategy.Bind(tt.ctx, tt.sortedDevices) if (err != nil) != tt.expectedErr { @@ -2830,6 +2711,341 @@ func TestBind_DeviceAffinity(t *testing.T) { } } +func TestBind_DeviceAffinity_RequiredStrict(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ctx *allocate.AllocationContext + sortedDevices []string + expectedErr bool + expectedResultSize int + expectedPriority int + }{ + { + name: "required affinity: allocate strictly within a satisfiable affinity group", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 4, + }, + GPUQRMPluginConfig: &qrmconfig.GPUQRMPluginConfig{RequiredDeviceAffinity: true}, + DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, + Devices: map[string]machine.DeviceInfo{ + // One affinity group of size 4 at priority 1. 
+ "gpu-1": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}}}, + "gpu-2": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}}}, + "gpu-3": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}}}, + "gpu-4": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}}}, + // Extra devices that should not be needed. + "gpu-5": {}, + "gpu-6": {}, + }, + }, + Emitter: metrics.DummyMetrics{}, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6"}, + expectedResultSize: 4, + expectedPriority: 0, + }, + { + name: "required affinity: restrict allocation to smallest satisfiable affinity level", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrmconfig.GPUQRMPluginConfig{RequiredDeviceAffinity: true}, + DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, + Devices: map[string]machine.DeviceInfo{ + // Priority 0: affinity group size 2 (gpu-1,gpu-2). + // Priority 1: affinity group size 4 (gpu-1,gpu-2,gpu-3,gpu-4) which supersets priority 0. 
+ "gpu-1": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, + }}, + "gpu-2": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, + }}, + "gpu-3": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}}}, + "gpu-4": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}}}, + }, + }, + Emitter: metrics.DummyMetrics{}, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + expectedResultSize: 2, + expectedPriority: 0, + }, + { + name: "required affinity: fail when no topology affinity is present", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrmconfig.GPUQRMPluginConfig{RequiredDeviceAffinity: true}, + DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0"}, + Devices: map[string]machine.DeviceInfo{ + // No devices declare any DeviceAffinity. 
+ "gpu-1": {}, + "gpu-2": {}, + "gpu-3": {}, + "gpu-4": {}, + }, + }, + Emitter: metrics.DummyMetrics{}, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + expectedErr: true, + }, + { + name: "required affinity: fail when available devices are split across different size-2 groups", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrmconfig.GPUQRMPluginConfig{RequiredDeviceAffinity: true}, + DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, + Devices: map[string]machine.DeviceInfo{ + // Priority 0 has two disjoint affinity groups of size 2: {1,2} and {3,4}. + // Priority 1 has a size-4 affinity group {1,2,3,4} that supersets both size-2 groups. + "gpu-1": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, + }}, + "gpu-2": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, + }}, + "gpu-3": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, + }}, + "gpu-4": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, + }}, + }, + }, + Emitter: metrics.DummyMetrics{}, + }, + // Only two devices are available, and they are from different priority-0 groups. 
+ sortedDevices: []string{"gpu-1", "gpu-3"}, + expectedErr: true, + }, + { + name: "required affinity: fail when request=4 devices are split across different size-4 groups", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 4, + }, + GPUQRMPluginConfig: &qrmconfig.GPUQRMPluginConfig{RequiredDeviceAffinity: true}, + DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, + Devices: map[string]machine.DeviceInfo{ + // Priority 0: two disjoint size-4 groups {1,2,3,4} and {5,6,7,8}. + // Priority 1: one size-8 group {1..8} that supersets both size-4 groups. + "gpu-1": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-2": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-3": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-4": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-5": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-6": 
{DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + }}, + "gpu-7": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, + }}, + "gpu-8": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, + }}, + }, + }, + Emitter: metrics.DummyMetrics{}, + }, + // Four available devices, but split across the two size-4 groups at priority 0. + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-5", "gpu-6"}, + expectedErr: true, + }, + { + name: "required affinity: allocate 4 devices from priority-1 size-4 group", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 4, + }, + GPUQRMPluginConfig: &qrmconfig.GPUQRMPluginConfig{RequiredDeviceAffinity: true}, + DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, + Devices: map[string]machine.DeviceInfo{ + // Priority 0: size-2 groups {1,2} and {3,4}. + // Priority 1: size-4 group {1,2,3,4} (superset of both size-2 groups). 
+ "gpu-1": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4"}, + }}, + "gpu-2": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4"}, + }}, + "gpu-3": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4"}, + }}, + "gpu-4": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3"}, + }}, + }, + }, + Emitter: metrics.DummyMetrics{}, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + expectedResultSize: 4, + expectedPriority: 1, + }, + { + name: "required affinity: allocate 6 devices from priority-1 size-8 group", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 6, + }, + GPUQRMPluginConfig: &qrmconfig.GPUQRMPluginConfig{RequiredDeviceAffinity: true}, + DeviceTopology: &machine.DeviceTopology{ + PriorityDimensions: []string{"0", "1"}, + Devices: map[string]machine.DeviceInfo{ + // Priority 0: two size-4 groups {1,2,3,4} and {5,6,7,8}. + // Priority 1: one size-8 group {1,2,3,4,5,6,7,8} that supersets both size-4 groups. 
+ "gpu-1": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-2", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-2": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-3", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-3": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-4"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-4": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-1", "gpu-2", "gpu-3"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-5": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-6", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, + }}, + "gpu-6": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-7", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + }}, + "gpu-7": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-8"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, + }}, + "gpu-8": {DeviceAffinity: map[machine.Dimension]machine.DeviceIDs{ + convertIntToAffinityDimension(0): {"gpu-5", "gpu-6", "gpu-7"}, + convertIntToAffinityDimension(1): {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, + }}, + }, + }, + Emitter: metrics.DummyMetrics{}, + }, + sortedDevices: 
[]string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + expectedResultSize: 6, + expectedPriority: 1, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + deviceBindingStrategy := NewDeviceAffinityStrategy() + result, err := deviceBindingStrategy.Bind(tt.ctx, tt.sortedDevices) + if (err != nil) != tt.expectedErr { + t.Fatalf("Bind() error = %v, expectedErr %v", err, tt.expectedErr) + } + if tt.expectedErr { + if result == nil || result.Success { + t.Fatalf("expected allocation to fail, got result=%v", result) + } + return + } + if result == nil || !result.Success { + t.Fatalf("expected allocation to succeed, got result=%v", result) + } + if len(result.AllocatedDevices) != tt.expectedResultSize { + t.Fatalf("allocated size = %d, want %d", len(result.AllocatedDevices), tt.expectedResultSize) + } + verifyResultIsAffinity(t, result, tt.ctx.DeviceTopology, tt.expectedPriority) + }) + } +} + func verifyAllocationResult( t *testing.T, result *allocate.AllocationResult, expectedResult *allocate.AllocationResult, isRandom bool, expectedResultSize int, @@ -2863,16 +3079,37 @@ func verifyResultIsAffinity( t *testing.T, result *allocate.AllocationResult, topology *machine.DeviceTopology, expectedAffinityPriorityLevel int, ) { - affinityMap := topology.GroupDeviceAffinity() - priorityLevelDevices := affinityMap[expectedAffinityPriorityLevel] - - sort.Slice(result.AllocatedDevices, func(i, j int) bool { - return result.AllocatedDevices[i] < result.AllocatedDevices[j] - }) + affinityLevels := topology.GroupDeviceAffinity() + if len(affinityLevels) == 0 { + t.Errorf("expected affinity groups but found none") + return + } + if expectedAffinityPriorityLevel >= len(affinityLevels) { + t.Errorf("expected affinity priority level %d, got only %d affinity levels", expectedAffinityPriorityLevel, len(affinityLevels)) + return + } + priorityLevelDevices := affinityLevels[expectedAffinityPriorityLevel] + if 
result == nil { + t.Errorf("result is nil") + return + } + // Allocation is considered affinity-compliant if all allocated devices are contained + // within any single affinity group at the expected priority level. for _, deviceIDs := range priorityLevelDevices { - sort.Slice(deviceIDs, func(i, j int) bool { return deviceIDs[i] < deviceIDs[j] }) - if reflect.DeepEqual(deviceIDs, machine.DeviceIDs(result.AllocatedDevices)) { + groupSet := make(map[string]struct{}, len(deviceIDs)) + for _, id := range deviceIDs { + groupSet[id] = struct{}{} + } + + ok := true + for _, alloc := range result.AllocatedDevices { + if _, exists := groupSet[alloc]; !exists { + ok = false + break + } + } + if ok { return } } diff --git a/pkg/config/agent/qrm/gpu_plugin.go b/pkg/config/agent/qrm/gpu_plugin.go index f610910083..7f94eb8dbb 100644 --- a/pkg/config/agent/qrm/gpu_plugin.go +++ b/pkg/config/agent/qrm/gpu_plugin.go @@ -33,6 +33,9 @@ type GPUQRMPluginConfig struct { GPUMemoryAllocatablePerGPU resource.Quantity // SkipGPUStateCorruption skip gpu state corruption, and it will be used after updating state properties SkipGPUStateCorruption bool + // RequiredDeviceAffinity specifies whether it is required for pods to follow device affinity strictly. + // If true, pods will fail to admit if they are not able to satisfy device affinity constraints. Set to true by default. + RequiredDeviceAffinity bool *gpustrategy.GPUStrategyConfig } diff --git a/pkg/util/machine/device.go b/pkg/util/machine/device.go index 2ae80a3c50..023c6738af 100644 --- a/pkg/util/machine/device.go +++ b/pkg/util/machine/device.go @@ -20,6 +20,7 @@ import ( "fmt" "reflect" "sort" + "strings" "sync" "time" @@ -317,20 +318,41 @@ func (t *DeviceTopology) IsDeviceHealthy(id string) (bool, bool) { } // GroupDeviceAffinity forms a topology graph such that all devices within a DeviceIDs group have an affinity with each other. -// They are differentiated by their affinity priority value. 
+// The outer slice is ordered from the highest priority to the lowest priority. // E.g. Output: // -// { -// 0: {{"gpu-0", "gpu-1"}, {"gpu-2", "gpu-3"}}, -// 1: {{"gpu-0", "gpu-1", "gpu-2", "gpu-3"}} -// } +// [ +// {{"gpu-0", "gpu-1"}, {"gpu-2", "gpu-3"}}, +// {{"gpu-0", "gpu-1", "gpu-2", "gpu-3"}}, +// ] // -// means that gpu-0 and gpu-1 have an affinity with each other, gpu-2 and gpu-3 have an affinity with each other in affinity priority 0. -// and gpu-0, gpu-1, gpu-2, and gpu-3 have an affinity with each other in affinity priority 1. -func (t *DeviceTopology) GroupDeviceAffinity() map[int][]DeviceIDs { - deviceAffinityGroup := make(map[int][]DeviceIDs) +// means that gpu-0 and gpu-1 have an affinity with each other, gpu-2 and gpu-3 have an affinity with each other in the highest affinity priority. +// and gpu-0, gpu-1, gpu-2, and gpu-3 have an affinity with each other in the next lower affinity priority. +func (t *DeviceTopology) GroupDeviceAffinity() [][]DeviceIDs { + if t == nil || len(t.Devices) == 0 || len(t.PriorityDimensions) == 0 { + return nil + } + + // Build dimension name -> priority index mapping. + // + // Use DeviceTopology.PriorityDimensions as the single source of truth. 
+ dimensionNameToPriority := make(map[string]int) + for i, name := range t.PriorityDimensions { + name = strings.ToLower(strings.TrimSpace(name)) + if name == "" { + continue + } + dimensionNameToPriority[name] = i + } + + deviceAffinityGroup := make([][]DeviceIDs, len(t.PriorityDimensions)) for deviceId, deviceInfo := range t.Devices { - for priority, affinityDeviceIDs := range deviceInfo.DeviceAffinity { + for dimension, affinityDeviceIDs := range deviceInfo.DeviceAffinity { + // filter out invalid dimensions because some invalid/empty configurations may be passed + if strings.TrimSpace(dimension.Name) == "" || strings.TrimSpace(dimension.Value) == "" { + continue + } + // Add itself in the group if it is not already included if !slices.Contains(affinityDeviceIDs, deviceId) { affinityDeviceIDs = append(affinityDeviceIDs, deviceId) @@ -338,9 +360,10 @@ func (t *DeviceTopology) GroupDeviceAffinity() map[int][]DeviceIDs { // Sort the strings for easier deduplication sort.Strings(affinityDeviceIDs) - priorityLevel := priority.GetPriorityLevel() - if _, ok := deviceAffinityGroup[priorityLevel]; !ok { - deviceAffinityGroup[priorityLevel] = make([]DeviceIDs, 0) + priorityLevel, ok := dimensionNameToPriority[strings.ToLower(strings.TrimSpace(dimension.Name))] + if !ok { + // Unknown dimension; ignore it to avoid inconsistent priority semantics. 
+ continue } // Add the affinityDeviceIDs to the priority level if it is not already there @@ -349,7 +372,16 @@ func (t *DeviceTopology) GroupDeviceAffinity() map[int][]DeviceIDs { } } } - return deviceAffinityGroup + + groupedByPriority := make([][]DeviceIDs, 0, len(deviceAffinityGroup)) + for _, groups := range deviceAffinityGroup { + if len(groups) == 0 { + continue + } + groupedByPriority = append(groupedByPriority, groups) + } + + return groupedByPriority } func containsGroup(groups []DeviceIDs, candidate DeviceIDs) bool { @@ -361,8 +393,9 @@ func containsGroup(groups []DeviceIDs, candidate DeviceIDs) bool { return false } -// DeviceAffinity is the map of priority level to the other deviceIds that a particular deviceId has an affinity with -type DeviceAffinity map[AffinityPriority]DeviceIDs +// DeviceAffinity maps an affinity dimension to the other device IDs that a particular device has an affinity with. +// Priority ordering across different dimensions is determined by DeviceTopology.PriorityDimensions. +type DeviceAffinity map[Dimension]DeviceIDs type DeviceInfo struct { Health string @@ -372,12 +405,12 @@ type DeviceInfo struct { func (i DeviceInfo) GetDimensions() []Dimension { dimensions := make([]Dimension, 0) - for priority := range i.DeviceAffinity { + for dimension := range i.DeviceAffinity { // filter out invalid dimensions because some invalid/empty configurations may be passed - if priority.Dimension.Name == "" || priority.Dimension.Value == "" { + if dimension.Name == "" || dimension.Value == "" { continue } - dimensions = append(dimensions, priority.Dimension) + dimensions = append(dimensions, dimension) } sort.Slice(dimensions, func(i, j int) bool { @@ -387,22 +420,6 @@ func (i DeviceInfo) GetDimensions() []Dimension { return dimensions } -// AffinityPriority represents the level of affinity that a deviceID has with another deviceID. -// It contains the actual priority level and the dimension of the affinity. 
-// The priority level is the value of the priority. The lower the value, the higher the priority. -type AffinityPriority struct { - PriorityLevel int - Dimension Dimension -} - -func (a *AffinityPriority) GetPriorityLevel() int { - return a.PriorityLevel -} - -func (a *AffinityPriority) GetDimension() Dimension { - return a.Dimension -} - // Dimension represents the dimension of the affinity. // Name is the name of the dimension. E.g. NUMA, SOCKET, etc. // Value is the id of the dimension. E.g. numa-0, numa-1, socket-0, socket-1, etc. diff --git a/pkg/util/machine/device_test.go b/pkg/util/machine/device_test.go index fdf94815fa..b9cc5567cb 100644 --- a/pkg/util/machine/device_test.go +++ b/pkg/util/machine/device_test.go @@ -181,492 +181,370 @@ func TestDeviceTopology_GroupDeviceAffinity(t *testing.T) { tests := []struct { name string deviceTopology *DeviceTopology - expectedDeviceAffinity map[int][]DeviceIDs + expectedDeviceAffinity [][]DeviceIDs + expectedNil bool }{ + { + name: "no affinity groups when PriorityDimensions is empty", + expectedNil: true, + deviceTopology: &DeviceTopology{ + PriorityDimensions: nil, + Devices: map[string]DeviceInfo{ + "npu-0": { + DeviceAffinity: map[Dimension]DeviceIDs{ + {Name: "pcie", Value: "0"}: {"npu-1"}, + }, + }, + "npu-1": { + DeviceAffinity: map[Dimension]DeviceIDs{ + {Name: "pcie", Value: "0"}: {"npu-0"}, + }, + }, + }, + }, + }, { name: "test simple affinity of 2 devices to 1 group with only affinity priority level", deviceTopology: &DeviceTopology{ + PriorityDimensions: []string{"pcie"}, Devices: map[string]DeviceInfo{ "npu-0": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ - { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "0", - }, - }: {"npu-1"}, + DeviceAffinity: map[Dimension]DeviceIDs{ + {Name: "pcie", Value: "0"}: {"npu-1"}, }, }, "npu-1": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ - { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "0", - }, - }: 
{"npu-0"}, + DeviceAffinity: map[Dimension]DeviceIDs{ + {Name: "pcie", Value: "0"}: {"npu-0"}, }, }, "npu-2": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ - { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "1", - }, - }: {"npu-3"}, + DeviceAffinity: map[Dimension]DeviceIDs{ + {Name: "pcie", Value: "1"}: {"npu-3"}, }, }, "npu-3": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ - { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "1", - }, - }: {"npu-2"}, + DeviceAffinity: map[Dimension]DeviceIDs{ + {Name: "pcie", Value: "1"}: {"npu-2"}, }, }, }, }, - expectedDeviceAffinity: map[int][]DeviceIDs{ - 0: { - {"npu-0", "npu-1"}, {"npu-2", "npu-3"}, - }, + expectedDeviceAffinity: [][]DeviceIDs{ + {{"npu-0", "npu-1"}, {"npu-2", "npu-3"}}, }, }, { name: "test simple affinity of 4 devices to 1 group with only affinity priority level", deviceTopology: &DeviceTopology{ + PriorityDimensions: []string{"numa"}, Devices: map[string]DeviceInfo{ "npu-0": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-1", "npu-2", "npu-3"}, }, }, "npu-1": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-2", "npu-3"}, }, }, "npu-2": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", "npu-3"}, }, }, "npu-3": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", "npu-2"}, 
}, }, "npu-4": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-5", "npu-6", "npu-7"}, }, }, "npu-5": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-4", "npu-6", "npu-7"}, }, }, "npu-6": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-4", "npu-5", "npu-7"}, }, }, "npu-7": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-4", "npu-5", "npu-6"}, }, }, }, }, - expectedDeviceAffinity: map[int][]DeviceIDs{ - 0: { - {"npu-0", "npu-1", "npu-2", "npu-3"}, {"npu-4", "npu-5", "npu-6", "npu-7"}, - }, + expectedDeviceAffinity: [][]DeviceIDs{ + {{"npu-0", "npu-1", "npu-2", "npu-3"}, {"npu-4", "npu-5", "npu-6", "npu-7"}}, }, }, { name: "device topology includes self for one affinity level", deviceTopology: &DeviceTopology{ + PriorityDimensions: []string{"numa"}, Devices: map[string]DeviceInfo{ "npu-0": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1"}, }, }, "npu-1": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1"}, }, }, "npu-2": { - DeviceAffinity: 
map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-2", "npu-3"}, }, }, "npu-3": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-2", "npu-3"}, }, }, }, }, - expectedDeviceAffinity: map[int][]DeviceIDs{ - 0: { - {"npu-0", "npu-1"}, {"npu-2", "npu-3"}, - }, + expectedDeviceAffinity: [][]DeviceIDs{ + {{"npu-0", "npu-1"}, {"npu-2", "npu-3"}}, }, }, { name: "test simple affinity of 2 devices to 1 group with 2 affinity priority level", deviceTopology: &DeviceTopology{ + PriorityDimensions: []string{"pcie", "numa"}, Devices: map[string]DeviceInfo{ "npu-0": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"npu-1"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-1", "npu-2", "npu-3"}, }, }, "npu-1": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"npu-0"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-2", "npu-3"}, }, }, "npu-2": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"npu-3"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", "npu-3"}, }, }, "npu-3": 
{ - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"npu-2"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", "npu-2"}, }, }, }, }, - expectedDeviceAffinity: map[int][]DeviceIDs{ - 0: { - {"npu-0", "npu-1"}, {"npu-2", "npu-3"}, - }, - 1: { - {"npu-0", "npu-1", "npu-2", "npu-3"}, - }, + expectedDeviceAffinity: [][]DeviceIDs{ + {{"npu-0", "npu-1"}, {"npu-2", "npu-3"}}, + {{"npu-0", "npu-1", "npu-2", "npu-3"}}, }, }, { name: "device topology includes self for 2 affinity levels", deviceTopology: &DeviceTopology{ + PriorityDimensions: []string{"pcie", "numa"}, Devices: map[string]DeviceInfo{ "npu-0": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"npu-0", "npu-1"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", "npu-2", "npu-3"}, }, }, "npu-1": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "0", - }, + Name: "pcie", + Value: "0", }: {"npu-0", "npu-1"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", "npu-2", "npu-3"}, }, }, "npu-2": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"npu-2", "npu-3"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", 
"npu-2", "npu-3"}, }, }, "npu-3": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "pcie", - Value: "1", - }, + Name: "pcie", + Value: "1", }: {"npu-2", "npu-3"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-1", "npu-2", "npu-3"}, }, }, }, }, - expectedDeviceAffinity: map[int][]DeviceIDs{ - 0: { - {"npu-0", "npu-1"}, {"npu-2", "npu-3"}, - }, - 1: { - {"npu-0", "npu-1", "npu-2", "npu-3"}, - }, + expectedDeviceAffinity: [][]DeviceIDs{ + {{"npu-0", "npu-1"}, {"npu-2", "npu-3"}}, + {{"npu-0", "npu-1", "npu-2", "npu-3"}}, }, }, { name: "unsorted device topology has no effect on result", deviceTopology: &DeviceTopology{ + PriorityDimensions: []string{"numa"}, Devices: map[string]DeviceInfo{ "npu-0": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-2", "npu-1", "npu-3"}, }, }, "npu-1": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-3", "npu-0", "npu-2"}, }, }, "npu-2": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-1", "npu-0", "npu-3"}, }, }, "npu-3": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-0", "npu-2", "npu-1"}, }, }, "npu-4": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - 
PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-6", "npu-5", "npu-7"}, }, }, "npu-5": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-7", "npu-4", "npu-6"}, }, }, "npu-6": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-5", "npu-4", "npu-7"}, }, }, "npu-7": { - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "1", - }, + Name: "numa", + Value: "1", }: {"npu-6", "npu-4", "npu-5"}, }, }, }, }, - expectedDeviceAffinity: map[int][]DeviceIDs{ - 0: { - {"npu-0", "npu-1", "npu-2", "npu-3"}, {"npu-4", "npu-5", "npu-6", "npu-7"}, - }, + expectedDeviceAffinity: [][]DeviceIDs{ + {{"npu-0", "npu-1", "npu-2", "npu-3"}, {"npu-4", "npu-5", "npu-6", "npu-7"}}, }, }, } @@ -676,6 +554,10 @@ func TestDeviceTopology_GroupDeviceAffinity(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() deviceAffinity := tt.deviceTopology.GroupDeviceAffinity() + if tt.expectedNil { + assert.Nil(t, deviceAffinity) + return + } evaluateDeviceAffinity(t, deviceAffinity, tt.expectedDeviceAffinity) }) } @@ -698,20 +580,14 @@ func evaluateDeviceNUMAAffinity(t *testing.T, expectedDeviceNUMAAffinity, actual } } -func evaluateDeviceAffinity(t *testing.T, expectedDeviceAffinity, actualDeviceAffinity map[int][]DeviceIDs) { +func evaluateDeviceAffinity(t *testing.T, expectedDeviceAffinity, actualDeviceAffinity [][]DeviceIDs) { if len(actualDeviceAffinity) != len(expectedDeviceAffinity) { t.Errorf("expected %d affinities, got %d", len(expectedDeviceAffinity), len(actualDeviceAffinity)) return } - 
for priority, expected := range expectedDeviceAffinity { - actual, ok := actualDeviceAffinity[priority] - if !ok { - t.Errorf("expected affinities for priority %v, but it is not found", priority) - return - } - - if !equalDeviceIDsGroupsIgnoreOrder(t, expected, actual) { + for priority := range expectedDeviceAffinity { + if !equalDeviceIDsGroupsIgnoreOrder(t, expectedDeviceAffinity[priority], actualDeviceAffinity[priority]) { return } } @@ -861,34 +737,22 @@ func TestDeviceInfo_GetDimensions(t *testing.T) { t.Parallel() deviceInfo := DeviceInfo{ - DeviceAffinity: map[AffinityPriority]DeviceIDs{ + DeviceAffinity: map[Dimension]DeviceIDs{ { - PriorityLevel: 0, - Dimension: Dimension{ - Name: "numa", - Value: "0", - }, + Name: "numa", + Value: "0", }: {"npu-1"}, { - PriorityLevel: 1, - Dimension: Dimension{ - Name: "", - Value: "1", - }, + Name: "", + Value: "1", }: {"npu-2"}, { - PriorityLevel: 2, - Dimension: Dimension{ - Name: "socket", - Value: "", - }, + Name: "socket", + Value: "", }: {"npu-3"}, { - PriorityLevel: 3, - Dimension: Dimension{ - Name: "pcie", - Value: "2", - }, + Name: "pcie", + Value: "2", }: {"npu-4"}, }, }