diff --git a/.changelog/27177.txt b/.changelog/27177.txt
new file mode 100644
index 00000000000..59f560271a4
--- /dev/null
+++ b/.changelog/27177.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+scheduler (Enterprise): Fixed a bug where tasks were not placed on the same NUMA node as their reserved devices
+```
diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go
index df055515ed7..8338f33cd6b 100644
--- a/command/agent/job_endpoint.go
+++ b/command/agent/job_endpoint.go
@@ -1639,6 +1639,7 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources {
 	if in.NUMA != nil {
 		out.NUMA = &structs.NUMA{
 			Affinity: in.NUMA.Affinity,
+			Devices:  slices.Clone(in.NUMA.Devices),
 		}
 	}
 
diff --git a/scheduler/feasible/device.go b/scheduler/feasible/device.go
index 4f34b039566..0960dbffdf9 100644
--- a/scheduler/feasible/device.go
+++ b/scheduler/feasible/device.go
@@ -120,8 +120,13 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque
 	// Determine the devices that are feasible based on availability and
 	// constraints
 	for id, devInst := range d.Devices {
+		// Check if the device works
+		if !nodeDeviceMatches(d.ctx, devInst.Device, ask) {
+			continue
+		}
+
 		// Check if we have enough unused instances to use this
-		assignable := uint64(0)
+		assignable := []string{}
 		for instanceID, v := range devInst.Instances {
 			if v != 0 {
 				continue
@@ -129,16 +134,18 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque
 			if !mem.Matches(instanceID, devInst.Device) {
 				continue
 			}
-			assignable++
-		}
+			if d.deviceIDMatchesConstraint(instanceID, ask.Constraints, devInst.Device) {
+				assignable = append(assignable, instanceID)
+			}
 
-		// This device doesn't have enough instances
-		if assignable < ask.Count {
-			continue
+			// Don't assign more than the ask
+			if len(assignable) == int(ask.Count) {
+				break
+			}
 		}
 
-		// Check if the device works
-		if !nodeDeviceMatches(d.ctx, devInst.Device, ask) {
+		// This device doesn't have enough instances
+		if len(assignable) < int(ask.Count) {
 			continue
 		}
 
@@ -185,19 +192,7 @@ func (d *deviceAllocator) createOffer(mem *memoryNodeMatcher, ask *structs.Reque
 			Vendor:    id.Vendor,
 			Type:      id.Type,
 			Name:      id.Name,
-			DeviceIDs: make([]string, 0, ask.Count),
-		}
-
-		assigned := uint64(0)
-		for id, v := range devInst.Instances {
-			if v == 0 && assigned < ask.Count &&
-				d.deviceIDMatchesConstraint(id, ask.Constraints, devInst.Device) {
-				assigned++
-				offer.DeviceIDs = append(offer.DeviceIDs, id)
-				if assigned == ask.Count {
-					break
-				}
-			}
+			DeviceIDs: assignable,
 		}
 	}
 
diff --git a/scheduler/feasible/device_test.go b/scheduler/feasible/device_test.go
index 4dd4d2b277d..13f525b5d8b 100644
--- a/scheduler/feasible/device_test.go
+++ b/scheduler/feasible/device_test.go
@@ -9,6 +9,7 @@ import (
 	"github.com/hashicorp/go-set/v3"
 	"github.com/hashicorp/nomad/ci"
 	"github.com/hashicorp/nomad/client/lib/numalib"
+	"github.com/hashicorp/nomad/client/lib/numalib/hw"
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
@@ -215,7 +216,7 @@ func TestDeviceAllocator_Allocate_NUMA_node1(t *testing.T) {
 }
 
 // Test that asking for a device with constraints works
-func TestDeviceAllocator_Allocate_Constraints(t *testing.T) {
+func TestDeviceAllocate_Constraints_NoMemoryMatch(t *testing.T) {
 	ci.Parallel(t)
 
 	n := multipleNvidiaNode()
@@ -349,8 +350,54 @@
 }
 
+func TestDeviceAllocate_Constraints_MemoryMatch(t *testing.T) {
+	ci.Parallel(t)
+
+	n := multipleNvidiaNode()
+	nvidia0 := n.NodeResources.Devices[0]
+
+	_, ctx := MockContext(t)
+	d := newDeviceAllocator(ctx, n)
+	must.NotNil(t, d)
+
+	testConstraints := []*structs.Constraint{
+		{
+			LTarget: "${device.ids}",
+			Operand: "set_contains",
+			RTarget: nvidia0.Instances[1].ID,
+		},
+	}
+	// Build the request
+	ask := deviceRequest("nvidia/gpu", 1, testConstraints, nil)
+
+	mem := &memoryNodeMatcher{
+		memoryNode: 1,
+		topology: &numalib.Topology{
+			BusAssociativity: map[string]hw.NodeID{
+				nvidia0.Instances[0].Locality.PciBusID: 1,
+				nvidia0.Instances[1].Locality.PciBusID: 2,
+			},
+		},
+		devices: set.From([]string{nvidia0.ID().String()}),
+	}
+	out, _, err := d.createOffer(mem, ask)
+
+	// the first memoryNodeMatcher does not have the correct memory node
+	must.ErrorContains(t, err, "no devices match")
+	must.Nil(t, out)
+
+	// change to the correct node
+	mem.memoryNode = 2
+	out, _, err = d.createOffer(mem, ask)
+
+	must.NoError(t, err)
+	must.Len(t, 1, out.DeviceIDs)
+	must.SliceContains(t, collectInstanceIDs(nvidia0), out.DeviceIDs[0])
+	must.SliceContains(t, out.DeviceIDs, nvidia0.Instances[1].ID)
+}
+
 // Test that asking for a device with affinities works
-func TestDeviceAllocator_Allocate_Affinities(t *testing.T) {
+func TestDeviceAllocator_Affinities(t *testing.T) {
 	ci.Parallel(t)
 
 	n := multipleNvidiaNode()
 
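For reference, the control-flow change to `createOffer` can be summarized outside Nomad's types. The sketch below is illustrative only: the `instance` struct and `pickInstances` helper are hypothetical stand-ins, not Nomad's actual API, and the memory-node and constraint checks are folded into a single predicate. It shows the fixed ordering, where instance IDs are collected in the same loop that filters them, so the offered IDs are exactly the ones that passed the NUMA check.

```go
package main

import "fmt"

// instance is a simplified stand-in for a device instance; Nomad's real
// allocator tracks usage counts and PCI bus locality per instance.
type instance struct {
	id       string
	used     bool
	numaNode int
}

// pickInstances mirrors the fixed loop: collect up to count unused
// instances on wantNode, stop once the ask is met, and reject the
// device entirely if it cannot supply enough matching instances.
func pickInstances(instances []instance, wantNode, count int) []string {
	var assignable []string
	for _, inst := range instances {
		if inst.used || inst.numaNode != wantNode {
			continue
		}
		assignable = append(assignable, inst.id)
		// Don't assign more than the ask.
		if len(assignable) == count {
			break
		}
	}
	// This device doesn't have enough matching instances.
	if len(assignable) < count {
		return nil
	}
	return assignable
}

func main() {
	instances := []instance{
		{id: "gpu-0", numaNode: 1},
		{id: "gpu-1", numaNode: 2},
		{id: "gpu-2", numaNode: 2},
	}
	fmt.Println(pickInstances(instances, 2, 2)) // [gpu-1 gpu-2]
	fmt.Println(pickInstances(instances, 1, 2)) // [] (only one instance on node 1)
}
```

The design point the diff makes: the removed second loop rebuilt `DeviceIDs` while checking only availability and ID constraints, never `mem.Matches`, so a device could pass the count check on NUMA-matched instances yet offer IDs from a different node. Selecting the IDs once, in the filtering loop, closes that gap.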