Skip to content

Commit d8c58a7

Browse files
authored
host volumes: account for proposed allocs in unmet claims (#27470)
When a dynamic host volume requests are marked "sticky", we need to ensure that any new allocations are preferentially placed on nodes with volumes that have been previously claimed but are no longer claimed. But when we have multiple allocations in an allocation, we're not counting allocations that have been previously placed against the number of unmet claims. This results in placement failures, particularly in cases where a volume has been deleted out from under the claim. Remove the proposed allocations from the list of claims in `SetVolumes`, which is called at the start of each iteration of the nodes while looking for a placement. Rework the test to exercise this behavior across placing multiple allocations in an evaluation, including an allocation with a valid claim and one that's been evicted. Ref: https://hashicorp.atlassian.net/browse/NMD-1062 Fixes: #27002
1 parent 4c86c8c commit d8c58a7

3 files changed

Lines changed: 190 additions & 88 deletions

File tree

.changelog/27470.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
host volumes: Fixed a bug where allocations that request volumes with sticky=true could not be placed if previous allocations in the job claimed volumes
3+
```

scheduler/feasible/feasible.go

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -212,15 +212,15 @@ type HostVolumeChecker struct {
212212
namespace string
213213
jobID string
214214
taskGroupName string
215-
claims []*structs.TaskGroupHostVolumeClaim
215+
unmetClaims []*structs.TaskGroupHostVolumeClaim
216216
}
217217

218218
// NewHostVolumeChecker creates a HostVolumeChecker from a set of volumes
219219
func NewHostVolumeChecker(ctx Context) *HostVolumeChecker {
220220
hostVolumeChecker := &HostVolumeChecker{
221-
ctx: ctx,
222-
claims: []*structs.TaskGroupHostVolumeClaim{},
223-
volumeReqs: []*structs.VolumeRequest{},
221+
ctx: ctx,
222+
unmetClaims: []*structs.TaskGroupHostVolumeClaim{},
223+
volumeReqs: []*structs.VolumeRequest{},
224224
}
225225

226226
return hostVolumeChecker
@@ -232,16 +232,46 @@ func (h *HostVolumeChecker) SetVolumes(allocName, ns, jobID, taskGroupName strin
232232
h.jobID = jobID
233233
h.taskGroupName = taskGroupName
234234
h.volumeReqs = []*structs.VolumeRequest{}
235-
236-
storedClaims, _ := h.ctx.State().TaskGroupHostVolumeClaimsByFields(nil, state.TgvcSearchableFields{
237-
Namespace: ns,
238-
JobID: jobID,
239-
TaskGroupName: taskGroupName,
240-
})
241-
235+
h.unmetClaims = []*structs.TaskGroupHostVolumeClaim{}
236+
237+
storedClaims, _ := h.ctx.State().TaskGroupHostVolumeClaimsByFields(
238+
nil, state.TgvcSearchableFields{
239+
Namespace: ns,
240+
JobID: jobID,
241+
})
242+
243+
// if we have previous claims for sticky volumes, we want to make sure these
244+
// are getting picked first. But we don't want to count claims that have
245+
// been met by proposed allocs already
246+
NEXT:
242247
for raw := storedClaims.Next(); raw != nil; raw = storedClaims.Next() {
243248
claim := raw.(*structs.TaskGroupHostVolumeClaim)
244-
h.claims = append(h.claims, claim)
249+
250+
alloc, err := h.ctx.State().AllocByID(nil, claim.AllocID)
251+
if err != nil || alloc == nil {
252+
continue
253+
}
254+
vol, err := h.ctx.State().HostVolumeByID(nil, ns, claim.VolumeID, false)
255+
if err != nil || vol == nil {
256+
continue
257+
}
258+
259+
// the proposed alloc might be the exact same alloc or it could be an
260+
// alloc from earlier in this eval that we've successfully placed
261+
proposed, err := h.ctx.ProposedAllocs(alloc.NodeID)
262+
if err == nil {
263+
for _, proposedAlloc := range proposed {
264+
if proposedAlloc.ID == claim.AllocID ||
265+
(proposedAlloc.TaskGroup == taskGroupName &&
266+
vol.NodeID == alloc.NodeID &&
267+
vol.ID == claim.VolumeID) {
268+
continue NEXT
269+
}
270+
}
271+
h.unmetClaims = append(h.unmetClaims, claim)
272+
} else {
273+
h.unmetClaims = append(h.unmetClaims, claim)
274+
}
245275
}
246276

247277
for _, req := range volumes {
@@ -308,20 +338,21 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool {
308338
if req.Sticky {
309339
// the node is feasible if there are no remaining claims to
310340
// fulfill or if there's an exact match
311-
if len(h.claims) == 0 {
341+
if len(h.unmetClaims) == 0 {
312342
return true
313343
}
314344

315-
for _, c := range h.claims {
345+
for _, c := range h.unmetClaims {
316346
if c.VolumeID == vol.ID {
317347
// if we have a match for a volume claim, delete this
318348
// claim from the claims list in the feasibility
319349
// checker. This is needed for situations when jobs get
320350
// scaled up and new allocations need to be placed on
321351
// the same node.
322-
h.claims = slices.DeleteFunc(h.claims, func(c *structs.TaskGroupHostVolumeClaim) bool {
323-
return c.VolumeID == vol.ID
324-
})
352+
h.unmetClaims = slices.DeleteFunc(h.unmetClaims,
353+
func(claim *structs.TaskGroupHostVolumeClaim) bool {
354+
return claim.VolumeID == vol.ID
355+
})
325356
return true
326357
}
327358
}

0 commit comments

Comments
 (0)