Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2901,7 +2901,7 @@ func (c *Client) updateAlloc(update *structs.Allocation) {
// Reconnect unknown allocations if they were updated and are not terminal.
reconnect := update.ClientStatus == structs.AllocClientStatusUnknown &&
update.AllocModifyIndex > alloc.AllocModifyIndex &&
(!update.ServerTerminalStatus() || !alloc.PreventReplaceOnDisconnect())
!update.ServerTerminalStatus()
if reconnect {
err = ar.Reconnect(update)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion nomad/plan_apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool {
// as non reschedulables when lost or if the allocs are being updated to lost.
func isValidForDownNode(plan *structs.Plan, nodeID string) bool {
for _, alloc := range plan.NodeAllocation[nodeID] {
if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.PreventReplaceOnDisconnect()) &&
if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && !alloc.ReplaceOnDisconnect()) &&
(alloc.ClientStatus != structs.AllocClientStatusLost) {
return false
}
Expand Down
36 changes: 26 additions & 10 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -7634,9 +7634,9 @@ func (tg *TaskGroup) Replace() bool {
return *tg.Disconnect.Replace
}

// GetDisconnectLostTimeout is a helper meant to simplify the logic for
// GetDisconnectLostAfter is a helper meant to simplify the logic for
// getting the Disconnect.LostAfter field of a task group.
func (tg *TaskGroup) GetDisconnectLostTimeout() time.Duration {
func (tg *TaskGroup) GetDisconnectLostAfter() time.Duration {
if tg.Disconnect != nil {
return tg.Disconnect.LostAfter
}
Expand Down Expand Up @@ -11582,14 +11582,26 @@ func (a *Allocation) DisconnectTimeout(now time.Time) time.Time {

tg := a.Job.LookupTaskGroup(a.TaskGroup)

timeout := tg.GetDisconnectLostTimeout()
timeout := tg.GetDisconnectLostAfter()
if timeout == 0 {
return now
}

return now.Add(timeout)
}

// DisconnectLostAfter is a helper to get the Disconnect.LostAfter for an allocation
func (a *Allocation) DisconnectLostAfter() time.Duration {
if a.Job != nil {
tg := a.Job.LookupTaskGroup(a.TaskGroup)
if tg != nil {
return tg.GetDisconnectLostAfter()
}
}

return 0
}

// SupportsDisconnectedClients determines whether both the server and the task group
// are configured to allow the allocation to reconnect after network connectivity
// has been lost and then restored.
Expand All @@ -11601,24 +11613,24 @@ func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClien
if a.Job != nil {
tg := a.Job.LookupTaskGroup(a.TaskGroup)
if tg != nil {
return tg.GetDisconnectLostTimeout() != 0
return tg.GetDisconnectLostAfter() != 0
}
}

return false
}

// PreventReplaceOnDisconnect determines if an alloc allows to have a replacement
// ReplaceOnDisconnect determines if an alloc can be replaced
// when Disconnected.
func (a *Allocation) PreventReplaceOnDisconnect() bool {
func (a *Allocation) ReplaceOnDisconnect() bool {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed to use positive function naming, which avoids double negatives.

if a.Job != nil {
tg := a.Job.LookupTaskGroup(a.TaskGroup)
if tg != nil {
return !tg.Replace()
return tg.Replace()
}
}

return false
return true
}

// NextDelay returns a duration after which the allocation can be rescheduled.
Expand Down Expand Up @@ -11834,8 +11846,12 @@ func (a *Allocation) Expired(now time.Time) bool {
return false
}

timeout := tg.GetDisconnectLostTimeout()
if timeout == 0 && tg.Replace() {
if !tg.Replace() {
return false
}

timeout := tg.GetDisconnectLostAfter()
if timeout == 0 {
return false
}

Expand Down
52 changes: 26 additions & 26 deletions scheduler/reconciler/allocs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ func TestAllocSet_filterByTainted(t *testing.T) {
ID: "draining",
DrainStrategy: mock.DrainNode().DrainStrategy,
},
"lost": {
ID: "lost",
"down": {
ID: "down",
Status: structs.NodeStatusDown,
},
"nil": nil,
Expand Down Expand Up @@ -162,12 +162,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
Job: testJob,
NodeID: "draining",
},
// Terminal allocs are always untainted, even on lost nodes
// Terminal allocs are always untainted, even on down nodes
"untainted4": {
ID: "untainted4",
ClientStatus: structs.AllocClientStatusComplete,
Job: testJob,
NodeID: "lost",
NodeID: "down",
},
// Non-terminal alloc with migrate=true should migrate on a draining node
"migrating1": {
Expand Down Expand Up @@ -207,12 +207,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
Job: testJob,
NodeID: "draining",
},
// Terminal allocs are always untainted, even on lost nodes
// Terminal allocs are always untainted, even on down nodes
"untainted4": {
ID: "untainted4",
ClientStatus: structs.AllocClientStatusComplete,
Job: testJob,
NodeID: "lost",
NodeID: "down",
},
},
migrate: allocSet{
Expand Down Expand Up @@ -247,19 +247,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
// false failures, so don't perform that test if in this case.
skipNilNodeTest: true,
all: allocSet{
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost1": {
ID: "lost1",
ClientStatus: structs.AllocClientStatusPending,
Job: testJob,
NodeID: "lost",
NodeID: "down",
},
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost2": {
ID: "lost2",
ClientStatus: structs.AllocClientStatusRunning,
Job: testJob,
NodeID: "lost",
NodeID: "down",
},
},
untainted: allocSet{},
Expand All @@ -268,19 +268,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
reconnecting: allocSet{},
ignore: allocSet{},
lost: allocSet{
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost1": {
ID: "lost1",
ClientStatus: structs.AllocClientStatusPending,
Job: testJob,
NodeID: "lost",
NodeID: "down",
},
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost2": {
ID: "lost2",
ClientStatus: structs.AllocClientStatusRunning,
Job: testJob,
NodeID: "lost",
NodeID: "down",
},
},
expiring: allocSet{},
Expand Down Expand Up @@ -864,12 +864,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
Job: testJobSingle,
NodeID: "draining",
},
// Terminal allocs are always untainted, even on lost nodes
// Terminal allocs are always untainted, even on down nodes
"untainted4": {
ID: "untainted4",
ClientStatus: structs.AllocClientStatusComplete,
Job: testJobSingle,
NodeID: "lost",
NodeID: "down",
},
// Non-terminal alloc with migrate=true should migrate on a draining node
"migrating1": {
Expand Down Expand Up @@ -909,12 +909,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
Job: testJobSingle,
NodeID: "draining",
},
// Terminal allocs are always untainted, even on lost nodes
// Terminal allocs are always untainted, even on down nodes
"untainted4": {
ID: "untainted4",
ClientStatus: structs.AllocClientStatusComplete,
Job: testJobSingle,
NodeID: "lost",
NodeID: "down",
},
},
migrate: allocSet{
Expand Down Expand Up @@ -949,19 +949,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
// false failures, so don't perform that test if in this case.
skipNilNodeTest: true,
all: allocSet{
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost1": {
ID: "lost1",
ClientStatus: structs.AllocClientStatusPending,
Job: testJobSingle,
NodeID: "lost",
NodeID: "down",
},
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost2": {
ID: "lost2",
ClientStatus: structs.AllocClientStatusRunning,
Job: testJobSingle,
NodeID: "lost",
NodeID: "down",
},
},
untainted: allocSet{},
Expand All @@ -970,19 +970,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
reconnecting: allocSet{},
ignore: allocSet{},
lost: allocSet{
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost1": {
ID: "lost1",
ClientStatus: structs.AllocClientStatusPending,
Job: testJobSingle,
NodeID: "lost",
NodeID: "down",
},
// Non-terminal allocs on lost nodes are lost
// Non-terminal allocs on lost nodes are down
"lost2": {
ID: "lost2",
ClientStatus: structs.AllocClientStatusRunning,
Job: testJobSingle,
NodeID: "lost",
NodeID: "down",
},
},
expiring: allocSet{},
Expand Down
Loading
Loading