Skip to content

client: persist correct allocation state #25782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/25782.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
client: fixes issue where persisting allocation state stored wrong client status
```
18 changes: 14 additions & 4 deletions client/allocrunner/alloc_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1136,10 +1136,20 @@ func (ar *allocRunner) PersistState() error {
return err
}

// TODO: consider persisting deployment state along with task status.
// While we study why only the alloc is persisted, I opted to maintain current
// behavior and not risk adding yet more IO calls unnecessarily.
return ar.stateDB.PutAllocation(ar.Alloc(), cstate.WithBatchMode())
// Currently, the allocRunners alloc is never updated with the correct
// ClientStatus. Instead of directly mutating the alloc held by the
// allocRunner, we make a copy and persist that updated copy to the
// state store. In the event of a client restart, we will restore
// with the correct state.
arCopy := ar.Alloc().Copy()

state := ar.AllocState()

arCopy.ClientStatus = state.ClientStatus
arCopy.ClientDescription = state.ClientDescription
arCopy.DeploymentStatus = state.DeploymentStatus

return ar.stateDB.PutAllocation(arCopy, cstate.WithBatchMode())
}

// Destroy the alloc runner by stopping it if it is still running and cleaning
Expand Down
9 changes: 7 additions & 2 deletions client/allocrunner/alloc_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2026,8 +2026,7 @@ func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) {
})
}

// TestAllocRunner_PersistState_Destroyed asserts that destroyed allocs don't persist anymore
func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
func TestAllocRunner_PersistState(t *testing.T) {
ci.Parallel(t)

alloc := mock.BatchAlloc()
Expand All @@ -2050,12 +2049,18 @@ func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
require.Fail(t, "timed out waiting for alloc to complete")
}

// force tasks into running state
for _, val := range ar.AllocState().TaskStates {
val.State = structs.TaskStateRunning
}

// test final persisted state upon completion
require.NoError(t, ar.PersistState())
allocs, _, err := conf.StateDB.GetAllAllocations()
require.NoError(t, err)
require.Len(t, allocs, 1)
require.Equal(t, alloc.ID, allocs[0].ID)
require.Equal(t, structs.TaskStateRunning, allocs[0].ClientStatus)
_, ts, err := conf.StateDB.GetTaskRunnerState(alloc.ID, taskName)
require.NoError(t, err)
require.Equal(t, structs.TaskStateDead, ts.State)
Expand Down