Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/27804.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
deployments: reset ProgressDeadline after pausing and do not fail while paused
```
9 changes: 9 additions & 0 deletions nomad/deploymentwatcher/deployment_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,9 @@ func (w *deploymentWatcher) shouldFail() (fail, rollback bool, err error) {
}

fail = false
if d.Status == structs.DeploymentStatusPaused {
return false, false, nil
}
for tg, dstate := range d.TaskGroups {
// If we are in a canary state we fail if there aren't enough healthy
// allocs to satisfy DesiredCanaries
Expand Down Expand Up @@ -885,10 +888,16 @@ func (w *deploymentWatcher) getEval() *structs.Evaluation {

// getDeploymentStatusUpdate builds the status update request for the watched
// deployment from the given status and description.
//
// UpdatedAt is stamped with the current UTC time (in nanoseconds) only when
// the new status is paused or running; for every other status it is left at
// its zero value.
func (w *deploymentWatcher) getDeploymentStatusUpdate(status, desc string) *structs.DeploymentStatusUpdate {
	update := &structs.DeploymentStatusUpdate{
		DeploymentID:      w.deploymentID,
		Status:            status,
		StatusDescription: desc,
	}

	switch status {
	case structs.DeploymentStatusPaused, structs.DeploymentStatusRunning:
		update.UpdatedAt = time.Now().UTC().UnixNano()
	}

	return update
}

Expand Down
89 changes: 88 additions & 1 deletion nomad/deploymentwatcher/deployments_watcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
package deploymentwatcher

import (
"bytes"
"fmt"
"io"
"strings"
"testing"
"time"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/pointer"
Expand All @@ -17,6 +20,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/state"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test"
"github.com/shoenig/test/must"
"github.com/shoenig/test/wait"
)
Expand All @@ -31,6 +35,28 @@ func defaultTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend) {
return testDeploymentWatcher(t, LimitStateQueriesPerSecond, CrossDeploymentUpdateBatchDuration)
}

// logRecorderTestDeploymentWatcher builds a deployments Watcher on top of a
// mock backend, using the same LimitStateQueriesPerSecond and
// CrossDeploymentUpdateBatchDuration settings as defaultTestDeploymentWatcher,
// but with a debug-level logger that writes into an in-memory buffer. It is a
// variation of the logRecorder pattern from hostvolumemanager.
//
// The returned function yields everything written to the buffer so far and
// then resets it, so tests can assert on what the watcher logged.
func logRecorderTestDeploymentWatcher(t *testing.T) (*Watcher, *mockBackend, func() string) {
	var recorded bytes.Buffer

	opts := &hclog.LoggerOptions{
		Name:            "log-recorder",
		Output:          &recorded,
		Level:           hclog.Debug,
		IncludeLocation: true,
		DisableTime:     true,
	}
	recorder := hclog.New(opts)

	backend := newMockBackend(t)
	watcher := NewDeploymentsWatcher(
		recorder,
		backend,
		nil,
		nil,
		LimitStateQueriesPerSecond,
		CrossDeploymentUpdateBatchDuration,
	)

	// drain reads out the captured log text and clears the buffer for the
	// next call.
	drain := func() string {
		out, err := io.ReadAll(&recorded)
		test.NoError(t, err)
		recorded.Reset()
		return string(out)
	}

	return watcher, backend, drain
}

// Tests that the watcher properly watches for deployments and reconciles them
func TestWatcher_WatchDeployments(t *testing.T) {
ci.Parallel(t)
Expand Down Expand Up @@ -701,7 +727,68 @@ func TestWatcher_PauseDeployment_Pause_Paused(t *testing.T) {
must.Eq(t, structs.DeploymentStatusDescriptionPaused, d.StatusDescription)
}

// Test unpausing a deployment that is paused
// Test that the progress deadline check is skipped for a paused deployment,
// and that pausing resets RequireProgressBy to ModifyTime + ProgressDeadline
func TestWatcher_PauseDeployment_IgnoreProgressDeadline(t *testing.T) {
	ci.Parallel(t)
	w, m, getLogs := logRecorderTestDeploymentWatcher(t)

	// Create a job and a deployment with a very short progress deadline so
	// that it elapses while the test is running
	j := mock.Job()
	j.TaskGroups[0].Count = 1
	j.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy()
	j.TaskGroups[0].Update.ProgressDeadline = 50 * time.Millisecond
	d := mock.Deployment()
	d.JobID = j.ID
	d.TaskGroups["web"].DesiredTotal = 1
	d.TaskGroups["web"].ProgressDeadline = 50 * time.Millisecond

	// Create an alloc that belongs to the deployment
	a := mock.Alloc()
	now := time.Now()
	a.CreateTime = now.UnixNano()
	a.ModifyTime = now.UnixNano()
	a.DeploymentID = d.ID
	a.Job = j
	a.JobID = j.ID
	must.NoError(t, m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), nil, j))
	must.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d))
	must.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a}))
	w.SetEnabled(true, m.state)
	waitForWatchers(t, w, 1)

	// manually pause
	req := &structs.DeploymentPauseRequest{
		DeploymentID: d.ID,
		Pause:        true,
	}
	var resp structs.DeploymentUpdateResponse
	must.NoError(t, w.PauseDeployment(req, &resp))
	must.Eq(t, 1, watchersCount(w), must.Sprint("watcher should still be active"))

	// Fetch the stored deployment. The error and nil checks come before any
	// field access so a failed lookup fails the test instead of panicking.
	d1, err := m.state.DeploymentByID(nil, d.ID)
	must.NoError(t, err)
	must.NotNil(t, d1)
	must.Eq(t, structs.DeploymentStatusPaused, d1.Status)
	must.Eq(t, structs.DeploymentStatusDescriptionPaused, d1.StatusDescription)

	// named webState rather than state to avoid shadowing the imported
	// state package
	webState := d1.TaskGroups["web"]
	must.NotNil(t, webState)

	watcher, err := w.getOrCreateWatcher(d1.ID)
	must.NoError(t, err)
	must.NotNil(t, watcher)

	// wait until one second past the recorded RequireProgressBy so the
	// progress deadline has elapsed while the deployment is still paused
	time.Sleep(time.Until(webState.RequireProgressBy.Add(time.Second)))
	cutoff1 := watcher.getDeploymentProgressCutoff(d1)
	must.False(t, cutoff1.IsZero())

	// confirm deadline was skipped
	must.StrContains(t, getLogs(), "skipping deadline")
	// confirm RequireProgressBy was set to ModifyTime + ProgressDeadline
	modifiedTime := time.Unix(0, d1.ModifyTime)
	must.Eq(t, modifiedTime.Add(webState.ProgressDeadline), webState.RequireProgressBy)
}

// Test unpausing a deployment that is paused and ensure timeout
// is reset
func TestWatcher_PauseDeployment_Unpause_Paused(t *testing.T) {
ci.Parallel(t)
w, m := defaultTestDeploymentWatcher(t)
Expand Down
11 changes: 11 additions & 0 deletions nomad/state/state_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -4797,6 +4797,17 @@ func (s *StateStore) updateDeploymentStatusImpl(index uint64, u *structs.Deploym
copy.ModifyIndex = index
copy.ModifyTime = u.UpdatedAt

// check each TaskGroup for ProgressDeadline and reset RequireProgressBy
// to u.UpdatedAt + ProgressDeadline if neither equal 0
for _, dState := range copy.TaskGroups {
if dState == nil {
continue
}
if u.UpdatedAt != 0 && dState.ProgressDeadline != 0 {
updateTime := time.Unix(0, u.UpdatedAt)
dState.RequireProgressBy = updateTime.Add(dState.ProgressDeadline)
}
}
// Insert the deployment
if err := txn.Insert("deployment", copy); err != nil {
return err
Expand Down
Loading