Skip to content

Commit 5199c2b

Browse files
committed
Mark installs and available rollbacks correctly during upgrade
1 parent 7b3817a commit 5199c2b

File tree

6 files changed

+98
-43
lines changed

6 files changed

+98
-43
lines changed

internal/pkg/agent/application/application.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,10 @@ func New(
150150
return nil
151151
})
152152
if updateInstallDescErr != nil {
153-
log.Warnf("Error removing rolled back version %s installed in %s")
153+
log.Warnf("Error setting current version as active in installDescriptor: %s", updateInstallDescErr)
154154
}
155155
}
156+
156157
}
157158

158159
// monitoring is not supported in bootstrap mode https://github.com/elastic/elastic-agent/issues/1761

internal/pkg/agent/application/upgrade/rollback_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,6 @@ func createUpdateMarker(t *testing.T, log *logger.Logger, topDir, newAgentVersio
725725
time.Now(),
726726
newAgentInstall,
727727
oldAgentInstall,
728-
nil, nil, disableRollbackWindow)
728+
nil, nil, nil)
729729
require.NoError(t, err, "error writing fake update marker")
730730
}

internal/pkg/agent/application/upgrade/step_mark.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
1717
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
1818
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
19+
v1 "github.com/elastic/elastic-agent/pkg/api/v1"
1920
"github.com/elastic/elastic-agent/pkg/core/logger"
2021
"github.com/elastic/elastic-agent/pkg/version"
2122
)
@@ -142,7 +143,7 @@ type updateActiveCommitFunc func(log *logger.Logger, topDirPath, hash string, wr
142143

143144
// markUpgrade marks update happened so we can handle grace period
144145
func markUpgradeProvider(updateActiveCommit updateActiveCommitFunc, writeFile writeFileFunc) markUpgradeFunc {
145-
return func(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, rollbackWindow time.Duration) error {
146+
return func(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, availableRollbacks []v1.AgentInstallDesc) error {
146147

147148
if len(previousAgent.hash) > hashLen {
148149
previousAgent.hash = previousAgent.hash[:hashLen]
@@ -160,16 +161,21 @@ func markUpgradeProvider(updateActiveCommit updateActiveCommitFunc, writeFile wr
160161
Details: upgradeDetails,
161162
}
162163

163-
if rollbackWindow > disableRollbackWindow && agent.parsedVersion != nil && !agent.parsedVersion.Less(*Version_9_2_0_SNAPSHOT) {
164+
if agent.parsedVersion != nil && !agent.parsedVersion.Less(*Version_9_2_0_SNAPSHOT) {
164165
// if we have a not empty rollback window, write the prev version in the rollbacks_available field
165166
// we also need to check the destination version because the manual rollback and delayed cleanup will be
166167
// handled by that version of agent, so it needs to be recent enough
167-
marker.RollbacksAvailable = []RollbackAvailable{
168-
{
169-
Version: previousAgent.version,
170-
Home: previousAgent.versionedHome,
171-
ValidUntil: updatedOn.Add(rollbackWindow),
172-
},
168+
for _, rollback := range availableRollbacks {
169+
rollbackAvailable := RollbackAvailable{
170+
Version: rollback.Version,
171+
Home: rollback.VersionedHome,
172+
}
173+
174+
if rollback.TTL != nil {
175+
rollbackAvailable.ValidUntil = *rollback.TTL
176+
}
177+
178+
marker.RollbacksAvailable = append(marker.RollbacksAvailable, rollbackAvailable)
173179
}
174180
}
175181

internal/pkg/agent/application/upgrade/step_mark_test.go

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616

1717
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
1818
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
19+
v1 "github.com/elastic/elastic-agent/pkg/api/v1"
1920
"github.com/elastic/elastic-agent/pkg/core/logger"
2021
"github.com/elastic/elastic-agent/pkg/core/logger/loggertest"
2122
agtversion "github.com/elastic/elastic-agent/pkg/version"
@@ -86,14 +87,15 @@ func TestMarkUpgrade(t *testing.T) {
8687
var parsed920SNAPSHOT = agtversion.NewParsedSemVer(9, 2, 0, "SNAPSHOT", "")
8788
// fix a timestamp (truncated to the second because of loss of precision during marshalling/unmarshalling)
8889
updatedOnNow := time.Now().UTC().Truncate(time.Second)
90+
twentyFourHoursFromNow := updatedOnNow.Add(24 * time.Hour)
8991

9092
type args struct {
91-
updatedOn time.Time
92-
currentAgent agentInstall
93-
previousAgent agentInstall
94-
action *fleetapi.ActionUpgrade
95-
details *details.Details
96-
rollbackWindow time.Duration
93+
updatedOn time.Time
94+
currentAgent agentInstall
95+
previousAgent agentInstall
96+
action *fleetapi.ActionUpgrade
97+
details *details.Details
98+
availableRollbacks []v1.AgentInstallDesc
9799
}
98100
type workingDirHook func(t *testing.T, dataDir string)
99101

@@ -130,9 +132,9 @@ func TestMarkUpgrade(t *testing.T) {
130132
hash: "prvagt",
131133
versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"),
132134
},
133-
action: nil,
134-
details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""),
135-
rollbackWindow: 0,
135+
action: nil,
136+
details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""),
137+
availableRollbacks: nil,
136138
},
137139
wantErr: assert.Error,
138140
},
@@ -152,9 +154,9 @@ func TestMarkUpgrade(t *testing.T) {
152154
hash: "prvagt",
153155
versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"),
154156
},
155-
action: nil,
156-
details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""),
157-
rollbackWindow: 0,
157+
action: nil,
158+
details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""),
159+
availableRollbacks: nil,
158160
},
159161
wantErr: assert.NoError,
160162
assertAfterMark: func(t *testing.T, dataDir string) {
@@ -197,9 +199,20 @@ func TestMarkUpgrade(t *testing.T) {
197199
hash: "prvagt",
198200
versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"),
199201
},
200-
action: nil,
201-
details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""),
202-
rollbackWindow: 7 * 24 * time.Hour,
202+
action: nil,
203+
details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""),
204+
availableRollbacks: []v1.AgentInstallDesc{
205+
{
206+
OptionalTTLItem: v1.OptionalTTLItem{
207+
TTL: &twentyFourHoursFromNow,
208+
},
209+
Version: "1.2.3-SNAPSHOT",
210+
Hash: "prvagt",
211+
VersionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"),
212+
Flavor: "basic",
213+
Active: false,
214+
},
215+
},
203216
},
204217
wantErr: assert.NoError,
205218
assertAfterMark: func(t *testing.T, dataDir string) {
@@ -241,9 +254,20 @@ func TestMarkUpgrade(t *testing.T) {
241254
hash: "prvagt",
242255
versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"),
243256
},
244-
action: nil,
245-
details: details.NewDetails("9.2.0-SNAPSHOT", details.StateReplacing, ""),
246-
rollbackWindow: 7 * 24 * time.Hour,
257+
action: nil,
258+
details: details.NewDetails("9.2.0-SNAPSHOT", details.StateReplacing, ""),
259+
availableRollbacks: []v1.AgentInstallDesc{
260+
{
261+
OptionalTTLItem: v1.OptionalTTLItem{
262+
TTL: &twentyFourHoursFromNow,
263+
},
264+
Version: "1.2.3-SNAPSHOT",
265+
Hash: "prvagt",
266+
VersionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"),
267+
Flavor: "basic",
268+
Active: false,
269+
},
270+
},
247271
},
248272
wantErr: assert.NoError,
249273
assertAfterMark: func(t *testing.T, dataDir string) {
@@ -270,7 +294,7 @@ func TestMarkUpgrade(t *testing.T) {
270294
{
271295
Version: "1.2.3-SNAPSHOT",
272296
Home: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"),
273-
ValidUntil: updatedOnNow.Add(7 * 24 * time.Hour),
297+
ValidUntil: twentyFourHoursFromNow,
274298
},
275299
},
276300
}
@@ -295,7 +319,7 @@ func TestMarkUpgrade(t *testing.T) {
295319
tc.setupBeforeMark(t, dataDir)
296320
}
297321

298-
err := markUpgrade(log, dataDir, tc.args.updatedOn, tc.args.currentAgent, tc.args.previousAgent, tc.args.action, tc.args.details, tc.args.rollbackWindow)
322+
err := markUpgrade(log, dataDir, tc.args.updatedOn, tc.args.currentAgent, tc.args.previousAgent, tc.args.action, tc.args.details, tc.args.availableRollbacks)
299323
tc.wantErr(t, err)
300324
if tc.assertAfterMark != nil {
301325
tc.assertAfterMark(t, dataDir)

internal/pkg/agent/application/upgrade/upgrade.go

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ type unpackHandler interface {
9191
type copyActionStoreFunc func(log *logger.Logger, newHome string) error
9292
type copyRunDirectoryFunc func(log *logger.Logger, oldRunPath, newRunPath string) error
9393
type fileDirCopyFunc func(from, to string, opts ...filecopy.Options) error
94-
type markUpgradeFunc func(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, rollbackWindow time.Duration) error
94+
type markUpgradeFunc func(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, availableRollbacks []v1.AgentInstallDesc) error
9595
type changeSymlinkFunc func(log *logger.Logger, topDirPath, symlinkPath, newTarget string) error
9696
type rollbackInstallFunc func(ctx context.Context, log *logger.Logger, topDirPath, versionedHome, oldVersionedHome string, ids installDescriptorSource) error
9797

@@ -414,7 +414,6 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s
414414
return nil, fmt.Errorf("calculating home path relative to top, home: %q top: %q : %w", paths.Home(), paths.Top(), err)
415415
}
416416

417-
//FIXME make it nicer
418417
_, err = u.installDescriptorSource.AddInstallDesc(
419418
v1.AgentInstallDesc{Version: version, VersionedHome: unpackRes.VersionedHome, Hash: unpackRes.Hash, Flavor: detectedFlavor, Active: false},
420419
)
@@ -465,13 +464,9 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s
465464
rollbackWindow = u.upgradeSettings.Rollback.Window
466465
}
467466

468-
var currentInstallTTL *time.Time = nil
469-
if rollbackWindow > 0 {
470-
currentInstallTTLVar := time.Now().Add(rollbackWindow)
471-
currentInstallTTL = &currentInstallTTLVar
472-
}
473-
474-
_, err = u.installDescriptorSource.ModifyInstallDesc(
467+
// timestamp marking the moment the links have been rotated. It will be used for TTL calculations of pre-existing elastic-agent installs
468+
rotationTimestamp := time.Now()
469+
modifiedInstallDescriptor, err := u.installDescriptorSource.ModifyInstallDesc(
475470
func(desc *v1.AgentInstallDesc) error {
476471
if desc.VersionedHome == unpackRes.VersionedHome {
477472
desc.Active = true
@@ -480,16 +475,17 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s
480475
desc.Active = false
481476
}
482477

478+
// set the TTL only for the current install
483479
if desc.VersionedHome == currentVersionedHome {
484-
desc.TTL = currentInstallTTL
480+
desc.TTL = getCurrentInstallTTL(rollbackWindow, rotationTimestamp)
485481
}
486482

487483
return nil
488484
},
489485
)
490486

491487
if err != nil {
492-
err = fmt.Errorf("error encountered when adding install description: %w", err)
488+
err = fmt.Errorf("error encountered when setting new install description as active: %w", err)
493489

494490
rollbackErr := rollbackInstall(ctx, u.log, paths.Top(), unpackRes.VersionedHome, currentVersionedHome, u.installDescriptorSource)
495491
if rollbackErr != nil {
@@ -517,12 +513,14 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s
517513
versionedHome: currentVersionedHome,
518514
}
519515

516+
availableRollbacks := getAvailableRollbacks(rollbackWindow, rotationTimestamp, unpackRes.VersionedHome, modifiedInstallDescriptor)
517+
520518
if err := u.markUpgrade(u.log,
521519
paths.Data(), // data dir to place the marker in
522520
time.Now(),
523521
current, // new agent version data
524522
previous, // old agent version data
525-
action, det, rollbackWindow); err != nil {
523+
action, det, availableRollbacks); err != nil {
526524
u.log.Errorw("Rolling back: marking upgrade failed", "error.message", err)
527525
rollbackErr := u.rollbackInstall(ctx, u.log, paths.Top(), hashedDir, currentVersionedHome, u.installDescriptorSource)
528526
return nil, goerrors.Join(err, rollbackErr)
@@ -556,6 +554,32 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s
556554
return cb, nil
557555
}
558556

557+
func getAvailableRollbacks(rollbackWindow time.Duration, now time.Time, newVersionedHome string, descriptor *v1.InstallDescriptor) []v1.AgentInstallDesc {
558+
if rollbackWindow == 0 {
559+
// if there's no rollback window it means that no rollback should survive the watcher cleanup at the end of the grace period.
560+
return nil
561+
}
562+
563+
res := make([]v1.AgentInstallDesc, 0, len(descriptor.AgentInstalls))
564+
for _, installDesc := range descriptor.AgentInstalls {
565+
if installDesc.VersionedHome != newVersionedHome && (installDesc.TTL == nil || now.Before(*installDesc.TTL)) {
566+
// this is a valid possible rollback target, so we have to keep it available beyond the end of the grace period
567+
res = append(res, installDesc)
568+
}
569+
}
570+
return res
571+
}
572+
573+
func getCurrentInstallTTL(rollbackWindow time.Duration, now time.Time) *time.Time {
574+
if rollbackWindow == 0 {
575+
// no rollback window, no TTL
576+
return nil
577+
}
578+
579+
currentInstallTTLVar := now.Add(rollbackWindow)
580+
return &currentInstallTTLVar
581+
}
582+
559583
func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) {
560584
if version == "" {
561585
return nil, ErrEmptyRollbackVersion

internal/pkg/agent/application/upgrade/upgrade_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1610,7 +1610,7 @@ func TestUpgradeErrorHandling(t *testing.T) {
16101610
upgrader.rollbackInstall = func(ctx context.Context, log *logger.Logger, topDirPath, versionedHome, oldVersionedHome string, ids installDescriptorSource) error {
16111611
return nil
16121612
}
1613-
upgrader.markUpgrade = func(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, rollbackWindow time.Duration) error {
1613+
upgrader.markUpgrade = func(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, availableRollbacks []v1.AgentInstallDesc) error {
16141614
return testError
16151615
}
16161616
},

0 commit comments

Comments
 (0)