diff --git a/.mockery.yaml b/.mockery.yaml index 8d8a896feb3..0b2e6c8c63b 100644 --- a/.mockery.yaml +++ b/.mockery.yaml @@ -5,6 +5,9 @@ filename: mocks.go template-data: unroll-variadic: true packages: + github.com/elastic/elastic-agent/internal/pkg/agent/application: + interfaces: + rollbacksSource: {} github.com/elastic/elastic-agent/internal/pkg/agent/application/actions/handlers: interfaces: Uploader: {} diff --git a/internal/pkg/agent/application/application.go b/internal/pkg/agent/application/application.go index 825b078cceb..948db85f88f 100644 --- a/internal/pkg/agent/application/application.go +++ b/internal/pkg/agent/application/application.go @@ -7,11 +7,14 @@ package application import ( "context" "fmt" + "os" + "path/filepath" "time" "go.elastic.co/apm/v2" componentmonitoring "github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/component" + "github.com/elastic/elastic-agent/internal/pkg/agent/install" "github.com/elastic/go-ucfg" @@ -48,6 +51,11 @@ import ( "github.com/elastic/elastic-agent/version" ) +type rollbacksSource interface { + Set(map[string]upgrade.TTLMarker) error + Get() (map[string]upgrade.TTLMarker, error) +} + // CfgOverrider allows for application driven overrides of configuration read from disk. type CfgOverrider func(cfg *configuration.Configuration) @@ -131,6 +139,10 @@ func New( isMonitoringSupported := !disableMonitoring && cfg.Settings.V1MonitoringEnabled availableRollbacksSource := upgrade.NewTTLMarkerRegistry(log, paths.Top()) + if upgrade.IsUpgradeable() { + // If we are not running in a container, check and normalize the install descriptor before we start the agent + normalizeAgentInstalls(log, paths.Top(), time.Now(), initialUpdateMarker, availableRollbacksSource) + } upgrader, err := upgrade.NewUpgrader(log, cfg.Settings.DownloadConfig, cfg.Settings.Upgrade, agentInfo, new(upgrade.AgentWatcherHelper), availableRollbacksSource) if err != nil { return nil, nil, nil, fmt.Errorf("failed to create upgrader: %w", err) @@ -296,6 +308,86 @@ func New( return coord, configMgr, varsManager, nil } +// normalizeAgentInstalls will attempt to normalize the agent installs and related TTL markers: +// - if we just rolled back: the update marker is checked and in case of rollback we clean up the TTL marker of the rolled back version +// - check all the entries: +// - verify that the home directory for that install still exists (remove TTL markers for what does not exist anymore) +// - check if the agent install: if it is no longer valid collect the versioned home and the TTL marker for deletion +// +// This function will NOT error out, it will log any errors it encounters as warnings but any error must be treated as non-fatal +func normalizeAgentInstalls(log *logger.Logger, topDir string, now time.Time, initialUpdateMarker *upgrade.UpdateMarker, rollbackSource rollbacksSource) { + // Check if we rolled back and update the TTL markers + if initialUpdateMarker != nil && initialUpdateMarker.Details != nil && initialUpdateMarker.Details.State == details.StateRollback { + // Reset the TTL for the current version if we are coming off a rollback + rollbacks, err := rollbackSource.Get() + if err != nil { + log.Warnf("Error getting available rollbacks from rollbackSource during startup check: %s", err) + return + } + + // remove the current versioned home TTL marker + delete(rollbacks, initialUpdateMarker.PrevVersionedHome) + err = rollbackSource.Set(rollbacks) + if err != nil { + log.Warnf("Error setting available rollbacks during normalization: %s", err) + return + } + } + + // check if we need to cleanup old agent installs + rollbacks, err := rollbackSource.Get() + if err != nil { + log.Warnf("Error getting available rollbacks during startup check: %s", err) + return + } + + var versionedHomesToCleanup []string + for versionedHome, ttlMarker := range rollbacks { + + versionedHomeAbsPath := filepath.Join(topDir, versionedHome) + + if versionedHomeAbsPath == paths.HomeFrom(topDir) { + // skip the current install + log.Warnf("Found a TTL marker for the currently running agent at %s. Skipping cleanup...", versionedHome) + continue + } + + _, err = os.Stat(versionedHomeAbsPath) + if errors.Is(err, os.ErrNotExist) { + log.Warnf("Versioned home %s corresponding to agent TTL marker %+v is not found on disk", versionedHomeAbsPath, ttlMarker) + versionedHomesToCleanup = append(versionedHomesToCleanup, versionedHome) + continue + } + + if err != nil { + log.Warnf("error checking versioned home %s for agent install: %s", versionedHomeAbsPath, err.Error()) + continue + } + + if now.After(ttlMarker.ValidUntil) { + // the install directory exists but it's expired. Remove the files. + log.Infof("agent TTL marker %+v marks %q as expired, removing directory", ttlMarker, versionedHomeAbsPath) + if cleanupErr := install.RemoveBut(versionedHomeAbsPath, true); cleanupErr != nil { + log.Warnf("Error removing directory %q: %s", versionedHomeAbsPath, cleanupErr) + } else { + log.Infof("Directory %q was removed", versionedHomeAbsPath) + versionedHomesToCleanup = append(versionedHomesToCleanup, versionedHome) + } + } + } + + if len(versionedHomesToCleanup) > 0 { + log.Infof("removing install descriptor(s) for %v", versionedHomesToCleanup) + for _, versionedHomeToCleanup := range versionedHomesToCleanup { + delete(rollbacks, versionedHomeToCleanup) + } + err = rollbackSource.Set(rollbacks) + if err != nil { + log.Warnf("Error removing install descriptor(s): %s", err) + } + } +} + func mergeFleetConfig(ctx context.Context, rawConfig *config.Config) (storage.Store, *configuration.Configuration, error) { path := paths.AgentConfigFile() store, err := storage.NewEncryptedDiskStore(ctx, path) diff --git a/internal/pkg/agent/application/application_test.go b/internal/pkg/agent/application/application_test.go index 3c4ea820ea0..0c7fb737f7f 100644 --- a/internal/pkg/agent/application/application_test.go +++ b/internal/pkg/agent/application/application_test.go @@ -7,6 +7,9 @@ package application import ( "context" "fmt" + "os" + "path/filepath" + "runtime" "testing" "time" @@ -15,6 +18,9 @@ import ( "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/internal/pkg/config" "github.com/elastic/elastic-agent/internal/pkg/testutils" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" @@ -302,3 +308,188 @@ func TestInjectOutputOverrides(t *testing.T) { }) } } + +func Test_normalizeInstallDescriptorAtStartup(t *testing.T) { + + now := time.Now() + tomorrow := now.Add(24 * time.Hour) + yesterday := now.Add(-24 * time.Hour) + + tests := []struct { + name string + setup func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) + postNormalizeAssertions func(t *testing.T, topDir string, initialUpdateMarker *upgrade.UpdateMarker) + }{ + { + name: "happy path: single install, no rollbacks, no modifications needed", + setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) { + mockRollbackSource := newMockRollbacksSource(t) + mockRollbackSource.EXPECT().Get().Return(nil, nil) + return nil, mockRollbackSource + }, + + postNormalizeAssertions: nil, + }, + { + name: "Agent was manually rolled back: rolled back install is removed from the list", + setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) { + newAgentInstallPath := createFakeAgentInstall(t, topDir, "4.5.6", "newversionhash", true) + oldAgentInstallPath := createFakeAgentInstall(t, topDir, "1.2.3", "oldversionhash", true) + + mockRollbackSource := newMockRollbacksSource(t) + mockRollbackSource.EXPECT().Get().Return(map[string]upgrade.TTLMarker{ + oldAgentInstallPath: { + Version: "1.2.3", + Hash: "oldversionhash", + ValidUntil: tomorrow, + }, + }, nil) + + updateMarker := &upgrade.UpdateMarker{ + Version: "4.5.6", + Hash: "newversionhash", + VersionedHome: newAgentInstallPath, + UpdatedOn: now, + PrevVersion: "1.2.3", + PrevHash: "oldversionhash", + PrevVersionedHome: oldAgentInstallPath, + Acked: false, + Action: nil, + Details: &details.Details{ + TargetVersion: "4.5.6", + State: details.StateRollback, + ActionID: "", + Metadata: details.Metadata{ + Reason: details.ReasonManualRollbackPattern, + }, + }, + } + + // expect code to clear the rollback + mockRollbackSource.EXPECT().Set(map[string]upgrade.TTLMarker{}).Return(nil) + return updateMarker, mockRollbackSource + }, + postNormalizeAssertions: nil, + }, + { + name: "Entries not having a matching install directory will be removed from the list", + setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) { + _ = createFakeAgentInstall(t, topDir, "4.5.6", "newversionhash", true) + oldAgentInstallPath := createFakeAgentInstall(t, topDir, "1.2.3", "oldversionhash", true) + + mockRollbackSource := newMockRollbacksSource(t) + nonExistingVersionedHome := filepath.Join("data", "thisdirectorydoesnotexist") + mockRollbackSource.EXPECT().Get().Return(map[string]upgrade.TTLMarker{ + oldAgentInstallPath: { + Version: "1.2.3", + Hash: "oldversionhash", + ValidUntil: tomorrow, + }, + nonExistingVersionedHome: { + Version: "0.0.0", + Hash: "nonExistingHash", + ValidUntil: tomorrow, + }, + }, nil) + + mockRollbackSource.EXPECT().Set(map[string]upgrade.TTLMarker{ + oldAgentInstallPath: { + Version: "1.2.3", + Hash: "oldversionhash", + ValidUntil: tomorrow, + }, + }).Return(nil) + return nil, mockRollbackSource + }, + postNormalizeAssertions: nil, + }, + { + name: "Expired installs still existing on disk will be removed from the install list and removed from disk", + setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) { + _ = createFakeAgentInstall(t, topDir, "4.5.6", "newversionhash", true) + oldAgentInstallPath := createFakeAgentInstall(t, topDir, "1.2.3", "oldversionhash", true) + + // assert that the versionedHome of the old install is the same we check in postNormalizeAssertions + assert.Equal(t, oldAgentInstallPath, filepath.Join("data", "elastic-agent-1.2.3-oldver"), + "Unexpected old install versioned home. Post normalize assertions may not be working") + + mockRollbackSource := newMockRollbacksSource(t) + mockRollbackSource.EXPECT().Get().Return( + map[string]upgrade.TTLMarker{ + oldAgentInstallPath: { + Version: "1.2.3", + Hash: "oldver", + ValidUntil: yesterday, + }, + }, + nil, + ) + // expect removal of the existing ttlmarker + mockRollbackSource.EXPECT().Set(map[string]upgrade.TTLMarker{}).Return(nil) + return nil, mockRollbackSource + }, + postNormalizeAssertions: func(t *testing.T, topDir string, _ *upgrade.UpdateMarker) { + assert.NoDirExists(t, filepath.Join(topDir, "data", "elastic-agent-1.2.3-oldver")) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + logger, _ := loggertest.New(t.Name()) + tmpDir := t.TempDir() + updateMarker, installSource := tt.setup(t, tmpDir) + normalizeAgentInstalls(logger, tmpDir, now, updateMarker, installSource) + if tt.postNormalizeAssertions != nil { + tt.postNormalizeAssertions(t, tmpDir, updateMarker) + } + }) + } +} + +// createFakeAgentInstall (copied from the upgrade package tests) will create a mock agent install within topDir, possibly +// using the version in the directory name, depending on useVersionInPath it MUST return the path to the created versionedHome +// relative to topDir, to mirror what step_unpack returns +func createFakeAgentInstall(t *testing.T, topDir, version, hash string, useVersionInPath bool) string { + + // create versioned home + versionedHome := fmt.Sprintf("elastic-agent-%s", hash[:upgrade.HashLen]) + if useVersionInPath { + // use the version passed as parameter + versionedHome = fmt.Sprintf("elastic-agent-%s-%s", version, hash[:upgrade.HashLen]) + } + relVersionedHomePath := filepath.Join("data", versionedHome) + absVersionedHomePath := filepath.Join(topDir, relVersionedHomePath) + + // recalculate the binary path and launch a mkDirAll to account for MacOS weirdness + // (the extra nesting of elastic agent binary within versionedHome) + absVersioneHomeBinaryPath := paths.BinaryPath(absVersionedHomePath, "") + err := os.MkdirAll(absVersioneHomeBinaryPath, 0o750) + require.NoError(t, err, "error creating fake install versioned home directory (including binary path) %q", absVersioneHomeBinaryPath) + + // place a few directories in the fake install + absComponentsDirPath := filepath.Join(absVersionedHomePath, "components") + err = os.MkdirAll(absComponentsDirPath, 0o750) + require.NoError(t, err, "error creating fake install components directory %q", absVersionedHomePath) + + absLogsDirPath := filepath.Join(absVersionedHomePath, "logs") + err = os.MkdirAll(absLogsDirPath, 0o750) + require.NoError(t, err, "error creating fake install logs directory %q", absLogsDirPath) + + absRunDirPath := filepath.Join(absVersionedHomePath, "run") + err = os.MkdirAll(absRunDirPath, 0o750) + require.NoError(t, err, "error creating fake install run directory %q", absRunDirPath) + + // put some placeholder for files + agentExecutableName := upgrade.AgentName + if runtime.GOOS == "windows" { + agentExecutableName += ".exe" + } + err = os.WriteFile(paths.BinaryPath(absVersionedHomePath, agentExecutableName), []byte(fmt.Sprintf("Placeholder for agent %s", version)), 0o750) + require.NoErrorf(t, err, "error writing elastic agent binary placeholder %q", agentExecutableName) + fakeLogPath := filepath.Join(absLogsDirPath, "fakelog.ndjson") + err = os.WriteFile(fakeLogPath, []byte(fmt.Sprintf("Sample logs for agent %s", version)), 0o750) + require.NoErrorf(t, err, "error writing fake log placeholder %q", fakeLogPath) + + // return the path relative to top exactly like the step_unpack does + return relVersionedHomePath +} diff --git a/internal/pkg/agent/application/mocks.go b/internal/pkg/agent/application/mocks.go new file mode 100644 index 00000000000..e2dd99b9e9c --- /dev/null +++ b/internal/pkg/agent/application/mocks.go @@ -0,0 +1,148 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +// Code generated by mockery; DO NOT EDIT. +// github.com/vektra/mockery +// template: testify + +package application + +import ( + mock "github.com/stretchr/testify/mock" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" +) + +// newMockRollbacksSource creates a new instance of mockRollbacksSource. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func newMockRollbacksSource(t interface { + mock.TestingT + Cleanup(func()) +}) *mockRollbacksSource { + mock := &mockRollbacksSource{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} + +// mockRollbacksSource is an autogenerated mock type for the rollbacksSource type +type mockRollbacksSource struct { + mock.Mock +} + +type mockRollbacksSource_Expecter struct { + mock *mock.Mock +} + +func (_m *mockRollbacksSource) EXPECT() *mockRollbacksSource_Expecter { + return &mockRollbacksSource_Expecter{mock: &_m.Mock} +} + +// Get provides a mock function for the type mockRollbacksSource +func (_mock *mockRollbacksSource) Get() (map[string]upgrade.TTLMarker, error) { + ret := _mock.Called() + + if len(ret) == 0 { + panic("no return value specified for Get") + } + + var r0 map[string]upgrade.TTLMarker + var r1 error + if returnFunc, ok := ret.Get(0).(func() (map[string]upgrade.TTLMarker, error)); ok { + return returnFunc() + } + if returnFunc, ok := ret.Get(0).(func() map[string]upgrade.TTLMarker); ok { + r0 = returnFunc() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(map[string]upgrade.TTLMarker) + } + } + if returnFunc, ok := ret.Get(1).(func() error); ok { + r1 = returnFunc() + } else { + r1 = ret.Error(1) + } + return r0, r1 +} + +// mockRollbacksSource_Get_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Get' +type mockRollbacksSource_Get_Call struct { + *mock.Call +} + +// Get is a helper method to define mock.On call +func (_e *mockRollbacksSource_Expecter) Get() *mockRollbacksSource_Get_Call { + return &mockRollbacksSource_Get_Call{Call: _e.mock.On("Get")} +} + +func (_c *mockRollbacksSource_Get_Call) Run(run func()) *mockRollbacksSource_Get_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *mockRollbacksSource_Get_Call) Return(stringToTTLMarker map[string]upgrade.TTLMarker, err error) *mockRollbacksSource_Get_Call { + _c.Call.Return(stringToTTLMarker, err) + return _c +} + +func (_c *mockRollbacksSource_Get_Call) RunAndReturn(run func() (map[string]upgrade.TTLMarker, error)) *mockRollbacksSource_Get_Call { + _c.Call.Return(run) + return _c +} + +// Set provides a mock function for the type mockRollbacksSource +func (_mock *mockRollbacksSource) Set(stringToTTLMarker map[string]upgrade.TTLMarker) error { + ret := _mock.Called(stringToTTLMarker) + + if len(ret) == 0 { + panic("no return value specified for Set") + } + + var r0 error + if returnFunc, ok := ret.Get(0).(func(map[string]upgrade.TTLMarker) error); ok { + r0 = returnFunc(stringToTTLMarker) + } else { + r0 = ret.Error(0) + } + return r0 +} + +// mockRollbacksSource_Set_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Set' +type mockRollbacksSource_Set_Call struct { + *mock.Call +} + +// Set is a helper method to define mock.On call +// - stringToTTLMarker map[string]upgrade.TTLMarker +func (_e *mockRollbacksSource_Expecter) Set(stringToTTLMarker interface{}) *mockRollbacksSource_Set_Call { + return &mockRollbacksSource_Set_Call{Call: _e.mock.On("Set", stringToTTLMarker)} +} + +func (_c *mockRollbacksSource_Set_Call) Run(run func(stringToTTLMarker map[string]upgrade.TTLMarker)) *mockRollbacksSource_Set_Call { + _c.Call.Run(func(args mock.Arguments) { + var arg0 map[string]upgrade.TTLMarker + if args[0] != nil { + arg0 = args[0].(map[string]upgrade.TTLMarker) + } + run( + arg0, + ) + }) + return _c +} + +func (_c *mockRollbacksSource_Set_Call) Return(err error) *mockRollbacksSource_Set_Call { + _c.Call.Return(err) + return _c +} + +func (_c *mockRollbacksSource_Set_Call) RunAndReturn(run func(stringToTTLMarker map[string]upgrade.TTLMarker) error) *mockRollbacksSource_Set_Call { + _c.Call.Return(run) + return _c +} diff --git a/internal/pkg/agent/application/upgrade/manual_rollback.go b/internal/pkg/agent/application/upgrade/manual_rollback.go new file mode 100644 index 00000000000..664ba586da1 --- /dev/null +++ b/internal/pkg/agent/application/upgrade/manual_rollback.go @@ -0,0 +1,262 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +package upgrade + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" + "github.com/elastic/elastic-agent/internal/pkg/fleetapi" + "github.com/elastic/elastic-agent/internal/pkg/release" + "github.com/elastic/elastic-agent/pkg/core/logger" + "github.com/elastic/elastic-agent/pkg/version" + agtversion "github.com/elastic/elastic-agent/version" +) + +const disableRollbackWindow = time.Duration(0) + +func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) { + if version == "" { + return nil, ErrEmptyRollbackVersion + } + + // check that the upgrade marker exists and is accessible + updateMarkerPath := markerFilePath(paths.DataFrom(topDir)) + _, err := os.Stat(updateMarkerPath) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("stat() on upgrade marker %q failed: %w", updateMarkerPath, err) + } + + var watcherExecutable string + var versionedHomeToRollbackTo string + var updateMarkerExistsBeforeRollback bool + + if errors.Is(err, os.ErrNotExist) { + // there is no upgrade marker (the rollback was requested after the watcher grace period had elapsed), we need + // to extract available rollbacks from agent installs + watcherExecutable, versionedHomeToRollbackTo, err = rollbackUsingAgentInstalls(u.log, u.watcherHelper, u.availableRollbacksSource, topDir, now, version, u.markUpgrade) + } else { + // If upgrade marker is available, we need to gracefully stop any watcher process, read the available rollbacks from + // the upgrade marker and then proceed with rollback + updateMarkerExistsBeforeRollback = true + watcherExecutable, versionedHomeToRollbackTo, err = rollbackUsingUpgradeMarker(ctx, u.log, u.watcherHelper, topDir, now, version) + } + + if err != nil { + return nil, err + } + + // rollback + watcherCmd, err := u.watcherHelper.InvokeWatcher(u.log, watcherExecutable, "--rollback", versionedHomeToRollbackTo) + if err != nil { + if !updateMarkerExistsBeforeRollback { + // best effort: cleanup the fake update marker + cleanupErr := os.Remove(updateMarkerPath) + if cleanupErr != nil { + u.log.Errorf("Error cleaning up fake upgrade marker: %v", cleanupErr) + } + } else { + // attempt to resume the "normal" watcher to continue watching + _, restoreWatcherErr := u.watcherHelper.InvokeWatcher(u.log, watcherExecutable) + if restoreWatcherErr != nil { + u.log.Errorf("Error resuming watch after rollback error : %v", restoreWatcherErr) + } + } + + return nil, fmt.Errorf("starting rollback command: %w", err) + } + + u.log.Infof("rollback command started successfully, PID: %d", watcherCmd.Process.Pid) + return nil, nil +} + +func rollbackUsingAgentInstalls(log *logger.Logger, watcherHelper WatcherHelper, source availableRollbacksSource, topDir string, now time.Time, rollbackVersion string, markUpgrade markUpgradeFunc) (string, string, error) { + // read the available installs + availableRollbacks, err := source.Get() + if err != nil { + return "", "", fmt.Errorf("retrieving available rollbacks: %w", err) + } + // check for the version we want to rollback to + var targetInstall string + var targetTTLMarker TTLMarker + for versionedHome, ttlMarker := range availableRollbacks { + if ttlMarker.Version == rollbackVersion && now.Before(ttlMarker.ValidUntil) { + // found a valid target + targetInstall = versionedHome + targetTTLMarker = ttlMarker + break + } + } + + if targetInstall == "" { + return "", "", fmt.Errorf("version %q not listed among the available rollbacks: %w", rollbackVersion, ErrNoRollbacksAvailable) + } + + prevAgentParsedVersion, err := version.ParseVersion(targetTTLMarker.Version) + if err != nil { + return "", "", fmt.Errorf("parsing version of target install %+v: %w", targetInstall, err) + } + + // write out a fake upgrade marker to make the upgrade details state happy + currentHome := paths.VersionedHome(topDir) + relCurVersionedHome, err := filepath.Rel(topDir, currentHome) + if err != nil { + return "", "", fmt.Errorf("getting current install home path %q relative to top %q: %w", currentHome, topDir, err) + } + curAgentInstall := agentInstall{ + parsedVersion: agtversion.GetParsedAgentPackageVersion(), + version: release.VersionWithSnapshot(), + hash: release.Commit(), + versionedHome: relCurVersionedHome, + } + + prevAgentInstall := agentInstall{ + parsedVersion: prevAgentParsedVersion, + version: targetTTLMarker.Version, + hash: targetTTLMarker.Hash, + versionedHome: targetInstall, + } + + upgradeDetails := details.NewDetails(release.VersionWithSnapshot(), details.StateRequested, "" /*action.ID*/) + err = markUpgrade(log, paths.DataFrom(topDir), now, curAgentInstall, prevAgentInstall, nil /*action*/, upgradeDetails, nil) + if err != nil { + return "", "", fmt.Errorf("creating upgrade marker: %w", err) + } + + // return watcher executable and versionedHome to rollback to + watcherExecutable := watcherHelper.SelectWatcherExecutable(topDir, prevAgentInstall, curAgentInstall) + return watcherExecutable, targetInstall, nil +} + +func rollbackUsingUpgradeMarker(ctx context.Context, log *logger.Logger, watcherHelper WatcherHelper, topDir string, now time.Time, version string) (string, string, error) { + // read the upgrade marker + updateMarker, err := LoadMarker(paths.DataFrom(topDir)) + if err != nil { + return "", "", fmt.Errorf("loading marker: %w", err) + } + + if updateMarker == nil { + return "", "", ErrNilUpdateMarker + } + + // extract the agent installs involved in the upgrade and select the most appropriate watcher executable + previous, current, err := extractAgentInstallsFromMarker(updateMarker) + if err != nil { + return "", "", fmt.Errorf("extracting current and previous install details: %w", err) + } + watcherExecutable := watcherHelper.SelectWatcherExecutable(topDir, previous, current) + + var selectedRollbackVersionedHome string + + err = withTakeOverWatcher(ctx, log, topDir, watcherHelper, func() error { + // read the upgrade marker + updateMarker, err = LoadMarker(paths.DataFrom(topDir)) + if err != nil { + return fmt.Errorf("loading marker: %w", err) + } + + if updateMarker == nil { + return ErrNilUpdateMarker + } + + if len(updateMarker.RollbacksAvailable) == 0 { + return ErrNoRollbacksAvailable + } + + for versionedHome, rollback := range updateMarker.RollbacksAvailable { + if rollback.Version == version && now.Before(rollback.ValidUntil) { + selectedRollbackVersionedHome = versionedHome + break + } + } + if selectedRollbackVersionedHome == "" { + return fmt.Errorf("version %q not listed among the available rollbacks: %w", version, ErrNoRollbacksAvailable) + } + return nil + }) + + if err != nil { + // Invoke watcher again (now that we released the watcher applocks) + _, invokeWatcherErr := watcherHelper.InvokeWatcher(log, watcherExecutable) + if invokeWatcherErr != nil { + return "", "", errors.Join(err, fmt.Errorf("invoking watcher: %w", invokeWatcherErr)) + } + return "", "", err + } + + return watcherExecutable, selectedRollbackVersionedHome, nil +} + +func withTakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string, watcherHelper WatcherHelper, f func() error) error { + watcherApplock, err := watcherHelper.TakeOverWatcher(ctx, log, topDir) + if err != nil { + return fmt.Errorf("taking over watcher processes: %w", err) + } + defer func(watcherApplock *filelock.AppLocker) { + releaseWatcherAppLockerErr := watcherApplock.Unlock() + if releaseWatcherAppLockerErr != nil { + log.Warnw("error releasing watcher applock", "error", releaseWatcherAppLockerErr) + } + }(watcherApplock) + + return f() +} + +func extractAgentInstallsFromMarker(updateMarker *UpdateMarker) (previous agentInstall, current agentInstall, err error) { + previousParsedVersion, err := version.ParseVersion(updateMarker.PrevVersion) + if err != nil { + return previous, current, fmt.Errorf("parsing previous version %q: %w", updateMarker.PrevVersion, err) + } + previous = agentInstall{ + parsedVersion: previousParsedVersion, + version: updateMarker.PrevVersion, + hash: updateMarker.PrevHash, + versionedHome: updateMarker.PrevVersionedHome, + } + + currentParsedVersion, err := version.ParseVersion(updateMarker.Version) + if err != nil { + return previous, current, fmt.Errorf("parsing current version %q: %w", updateMarker.Version, err) + } + current = agentInstall{ + parsedVersion: currentParsedVersion, + version: updateMarker.Version, + hash: updateMarker.Hash, + versionedHome: updateMarker.VersionedHome, + } + + return previous, current, nil +} + +func getAvailableRollbacks(rollbackWindow time.Duration, now time.Time, currentVersion string, parsedCurrentVersion *version.ParsedSemVer, currentVersionedHome string, currentHash string) map[string]TTLMarker { + if rollbackWindow == disableRollbackWindow { + // if there's no rollback window it means that no rollback should survive the watcher cleanup at the end of the grace period. + return nil + } + + if parsedCurrentVersion == nil || parsedCurrentVersion.Less(*Version_9_3_0_SNAPSHOT) { + // the version we are upgrading to does not support manual rollbacks + return nil + } + + // when multiple rollbacks will be supported, read the existing descriptor + // at this stage we can get by with a single rollback + res := make(map[string]TTLMarker, 1) + res[currentVersionedHome] = TTLMarker{ + Version: currentVersion, + Hash: currentHash, + ValidUntil: now.Add(rollbackWindow), + } + + return res +} diff --git a/internal/pkg/agent/application/upgrade/manual_rollback_test.go b/internal/pkg/agent/application/upgrade/manual_rollback_test.go new file mode 100644 index 00000000000..ee2e840a7f6 --- /dev/null +++ b/internal/pkg/agent/application/upgrade/manual_rollback_test.go @@ -0,0 +1,503 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +package upgrade + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" + "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" + "github.com/elastic/elastic-agent/internal/pkg/release" + "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" + "github.com/elastic/elastic-agent/pkg/version" + agtversion "github.com/elastic/elastic-agent/version" +) + +func TestManualRollback(t *testing.T) { + const updatemarkerwatching456NoRollbackAvailable = ` + version: 4.5.6 + hash: newver + versioned_home: data/elastic-agent-4.5.6-newver + updated_on: 2025-07-11T10:11:12.131415Z + prev_version: 1.2.3 + prev_hash: oldver + prev_versioned_home: data/elastic-agent-1.2.3-oldver + acked: false + action: null + details: + target_version: 4.5.6 + state: UPG_WATCHING + metadata: + retry_until: null + ` + const updatemarkerwatching456 = ` + version: 4.5.6 + hash: newver + versioned_home: data/elastic-agent-4.5.6-newver + updated_on: 2025-07-11T10:11:12.131415Z + prev_version: 1.2.3 + prev_hash: oldver + prev_versioned_home: data/elastic-agent-1.2.3-oldver + acked: false + action: null + details: + target_version: 4.5.6 + state: UPG_WATCHING + metadata: + retry_until: null + desired_outcome: UPGRADE + rollbacks_available: + "data/elastic-agent-1.2.3-oldver": + version: 1.2.3 + valid_until: 2025-07-18T10:11:12.131415Z + ` + + parsed123Version, err := version.ParseVersion("1.2.3") + require.NoError(t, err) + parsed456Version, err := version.ParseVersion("4.5.6") + require.NoError(t, err) + + agentInstall123 := agentInstall{ + parsedVersion: parsed123Version, + version: "1.2.3", + hash: "oldver", + versionedHome: "data/elastic-agent-1.2.3-oldver", + } + + agentInstall456 := agentInstall{ + parsedVersion: parsed456Version, + version: "4.5.6", + hash: "newver", + versionedHome: "data/elastic-agent-4.5.6-newver", + } + + agentInstallCurrent := agentInstall{ + parsedVersion: agtversion.GetParsedAgentPackageVersion(), + version: release.VersionWithSnapshot(), + hash: release.Commit(), + // Versioned home should contain the version but since the path does not really exist we fallback to the legacy format with just the hash + // versionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s-%s", release.VersionWithSnapshot(), release.ShortCommit())), + versionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())), + } + + // this is the updated_on timestamp in the example + nowBeforeTTL, err := time.Parse(time.RFC3339, `2025-07-11T10:11:12Z`) + require.NoError(t, err, "error parsing nowBeforeTTL") + + // the update marker yaml assume 7d TLL for rollbacks, let's make an extra day pass + nowAfterTTL := nowBeforeTTL.Add(8 * 24 * time.Hour) + + // save the current timestamp, useful for TTL-based testing + aMomentInTime := time.Now() + aMomentTomorrow := aMomentInTime.Add(24 * time.Hour) + aMomentAgo := aMomentInTime.Add(-1 * time.Second) + + type setupF func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) + type postRollbackAssertionsF func(t *testing.T, topDir string) + type testcase struct { + name string + setup setupF + artifactSettings *artifact.Config + upgradeSettings *configuration.UpgradeConfig + now time.Time + version string + wantErr assert.ErrorAssertionFunc + additionalAsserts postRollbackAssertionsF + } + + testcases := []testcase{ + { + name: "no rollback version - rollback fails", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + //do not setup anything here, let the rollback fail + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrEmptyRollbackVersion) + }, + additionalAsserts: nil, + }, + { + name: "no update marker - rollback fails", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + //do not setup anything here, let the rollback fail + rollbacksSource.EXPECT().Get().Return(nil, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "1.2.3", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) + }, + additionalAsserts: nil, + }, + { + name: "update marker is malformed - rollback fails", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte("this is not a proper YAML file"), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + // there's no takeover watcher so no expectation on that or InvokeWatcher + t.Cleanup(func() { + unlockErr := locker.Unlock() + assert.NoError(t, unlockErr, "error unlocking initial watcher AppLocker") + }) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "1.2.3", + wantErr: assert.Error, + additionalAsserts: nil, + }, + { + name: "update marker ok but rollback available is empty - error", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456NoRollbackAvailable), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "2.3.4-unknown", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) + }, + additionalAsserts: func(t *testing.T, topDir string) { + // marker should be untouched + filePath := markerFilePath(paths.DataFrom(topDir)) + require.FileExists(t, filePath) + markerFileBytes, readMarkerErr := os.ReadFile(filePath) + require.NoError(t, readMarkerErr) + + assert.YAMLEq(t, updatemarkerwatching456NoRollbackAvailable, string(markerFileBytes), "update marker should be untouched") + }, + }, + { + name: "update marker ok but version is not available for rollback - error", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "2.3.4-unknown", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) + }, + additionalAsserts: func(t *testing.T, topDir string) { + // marker should be untouched + filePath := markerFilePath(paths.DataFrom(topDir)) + require.FileExists(t, filePath) + markerFileBytes, readMarkerErr := os.ReadFile(filePath) + require.NoError(t, readMarkerErr) + + assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched") + }, + }, + { + name: "update marker ok but rollback is expired - error", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + now: nowAfterTTL, + version: "1.2.3", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) + }, + additionalAsserts: func(t *testing.T, topDir string) { + // marker should be untouched + filePath := markerFilePath(paths.DataFrom(topDir)) + require.FileExists(t, filePath) + markerFileBytes, readMarkerErr := os.ReadFile(filePath) + require.NoError(t, readMarkerErr) + + assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched") + }, + }, + { + name: "update marker ok, rollback valid, invoking watcher fails - error", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + // invoking watcher rollback fails + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable, "--rollback", agentInstall123.versionedHome).Return(nil, errors.New("error invoking watcher")) + // Expect watch to be resumed + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + now: nowBeforeTTL, + version: "1.2.3", + wantErr: assert.Error, + additionalAsserts: func(t *testing.T, topDir string) { + // marker should be untouched + filePath := markerFilePath(paths.DataFrom(topDir)) + require.FileExists(t, filePath) + markerFileBytes, readMarkerErr := os.ReadFile(filePath) + require.NoError(t, readMarkerErr) + + assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched") + }, + }, + { + name: "update marker ok - takeover watcher, persist rollback and restart most recent watcher", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT(). + InvokeWatcher(mock.Anything, newerWatcherExecutable, "--rollback", "data/elastic-agent-1.2.3-oldver"). + Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for rollbacksies"}, Process: &os.Process{Pid: 123}}, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + now: nowBeforeTTL, + version: "1.2.3", + wantErr: assert.NoError, + additionalAsserts: func(t *testing.T, topDir string) { + marker, loadMarkerErr := LoadMarker(paths.DataFrom(topDir)) + require.NoError(t, loadMarkerErr, "error loading marker") + require.NotNil(t, marker, "marker is nil") + + require.NotNil(t, marker.Details) + assert.NotEmpty(t, marker.RollbacksAvailable) + }, + }, + { + name: "no update marker, available install for rollback with valid TTL - rollback", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + rollbacksSource.EXPECT().Get().Return(map[string]TTLMarker{ + "data/elastic-agent-1.2.3-oldver": { + Version: "1.2.3", + Hash: "oldver", + ValidUntil: aMomentTomorrow, + }, + }, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", fmt.Sprintf("elastic-agent-%s-%s", release.VersionWithSnapshot(), release.ShortCommit()), "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstallCurrent).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable, "--rollback", "data/elastic-agent-1.2.3-oldver"). + Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for rollbacksies"}, Process: &os.Process{Pid: 123}}, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: &configuration.UpgradeConfig{ + Rollback: &configuration.UpgradeRollbackConfig{ + Window: 24 * time.Hour, + }, + }, + now: aMomentInTime, + version: "1.2.3", + wantErr: assert.NoError, + additionalAsserts: func(t *testing.T, topDir string) { + actualMarkerFilePath := filepath.Join(topDir, "data", markerFilename) + require.FileExists(t, actualMarkerFilePath, "marker file must have been created") + actualMarkerFileBytes, errReadMarkerFile := os.ReadFile(actualMarkerFilePath) + require.NoError(t, errReadMarkerFile, "marker file should be readable") + + expectedUpdateMarker := &UpdateMarker{ + Version: release.VersionWithSnapshot(), + Hash: release.Commit(), + VersionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())), + UpdatedOn: aMomentInTime, + PrevVersion: "1.2.3", + PrevHash: "oldver", + PrevVersionedHome: "data/elastic-agent-1.2.3-oldver", + Details: &details.Details{ + TargetVersion: release.VersionWithSnapshot(), + State: details.StateRequested, + }, + RollbacksAvailable: nil, + } + + expectedMarkerBytes, err := yaml.Marshal(newMarkerSerializer(expectedUpdateMarker)) + require.NoError(t, err, "error marshalling expected update marker") + require.YAMLEq(t, string(expectedMarkerBytes), string(actualMarkerFileBytes)) + }, + }, + { + name: "no update marker, available install for rollback with expired TTL - error", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + rollbacksSource.EXPECT().Get().Return( + map[string]TTLMarker{ + "data/elastic-agent-1.2.3-oldver": { + Version: "1.2.3", + Hash: "oldver", + ValidUntil: aMomentAgo, + }, + }, + nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: &configuration.UpgradeConfig{ + Rollback: &configuration.UpgradeRollbackConfig{ + Window: 24 * time.Hour, + }, + }, + now: aMomentInTime, + version: "1.2.3", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable, i...) + }, + additionalAsserts: func(t *testing.T, topDir string) { + actualMarkerFilePath := filepath.Join(topDir, "data", markerFilename) + require.NoFileExists(t, actualMarkerFilePath, "marker file must not be created") + + }, + }, + { + name: "no update marker, no available install for the version - error", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + rollbacksSource.EXPECT().Get().Return( + map[string]TTLMarker{ + "data/elastic-agent-1.2.3-oldver": { + Version: "1.2.3", + Hash: "oldver", + ValidUntil: aMomentTomorrow, + }, + }, + nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: &configuration.UpgradeConfig{ + Rollback: &configuration.UpgradeRollbackConfig{ + Window: 24 * time.Hour, + }, + }, + now: aMomentInTime, + version: "6.6.6", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable, i...) + }, + additionalAsserts: func(t *testing.T, topDir string) { + actualMarkerFilePath := filepath.Join(topDir, "data", markerFilename) + require.NoFileExists(t, actualMarkerFilePath, "marker file must not be created") + + }, + }, + { + name: "no update marker, error retrieving agent installs", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + rollbacksSource.EXPECT().Get().Return(nil, errors.New("error retrieving agent rollbacks")) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: &configuration.UpgradeConfig{ + Rollback: &configuration.UpgradeRollbackConfig{ + Window: 24 * time.Hour, + }, + }, + now: aMomentInTime, + version: "1.2.3", + wantErr: assert.Error, + }, + { + name: "no update marker, invoking watcher fails - error", + setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper, rollbacksSource *mockAvailableRollbacksSource) { + rollbacksSource.EXPECT().Get().Return( + map[string]TTLMarker{ + "data/elastic-agent-1.2.3-oldver": { + Version: "1.2.3", + Hash: "oldver", + ValidUntil: aMomentTomorrow, + }, + }, + nil, + ) + newerWatcherExecutable := filepath.Join(topDir, "data", fmt.Sprintf("elastic-agent-%s-%s", release.VersionWithSnapshot(), release.ShortCommit()), "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstallCurrent).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable, "--rollback", "data/elastic-agent-1.2.3-oldver"). + Return(nil, errors.New("error invoking watcher")) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: &configuration.UpgradeConfig{ + Rollback: &configuration.UpgradeRollbackConfig{ + Window: 24 * time.Hour, + }, + }, + now: aMomentInTime, + version: "1.2.3", + wantErr: assert.Error, + additionalAsserts: func(t *testing.T, topDir string) { + actualMarkerFilePath := filepath.Join(topDir, "data", markerFilename) + require.NoFileExists(t, actualMarkerFilePath, "marker file must have been created") + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + log, _ := loggertest.New(t.Name()) + mockAgentInfo := info.NewMockAgent(t) + mockWatcherHelper := NewMockWatcherHelper(t) + mockRollbacksSource := newMockAvailableRollbacksSource(t) + topDir := t.TempDir() + err := os.MkdirAll(paths.DataFrom(topDir), 0777) + require.NoError(t, err, "error creating data directory in topDir %q", topDir) + + if tc.setup != nil { + tc.setup(t, topDir, mockAgentInfo, mockWatcherHelper, mockRollbacksSource) + } + + upgrader, err := NewUpgrader(log, tc.artifactSettings, tc.upgradeSettings, mockAgentInfo, mockWatcherHelper, mockRollbacksSource) + require.NoError(t, err, "error instantiating upgrader") + _, err = upgrader.rollbackToPreviousVersion(t.Context(), topDir, tc.now, tc.version, nil) + tc.wantErr(t, err, "unexpected error returned by rollbackToPreviousVersion()") + if tc.additionalAsserts != nil { + tc.additionalAsserts(t, topDir) + } + }) + } +} diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 6d5e81598fd..96f97fed1cc 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -11,6 +11,7 @@ import ( "os" "os/exec" "path/filepath" + "slices" "strings" "time" @@ -77,17 +78,17 @@ func RollbackWithOpts(ctx context.Context, log *logger.Logger, c client.Client, settings := NewRollbackSettings(opts...) - symlinkPath := filepath.Join(topDirPath, agentName) + symlinkPath := filepath.Join(topDirPath, AgentName) - var symlinkTarget string - if prevVersionedHome != "" { - symlinkTarget = paths.BinaryPath(filepath.Join(topDirPath, prevVersionedHome), agentName) - } else { + if prevVersionedHome == "" { // fallback for upgrades that didn't use the manifest and path remapping - hashedDir := fmt.Sprintf("%s-%s", agentName, prevHash) - // paths.BinaryPath properly derives the binary directory depending on the platform. The path to the binary for macOS is inside of the app bundle. - symlinkTarget = paths.BinaryPath(filepath.Join(paths.DataFrom(topDirPath), hashedDir), agentName) + hashedDir := fmt.Sprintf("%s-%s", AgentName, prevHash) + prevVersionedHome = filepath.Join("data", hashedDir) } + + // paths.BinaryPath properly derives the binary directory depending on the platform. The path to the binary for macOS is inside of the app bundle. + symlinkTarget := paths.BinaryPath(filepath.Join(topDirPath, prevVersionedHome), AgentName) + // change symlink if err := changeSymlink(log, topDirPath, symlinkPath, symlinkTarget); err != nil { return err @@ -127,16 +128,16 @@ func RollbackWithOpts(ctx context.Context, log *logger.Logger, c client.Client, } // cleanup everything except version we're rolling back into - return Cleanup(log, topDirPath, prevVersionedHome, prevHash, settings.RemoveMarker, true) + return Cleanup(log, topDirPath, settings.RemoveMarker, true, prevVersionedHome) } // Cleanup removes all artifacts and files related to a specified version. -func Cleanup(log *logger.Logger, topDirPath, currentVersionedHome, currentHash string, removeMarker, keepLogs bool) error { - return cleanup(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs, afterRestartDelay) +func Cleanup(log *logger.Logger, topDirPath string, removeMarker, keepLogs bool, versionedHomesToKeep ...string) error { + return cleanup(log, topDirPath, removeMarker, keepLogs, afterRestartDelay, versionedHomesToKeep...) } -func cleanup(log *logger.Logger, topDirPath, currentVersionedHome, currentHash string, removeMarker, keepLogs bool, delay time.Duration) error { - log.Infow("Cleaning up upgrade", "hash", currentHash, "remove_marker", removeMarker) +func cleanup(log *logger.Logger, topDirPath string, removeMarker, keepLogs bool, delay time.Duration, versionedHomesToKeep ...string) error { + log.Infow("Cleaning up upgrade", "remove_marker", removeMarker) <-time.After(delay) // data directory path @@ -171,20 +172,20 @@ func cleanup(log *logger.Logger, topDirPath, currentVersionedHome, currentHash s log.Infow("Removing previous symlink path", "file.path", prevSymlinkPath(topDirPath)) _ = os.Remove(prevSymlink) - dirPrefix := fmt.Sprintf("%s-", agentName) - var currentDir string - if currentVersionedHome != "" { - currentDir, err = filepath.Rel("data", currentVersionedHome) + dirPrefix := fmt.Sprintf("%s-", AgentName) + + relativeHomePaths := make([]string, len(versionedHomesToKeep)) + for i, h := range versionedHomesToKeep { + relHomePath, err := filepath.Rel("data", h) if err != nil { - return fmt.Errorf("extracting elastic-agent path relative to data directory from %s: %w", currentVersionedHome, err) + return fmt.Errorf("extracting elastic-agent path relative to data directory from %s: %w", h, err) } - } else { - currentDir = fmt.Sprintf("%s-%s", agentName, currentHash) + relativeHomePaths[i] = relHomePath } var errs []error for _, dir := range subdirs { - if dir == currentDir { + if slices.Contains(relativeHomePaths, dir) { continue } diff --git a/internal/pkg/agent/application/upgrade/rollback_test.go b/internal/pkg/agent/application/upgrade/rollback_test.go index eab955d663a..4a977905354 100644 --- a/internal/pkg/agent/application/upgrade/rollback_test.go +++ b/internal/pkg/agent/application/upgrade/rollback_test.go @@ -219,7 +219,11 @@ func TestCleanup(t *testing.T) { require.NoError(t, err, "error loading update marker") require.NotNil(t, marker, "loaded marker must not be nil") t.Logf("Loaded update marker %+v", marker) - tt.wantErr(t, cleanup(testLogger, testTop, marker.VersionedHome, marker.Hash, tt.args.removeMarker, tt.args.keepLogs, 0), fmt.Sprintf("Cleanup(%v, %v, %v, %v)", marker.VersionedHome, marker.Hash, tt.args.removeMarker, tt.args.keepLogs)) + versionedHome := marker.VersionedHome + if versionedHome == "" { + versionedHome = filepath.Join("data", fmt.Sprintf("elastic-agent-%s", marker.Hash[:6])) + } + tt.wantErr(t, cleanup(testLogger, testTop, tt.args.removeMarker, tt.args.keepLogs, 0, versionedHome), fmt.Sprintf("Cleanup(%v, %v, %v, %v)", marker.Hash, tt.args.removeMarker, tt.args.keepLogs, versionedHome)) tt.checkAfterCleanup(t, testTop) }) } @@ -335,7 +339,7 @@ func TestRollback(t *testing.T) { func TestRollbackWithOpts(t *testing.T) { type hookFuncWithLogs func(t *testing.T, logs *observer.ObservedLogs, topDir string) - agentExecutableName := agentName + agentExecutableName := AgentName if runtime.GOOS == "windows" { agentExecutableName += ".exe" } @@ -591,7 +595,7 @@ func checkFilesAfterCleanup(t *testing.T, topDir, newAgentHome string, oldAgentH // check the new agent home assert.DirExists(t, filepath.Join(topDir, newAgentHome), "new agent directory should exist after cleanup") - agentExecutable := agentName + agentExecutable := AgentName if runtime.GOOS == "windows" { agentExecutable += ".exe" } @@ -620,11 +624,11 @@ func checkFilesAfterRollback(t *testing.T, topDir, oldAgentHome, newAgentHome st // some things should have been removed from the new agent directory assert.NoDirExists(t, filepath.Join(topDir, newAgentHome, "components"), "new agent components directory should have been cleaned up in the rollback") assert.NoDirExists(t, filepath.Join(topDir, newAgentHome, "run"), "new agent run directory should have been cleaned up in the rollback") - assert.NoFileExists(t, filepath.Join(topDir, newAgentHome, agentName), "new agent binary should have been cleaned up in the rollback") + assert.NoFileExists(t, filepath.Join(topDir, newAgentHome, AgentName), "new agent binary should have been cleaned up in the rollback") // check the old agent home assert.DirExists(t, filepath.Join(topDir, oldAgentHome), "old agent directory should exist after rollback") - agentExecutable := agentName + agentExecutable := AgentName if runtime.GOOS == "windows" { agentExecutable += ".exe" } @@ -689,10 +693,10 @@ func setupAgents(t *testing.T, log *logger.Logger, topDir string, installations func createFakeAgentInstall(t *testing.T, topDir, version, hash string, useVersionInPath bool) string { // create versioned home - versionedHome := fmt.Sprintf("elastic-agent-%s", hash[:hashLen]) + versionedHome := fmt.Sprintf("elastic-agent-%s", hash[:HashLen]) if useVersionInPath { // use the version passed as parameter - versionedHome = fmt.Sprintf("elastic-agent-%s-%s", version, hash[:hashLen]) + versionedHome = fmt.Sprintf("elastic-agent-%s-%s", version, hash[:HashLen]) } relVersionedHomePath := filepath.Join("data", versionedHome) absVersionedHomePath := filepath.Join(topDir, relVersionedHomePath) @@ -717,7 +721,7 @@ func createFakeAgentInstall(t *testing.T, topDir, version, hash string, useVersi require.NoError(t, err, "error creating fake install run directory %q", absRunDirPath) // put some placeholder for files - agentExecutableName := agentName + agentExecutableName := AgentName if runtime.GOOS == "windows" { agentExecutableName += ".exe" } @@ -732,8 +736,8 @@ func createFakeAgentInstall(t *testing.T, topDir, version, hash string, useVersi } func createLink(t *testing.T, topDir string, currentAgentVersionedHome string) { - linkTarget := paths.BinaryPath(currentAgentVersionedHome, agentName) - linkName := agentName + linkTarget := paths.BinaryPath(currentAgentVersionedHome, AgentName) + linkName := AgentName if runtime.GOOS == "windows" { linkTarget += ".exe" linkName += ".exe" diff --git a/internal/pkg/agent/application/upgrade/step_mark.go b/internal/pkg/agent/application/upgrade/step_mark.go index 7f93ff43a27..11e7b208430 100644 --- a/internal/pkg/agent/application/upgrade/step_mark.go +++ b/internal/pkg/agent/application/upgrade/step_mark.go @@ -21,11 +21,11 @@ import ( ) const markerFilename = ".update-marker" -const disableRollbackWindow = time.Duration(0) // TTLMarker marks an elastic-agent install available for rollback type TTLMarker struct { Version string `json:"version" yaml:"version"` + Hash string `json:"hash" yaml:"hash"` ValidUntil time.Time `json:"valid_until" yaml:"valid_until"` } @@ -143,8 +143,8 @@ type updateActiveCommitFunc func(log *logger.Logger, topDirPath, hash string, wr func markUpgradeProvider(updateActiveCommit updateActiveCommitFunc, writeFile writeFileFunc) markUpgradeFunc { return func(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, availableRollbacks map[string]TTLMarker) error { - if len(previousAgent.hash) > hashLen { - previousAgent.hash = previousAgent.hash[:hashLen] + if len(previousAgent.hash) > HashLen { + previousAgent.hash = previousAgent.hash[:HashLen] } marker := &UpdateMarker{ diff --git a/internal/pkg/agent/application/upgrade/step_relink.go b/internal/pkg/agent/application/upgrade/step_relink.go index d6fc9a6b9c1..82b1a597889 100644 --- a/internal/pkg/agent/application/upgrade/step_relink.go +++ b/internal/pkg/agent/application/upgrade/step_relink.go @@ -44,11 +44,11 @@ func changeSymlink(log *logger.Logger, topDirPath, symlinkPath, newTarget string } func prevSymlinkPath(topDirPath string) string { - agentPrevName := agentName + ".prev" + agentPrevName := AgentName + ".prev" // handle windows suffixes if runtime.GOOS == windowsOSName { - agentPrevName = agentName + ".exe.prev" + agentPrevName = AgentName + ".exe.prev" } return filepath.Join(topDirPath, agentPrevName) diff --git a/internal/pkg/agent/application/upgrade/step_unpack.go b/internal/pkg/agent/application/upgrade/step_unpack.go index ae0f964ee65..e39ea77ebe5 100644 --- a/internal/pkg/agent/application/upgrade/step_unpack.go +++ b/internal/pkg/agent/application/upgrade/step_unpack.go @@ -123,7 +123,7 @@ func unzip(log *logger.Logger, archivePath, dataDir string, flavor string, copy return UnpackResult{}, fmt.Errorf("retrieving package metadata from %q: %w", archivePath, err) } - hash = metadata.hash[:hashLen] + hash = metadata.hash[:HashLen] var registry map[string][]string if metadata.manifest != nil { pm.mappings = metadata.manifest.Package.PathMappings @@ -368,7 +368,7 @@ func untar(log *logger.Logger, archivePath, dataDir string, flavor string, copy return UnpackResult{}, fmt.Errorf("retrieving package metadata from %q: %w", archivePath, err) } - hash = metadata.hash[:hashLen] + hash = metadata.hash[:HashLen] var registry map[string][]string if metadata.manifest != nil { @@ -641,8 +641,8 @@ func readCommitHash(reader io.Reader) (string, error) { return "", fmt.Errorf("reading agent commit hash file: %w", err) } hash := strings.TrimSpace(string(commitBytes)) - if len(hash) < hashLen { - return "", fmt.Errorf("hash %q is shorter than minimum length %d", string(commitBytes), hashLen) + if len(hash) < HashLen { + return "", fmt.Errorf("hash %q is shorter than minimum length %d", string(commitBytes), HashLen) } return hash, nil } @@ -767,5 +767,5 @@ func getFilesContentFromTar(archivePath string, files ...string) (map[string]io. // createVersionedHomeFromHash returns a versioned home path relative to topPath in the legacy format `elastic-agent-` // formatted using OS-dependent path separators func createVersionedHomeFromHash(hash string) string { - return filepath.Join("data", fmt.Sprintf("elastic-agent-%s", hash[:hashLen])) + return filepath.Join("data", fmt.Sprintf("elastic-agent-%s", hash[:HashLen])) } diff --git a/internal/pkg/agent/application/upgrade/step_unpack_test.go b/internal/pkg/agent/application/upgrade/step_unpack_test.go index 49a0bc1accb..6b2df7d5f4a 100644 --- a/internal/pkg/agent/application/upgrade/step_unpack_test.go +++ b/internal/pkg/agent/application/upgrade/step_unpack_test.go @@ -121,7 +121,7 @@ var archiveFilesWithMoreComponents = []files{ {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/" + agentCommitFile, content: "abcdefghijklmnopqrstuvwxyz", mode: fs.ModePerm & 0o640}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, - {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/" + agentName, content: agentBinaryPlaceholderContent, mode: fs.ModePerm & 0o750}, + {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/" + AgentName, content: agentBinaryPlaceholderContent, mode: fs.ModePerm & 0o750}, {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/package.version", content: "1.2.3", mode: fs.ModePerm & 0o640}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/components", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/components/comp1", binary: true, content: "Placeholder for component", mode: fs.ModePerm & 0o750}, @@ -142,7 +142,7 @@ var archiveFilesWithManifestNoSymlink = []files{ {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/" + agentCommitFile, content: "abcdefghijklmnopqrstuvwxyz", mode: fs.ModePerm & 0o640}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, - {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/" + agentName, content: agentBinaryPlaceholderContent, mode: fs.ModePerm & 0o750}, + {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/" + AgentName, content: agentBinaryPlaceholderContent, mode: fs.ModePerm & 0o750}, {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/package.version", content: "1.2.3", mode: fs.ModePerm & 0o640}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/components", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/components/comp1", content: "Placeholder for component", mode: fs.ModePerm & 0o750}, @@ -153,7 +153,7 @@ var outOfOrderArchiveFilesNoManifestNoSymlink = []files{ {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/" + agentCommitFile, content: "abcdefghijklmnopqrstuvwxyz", mode: fs.ModePerm & 0o640}, {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/package.version", content: "1.2.3", mode: fs.ModePerm & 0o640}, - {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/" + agentName, content: agentBinaryPlaceholderContent, mode: fs.ModePerm & 0o750}, + {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/" + AgentName, content: agentBinaryPlaceholderContent, mode: fs.ModePerm & 0o750}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef", mode: fs.ModeDir | (fs.ModePerm & 0o700)}, {fType: DIRECTORY, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/components", mode: fs.ModeDir | (fs.ModePerm & 0o750)}, @@ -161,7 +161,7 @@ var outOfOrderArchiveFilesNoManifestNoSymlink = []files{ {fType: REGULAR, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/data/elastic-agent-abcdef/components/comp1.spec.yml", content: foo_component_spec, mode: fs.ModePerm & 0o640}, } -var agentArchiveSymLink = files{fType: SYMLINK, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/" + agentName, content: "data/elastic-agent-abcdef/" + agentName, mode: fs.ModeSymlink | (fs.ModePerm & 0o750)} +var agentArchiveSymLink = files{fType: SYMLINK, path: "elastic-agent-1.2.3-SNAPSHOT-someos-x86_64/" + AgentName, content: "data/elastic-agent-abcdef/" + AgentName, mode: fs.ModeSymlink | (fs.ModePerm & 0o750)} type fileType uint @@ -603,7 +603,7 @@ func checkExtractedFilesOutOfOrder(t *testing.T, versionedHome string) { } actualPermissions := fs.ModePerm & stat.Mode() assert.Equalf(t, expectedPermissions, actualPermissions, "Wrong permissions set on versioned home %q: expected %O, got %O", versionedHome, expectedPermissions, actualPermissions) - agentExecutable := filepath.Join(versionedHome, agentName) + agentExecutable := filepath.Join(versionedHome, AgentName) if assert.FileExistsf(t, agentExecutable, "agent executable %q is not found in versioned home directory %q", agentExecutable, versionedHome) { fileBytes, err := os.ReadFile(agentExecutable) if assert.NoErrorf(t, err, "error reading elastic-agent executable %q", agentExecutable) { @@ -615,7 +615,7 @@ func checkExtractedFilesOutOfOrder(t *testing.T, versionedHome string) { func checkExtractedFilesWithManifest(t *testing.T, testDataDir string) { versionedHome := filepath.Join(testDataDir, "elastic-agent-1.2.3-SNAPSHOT-abcdef") require.DirExists(t, versionedHome, "mapped versioned home directory does not exists") - mappedAgentExecutable := filepath.Join(versionedHome, agentName) + mappedAgentExecutable := filepath.Join(versionedHome, AgentName) if assert.FileExistsf(t, mappedAgentExecutable, "agent executable %q is not found in mapped versioned home directory %q", mappedAgentExecutable, versionedHome) { fileBytes, err := os.ReadFile(mappedAgentExecutable) if assert.NoErrorf(t, err, "error reading elastic-agent executable %q", mappedAgentExecutable) { diff --git a/internal/pkg/agent/application/upgrade/ttl_marker_source_test.go b/internal/pkg/agent/application/upgrade/ttl_marker_source_test.go index 3ac95e1769d..dd79961102c 100644 --- a/internal/pkg/agent/application/upgrade/ttl_marker_source_test.go +++ b/internal/pkg/agent/application/upgrade/ttl_marker_source_test.go @@ -140,6 +140,7 @@ func TestTTLMarkerRegistry_Get(t *testing.T) { func TestTTLMarkerRegistry_Set(t *testing.T) { const TTLMarkerYAMLTemplate = ` version: {{ .Version }} + hash: {{ .Hash }} valid_until: {{ .ValidUntil }}` expectedMarkerContentTemplate, err := template.New("expected marker").Parse(TTLMarkerYAMLTemplate) @@ -156,6 +157,7 @@ func TestTTLMarkerRegistry_Set(t *testing.T) { versions := []string{"1.2.3", "4.5.6"} versionedHomes := []string{"elastic-agent-1.2.3-past", "elastic-agent-4.5.6-present"} + hashes := []string{"past", "present"} ttls := []string{tomorrowString, ""} type args struct { @@ -180,6 +182,7 @@ func TestTTLMarkerRegistry_Set(t *testing.T) { map[string]TTLMarker{ filepath.Join("data", versionedHomes[0]): { Version: versions[0], + Hash: hashes[0], ValidUntil: tomorrow, }, }, @@ -192,7 +195,7 @@ func TestTTLMarkerRegistry_Set(t *testing.T) { if assert.FileExists(t, expectedTTLMarkerFilePath, "new TTL marker should have been created") { b := new(strings.Builder) - err = expectedMarkerContentTemplate.Execute(b, map[string]string{"Version": versions[0], "ValidUntil": ttls[0]}) + err = expectedMarkerContentTemplate.Execute(b, map[string]string{"Version": versions[0], "ValidUntil": ttls[0], "Hash": hashes[0]}) require.NoError(t, err) actualMarkerContent, err := os.ReadFile(expectedTTLMarkerFilePath) require.NoError(t, err) @@ -207,7 +210,7 @@ func TestTTLMarkerRegistry_Set(t *testing.T) { err = os.MkdirAll(filepath.Join(tmpDir, "data", versionedHome), 0755) require.NoError(t, err, "error setting up fake agent install directory") b := new(strings.Builder) - err = expectedMarkerContentTemplate.Execute(b, map[string]string{"Version": versions[i], "ValidUntil": ttls[i]}) + err = expectedMarkerContentTemplate.Execute(b, map[string]string{"Version": versions[i], "ValidUntil": ttls[i], "Hash": hashes[i]}) require.NoError(t, err, "error setting up ttl marker") err = os.WriteFile(filepath.Join(tmpDir, "data", versionedHomes[i], ttlMarkerName), []byte(b.String()), 0644) } diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index 359fdf25fa5..d2157eb8301 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -43,8 +43,8 @@ import ( ) const ( - agentName = "elastic-agent" - hashLen = 6 + AgentName = "elastic-agent" + HashLen = 6 agentCommitFile = ".elastic-agent.active.commit" runDirMod = 0770 snapshotSuffix = "-SNAPSHOT" @@ -54,8 +54,8 @@ const ( var agentArtifact = artifact.Artifact{ Name: "Elastic Agent", - Cmd: agentName, - Artifact: "beats/" + agentName, + Cmd: AgentName, + Artifact: "beats/" + AgentName, } var ( @@ -428,10 +428,10 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s // create symlink to the /elastic-agent hashedDir := unpackRes.VersionedHome - symlinkPath := filepath.Join(paths.Top(), agentName) + symlinkPath := filepath.Join(paths.Top(), AgentName) // paths.BinaryPath properly derives the binary directory depending on the platform. The path to the binary for macOS is inside of the app bundle. - newPath := paths.BinaryPath(filepath.Join(paths.Top(), hashedDir), agentName) + newPath := paths.BinaryPath(filepath.Join(paths.Top(), hashedDir), AgentName) currentVersionedHome, err := filepath.Rel(paths.Top(), paths.Home()) if err != nil { @@ -468,7 +468,7 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s versionedHome: currentVersionedHome, } - availableRollbacks := getAvailableRollbacks(rollbackWindow, time.Now(), release.VersionWithSnapshot(), previousParsedVersion, currentVersionedHome) + availableRollbacks := getAvailableRollbacks(rollbackWindow, time.Now(), release.VersionWithSnapshot(), previousParsedVersion, currentVersionedHome, release.Commit()) if err = u.availableRollbacksSource.Set(availableRollbacks); err != nil { u.log.Errorw("Rolling back: setting ttl markers failed", "error.message", err) @@ -515,145 +515,6 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s return cb, nil } -func getAvailableRollbacks(rollbackWindow time.Duration, now time.Time, currentVersion string, parsedCurrentVersion *agtversion.ParsedSemVer, currentVersionedHome string) map[string]TTLMarker { - if rollbackWindow == 0 { - // if there's no rollback window it means that no rollback should survive the watcher cleanup at the end of the grace period. - return nil - } - - if parsedCurrentVersion == nil || parsedCurrentVersion.Less(*Version_9_3_0_SNAPSHOT) { - // the version we are upgrading to does not support manual rollbacks - return nil - } - - // when multiple rollbacks will be supported, read the existing descriptor - // at this stage we can get by with a single rollback - res := make(map[string]TTLMarker, 1) - res[currentVersionedHome] = TTLMarker{ - Version: currentVersion, - ValidUntil: now.Add(rollbackWindow), - } - - return res -} - -func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) { - if version == "" { - return nil, ErrEmptyRollbackVersion - } - - // check that the upgrade marker exists and is accessible - updateMarkerPath := markerFilePath(paths.DataFrom(topDir)) - _, err := os.Stat(updateMarkerPath) - if err != nil { - return nil, fmt.Errorf("stat() on upgrade marker %q failed: %w", updateMarkerPath, err) - } - - // read the upgrade marker - updateMarker, err := LoadMarker(paths.DataFrom(topDir)) - if err != nil { - return nil, fmt.Errorf("loading marker: %w", err) - } - - if updateMarker == nil { - return nil, ErrNilUpdateMarker - } - - // extract the agent installs involved in the upgrade and select the most appropriate watcher executable - previous, current, err := extractAgentInstallsFromMarker(updateMarker) - if err != nil { - return nil, fmt.Errorf("extracting current and previous install details: %w", err) - } - watcherExecutable := u.watcherHelper.SelectWatcherExecutable(topDir, previous, current) - - err = withTakeOverWatcher(ctx, u.log, topDir, u.watcherHelper, func() error { - // read the upgrade marker - updateMarker, err = LoadMarker(paths.DataFrom(topDir)) - if err != nil { - return fmt.Errorf("loading marker: %w", err) - } - - if updateMarker == nil { - return ErrNilUpdateMarker - } - - if len(updateMarker.RollbacksAvailable) == 0 { - return ErrNoRollbacksAvailable - } - var selectedRollback *TTLMarker - for _, rollback := range updateMarker.RollbacksAvailable { - if rollback.Version == version && now.Before(rollback.ValidUntil) { - selectedRollback = &rollback - break - } - } - if selectedRollback == nil { - return fmt.Errorf("version %q not listed among the available rollbacks: %w", version, ErrNoRollbacksAvailable) - } - - // rollback - _, err = u.watcherHelper.InvokeWatcher(u.log, watcherExecutable, "watch", "--rollback", updateMarker.PrevVersionedHome) - if err != nil { - return fmt.Errorf("starting rollback command: %w", err) - } - u.log.Debug("rollback command started successfully, PID") - return nil - }) - - if err != nil { - // Invoke watcher again (now that we released the watcher applocks) - _, invokeWatcherErr := u.watcherHelper.InvokeWatcher(u.log, watcherExecutable) - if invokeWatcherErr != nil { - return nil, goerrors.Join(err, fmt.Errorf("invoking watcher: %w", invokeWatcherErr)) - } - return nil, err - } - - return nil, nil - -} - -func withTakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string, watcherHelper WatcherHelper, f func() error) error { - watcherApplock, err := watcherHelper.TakeOverWatcher(ctx, log, topDir) - if err != nil { - return fmt.Errorf("taking over watcher processes: %w", err) - } - defer func(watcherApplock *filelock.AppLocker) { - releaseWatcherAppLockerErr := watcherApplock.Unlock() - if releaseWatcherAppLockerErr != nil { - log.Warnw("error releasing watcher applock", "error", releaseWatcherAppLockerErr) - } - }(watcherApplock) - - return f() -} - -func extractAgentInstallsFromMarker(updateMarker *UpdateMarker) (previous agentInstall, current agentInstall, err error) { - previousParsedVersion, err := agtversion.ParseVersion(updateMarker.PrevVersion) - if err != nil { - return previous, current, fmt.Errorf("parsing previous version %q: %w", updateMarker.PrevVersion, err) - } - previous = agentInstall{ - parsedVersion: previousParsedVersion, - version: updateMarker.PrevVersion, - hash: updateMarker.PrevHash, - versionedHome: updateMarker.PrevVersionedHome, - } - - currentParsedVersion, err := agtversion.ParseVersion(updateMarker.Version) - if err != nil { - return previous, current, fmt.Errorf("parsing current version %q: %w", updateMarker.Version, err) - } - current = agentInstall{ - parsedVersion: currentParsedVersion, - version: updateMarker.Version, - hash: updateMarker.Hash, - versionedHome: updateMarker.VersionedHome, - } - - return previous, current, nil -} - // Ack acks last upgrade action func (u *Upgrader) Ack(ctx context.Context, acker acker.Acker) error { // get upgrade action @@ -732,8 +593,8 @@ func isSameVersion(log *logger.Logger, current agentVersion, newVersion agentVer } func rollbackInstall(ctx context.Context, log *logger.Logger, topDirPath, versionedHome, oldVersionedHome string, rollbackSource availableRollbacksSource) error { - oldAgentPath := paths.BinaryPath(filepath.Join(topDirPath, oldVersionedHome), agentName) - err := changeSymlink(log, topDirPath, filepath.Join(topDirPath, agentName), oldAgentPath) + oldAgentPath := paths.BinaryPath(filepath.Join(topDirPath, oldVersionedHome), AgentName) + err := changeSymlink(log, topDirPath, filepath.Join(topDirPath, AgentName), oldAgentPath) if err != nil && !errors.Is(err, fs.ErrNotExist) { return fmt.Errorf("rolling back install: restoring symlink to %q failed: %w", oldAgentPath, err) } diff --git a/internal/pkg/agent/application/upgrade/upgrade_test.go b/internal/pkg/agent/application/upgrade/upgrade_test.go index a019fa0e2b5..dc91be46bc2 100644 --- a/internal/pkg/agent/application/upgrade/upgrade_test.go +++ b/internal/pkg/agent/application/upgrade/upgrade_test.go @@ -13,7 +13,6 @@ import ( "net/http" "net/url" "os" - "os/exec" "path/filepath" "runtime" "testing" @@ -27,13 +26,11 @@ import ( "github.com/elastic/elastic-agent-libs/transport/httpcommon" "github.com/elastic/elastic-agent-libs/transport/tlscommon" - "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact" upgradeErrors "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download/errors" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" - "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/config" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" @@ -139,8 +136,8 @@ func TestShutdownCallback(t *testing.T) { testcases := []testcase{ { name: "legacy run directories", - agentHomeDirectory: fmt.Sprintf("%s-%s", agentName, release.ShortCommit()), - newAgentHomeDirectory: fmt.Sprintf("%s-%s", agentName, "abc123"), + agentHomeDirectory: fmt.Sprintf("%s-%s", AgentName, release.ShortCommit()), + newAgentHomeDirectory: fmt.Sprintf("%s-%s", AgentName, "abc123"), agentVersion: "7.14.0", newAgentVersion: "7.15.0", oldRunFile: filepath.Join("run", "default", "process-7.14.0", "file.test"), @@ -1042,275 +1039,6 @@ func TestIsSameReleaseVersion(t *testing.T) { } } -func TestManualRollback(t *testing.T) { - const updatemarkerwatching456NoRollbackAvailable = ` - version: 4.5.6 - hash: newver - versioned_home: data/elastic-agent-4.5.6-newver - updated_on: 2025-07-11T10:11:12.131415Z - prev_version: 1.2.3 - prev_hash: oldver - prev_versioned_home: data/elastic-agent-1.2.3-oldver - acked: false - action: null - details: - target_version: 4.5.6 - state: UPG_WATCHING - metadata: - retry_until: null - ` - const updatemarkerwatching456 = ` - version: 4.5.6 - hash: newver - versioned_home: data/elastic-agent-4.5.6-newver - updated_on: 2025-07-11T10:11:12.131415Z - prev_version: 1.2.3 - prev_hash: oldver - prev_versioned_home: data/elastic-agent-1.2.3-oldver - acked: false - action: null - details: - target_version: 4.5.6 - state: UPG_WATCHING - metadata: - retry_until: null - desired_outcome: UPGRADE - rollbacks_available: - "data/elastic-agent-1.2.3-oldver": - version: 1.2.3 - valid_until: 2025-07-18T10:11:12.131415Z - ` - - parsed123Version, err := agtversion.ParseVersion("1.2.3") - require.NoError(t, err) - parsed456Version, err := agtversion.ParseVersion("4.5.6") - require.NoError(t, err) - - agentInstall123 := agentInstall{ - parsedVersion: parsed123Version, - version: "1.2.3", - hash: "oldver", - versionedHome: "data/elastic-agent-1.2.3-oldver", - } - - agentInstall456 := agentInstall{ - parsedVersion: parsed456Version, - version: "4.5.6", - hash: "newver", - versionedHome: "data/elastic-agent-4.5.6-newver", - } - - // this is the updated_on timestamp in the example - nowBeforeTTL, err := time.Parse(time.RFC3339, `2025-07-11T10:11:12Z`) - require.NoError(t, err, "error parsing nowBeforeTTL") - - // the update marker yaml assume 7d TLL for rollbacks, let's make an extra day pass - nowAfterTTL := nowBeforeTTL.Add(8 * 24 * time.Hour) - - type setupF func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) - type postRollbackAssertionsF func(t *testing.T, topDir string) - type testcase struct { - name string - setup setupF - artifactSettings *artifact.Config - upgradeSettings *configuration.UpgradeConfig - now time.Time - version string - wantErr assert.ErrorAssertionFunc - additionalAsserts postRollbackAssertionsF - } - - testcases := []testcase{ - { - name: "no rollback version - rollback fails", - setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) { - //do not setup anything here, let the rollback fail - }, - artifactSettings: artifact.DefaultConfig(), - upgradeSettings: configuration.DefaultUpgradeConfig(), - version: "", - wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { - return assert.ErrorIs(t, err, ErrEmptyRollbackVersion) - }, - additionalAsserts: nil, - }, - { - name: "no update marker - rollback fails", - setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) { - //do not setup anything here, let the rollback fail - }, - artifactSettings: artifact.DefaultConfig(), - upgradeSettings: configuration.DefaultUpgradeConfig(), - version: "1.2.3", - wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { - return assert.ErrorIs(t, err, fs.ErrNotExist) - }, - additionalAsserts: nil, - }, - { - name: "update marker is malformed - rollback fails", - setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) { - err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte("this is not a proper YAML file"), 0600) - require.NoError(t, err, "error setting up update marker") - locker := filelock.NewAppLocker(topDir, "watcher.lock") - err = locker.TryLock() - require.NoError(t, err, "error locking initial watcher AppLocker") - // there's no takeover watcher so no expectation on that or InvokeWatcher - t.Cleanup(func() { - unlockErr := locker.Unlock() - assert.NoError(t, unlockErr, "error unlocking initial watcher AppLocker") - }) - }, - artifactSettings: artifact.DefaultConfig(), - upgradeSettings: configuration.DefaultUpgradeConfig(), - version: "1.2.3", - wantErr: assert.Error, - additionalAsserts: nil, - }, - { - name: "update marker ok but rollback available is empty - error", - setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) { - err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456NoRollbackAvailable), 0600) - require.NoError(t, err, "error setting up update marker") - locker := filelock.NewAppLocker(topDir, "watcher.lock") - err = locker.TryLock() - require.NoError(t, err, "error locking initial watcher AppLocker") - watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) - newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") - watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) - watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) - }, - artifactSettings: artifact.DefaultConfig(), - upgradeSettings: configuration.DefaultUpgradeConfig(), - version: "2.3.4-unknown", - wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { - return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) - }, - additionalAsserts: func(t *testing.T, topDir string) { - // marker should be untouched - filePath := markerFilePath(paths.DataFrom(topDir)) - require.FileExists(t, filePath) - markerFileBytes, readMarkerErr := os.ReadFile(filePath) - require.NoError(t, readMarkerErr) - - assert.YAMLEq(t, updatemarkerwatching456NoRollbackAvailable, string(markerFileBytes), "update marker should be untouched") - }, - }, - { - name: "update marker ok but version is not available for rollback - error", - setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) { - err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) - require.NoError(t, err, "error setting up update marker") - locker := filelock.NewAppLocker(topDir, "watcher.lock") - err = locker.TryLock() - require.NoError(t, err, "error locking initial watcher AppLocker") - watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) - newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") - watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) - watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) - }, - artifactSettings: artifact.DefaultConfig(), - upgradeSettings: configuration.DefaultUpgradeConfig(), - version: "2.3.4-unknown", - wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { - return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) - }, - additionalAsserts: func(t *testing.T, topDir string) { - // marker should be untouched - filePath := markerFilePath(paths.DataFrom(topDir)) - require.FileExists(t, filePath) - markerFileBytes, readMarkerErr := os.ReadFile(filePath) - require.NoError(t, readMarkerErr) - - assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched") - }, - }, - { - name: "update marker ok but rollback is expired - error", - setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) { - err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) - require.NoError(t, err, "error setting up update marker") - locker := filelock.NewAppLocker(topDir, "watcher.lock") - err = locker.TryLock() - require.NoError(t, err, "error locking initial watcher AppLocker") - watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) - newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") - watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) - watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) - }, - artifactSettings: artifact.DefaultConfig(), - upgradeSettings: configuration.DefaultUpgradeConfig(), - now: nowAfterTTL, - version: "1.2.3", - wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { - return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) - }, - additionalAsserts: func(t *testing.T, topDir string) { - // marker should be untouched - filePath := markerFilePath(paths.DataFrom(topDir)) - require.FileExists(t, filePath) - markerFileBytes, readMarkerErr := os.ReadFile(filePath) - require.NoError(t, readMarkerErr) - - assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched") - }, - }, - { - name: "update marker ok - takeover watcher, persist rollback and restart most recent watcher", - setup: func(t *testing.T, topDir string, agent *info.MockAgent, watcherHelper *MockWatcherHelper) { - err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) - require.NoError(t, err, "error setting up update marker") - locker := filelock.NewAppLocker(topDir, "watcher.lock") - err = locker.TryLock() - require.NoError(t, err, "error locking initial watcher AppLocker") - watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) - newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") - watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) - watcherHelper.EXPECT(). - InvokeWatcher(mock.Anything, newerWatcherExecutable, watcherSubcommand, "--rollback", "data/elastic-agent-1.2.3-oldver"). - Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for rollbacksies"}}, nil) - }, - artifactSettings: artifact.DefaultConfig(), - upgradeSettings: configuration.DefaultUpgradeConfig(), - now: nowBeforeTTL, - version: "1.2.3", - wantErr: assert.NoError, - additionalAsserts: func(t *testing.T, topDir string) { - marker, loadMarkerErr := LoadMarker(paths.DataFrom(topDir)) - require.NoError(t, loadMarkerErr, "error loading marker") - require.NotNil(t, marker, "marker is nil") - - require.NotNil(t, marker.Details) - assert.NotEmpty(t, marker.RollbacksAvailable) - }, - }, - } - - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - log, _ := loggertest.New(t.Name()) - mockAgentInfo := info.NewMockAgent(t) - mockWatcherHelper := NewMockWatcherHelper(t) - mockRollbacksSource := newMockAvailableRollbacksSource(t) - topDir := t.TempDir() - err := os.MkdirAll(paths.DataFrom(topDir), 0777) - require.NoError(t, err, "error creating data directory in topDir %q", topDir) - - if tc.setup != nil { - tc.setup(t, topDir, mockAgentInfo, mockWatcherHelper) - } - - upgrader, err := NewUpgrader(log, tc.artifactSettings, tc.upgradeSettings, mockAgentInfo, mockWatcherHelper, mockRollbacksSource) - require.NoError(t, err, "error instantiating upgrader") - _, err = upgrader.rollbackToPreviousVersion(t.Context(), topDir, tc.now, tc.version, nil) - tc.wantErr(t, err, "unexpected error returned by rollbackToPreviousVersion()") - if tc.additionalAsserts != nil { - tc.additionalAsserts(t, topDir) - } - }) - } -} - type mockArtifactDownloader struct { returnError error returnArchivePath string diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index e4163483b56..dac69262d62 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -358,10 +358,10 @@ func selectWatcherExecutable(topDir string, previous agentInstall, current agent // check if the upgraded version is less than the previous (currently installed) version if current.parsedVersion.Less(*previous.parsedVersion) { // use the current agent executable for watch, if downgrading the old agent doesn't understand the current agent's path structure. - return paths.BinaryPath(filepath.Join(topDir, previous.versionedHome), agentName) + return paths.BinaryPath(filepath.Join(topDir, previous.versionedHome), AgentName) } else { // use the new agent executable as it should be able to parse the new update marker - return paths.BinaryPath(filepath.Join(topDir, current.versionedHome), agentName) + return paths.BinaryPath(filepath.Join(topDir, current.versionedHome), AgentName) } } diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 12a7c744c61..0b03957bf28 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -706,7 +706,7 @@ func Test_selectWatcherExecutable(t *testing.T) { fakeTopDir := filepath.Join(t.TempDir(), "Elastic", "Agent") for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, paths.BinaryPath(filepath.Join(fakeTopDir, tt.want), agentName), selectWatcherExecutable(fakeTopDir, tt.args.previous, tt.args.current), "selectWatcherExecutable(%v, %v)", tt.args.previous, tt.args.current) + assert.Equalf(t, paths.BinaryPath(filepath.Join(fakeTopDir, tt.want), AgentName), selectWatcherExecutable(fakeTopDir, tt.args.previous, tt.args.current), "selectWatcherExecutable(%v, %v)", tt.args.previous, tt.args.current) }) } } diff --git a/internal/pkg/agent/cmd/mocks.go b/internal/pkg/agent/cmd/mocks.go index 360210cbc90..8191f6c6293 100644 --- a/internal/pkg/agent/cmd/mocks.go +++ b/internal/pkg/agent/cmd/mocks.go @@ -144,16 +144,24 @@ func (_m *mockInstallationModifier) EXPECT() *mockInstallationModifier_Expecter } // Cleanup provides a mock function for the type mockInstallationModifier -func (_mock *mockInstallationModifier) Cleanup(log *logger.Logger, topDirPath string, currentVersionedHome string, currentHash string, removeMarker bool, keepLogs bool) error { - ret := _mock.Called(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) +func (_mock *mockInstallationModifier) Cleanup(log *logger.Logger, topDirPath string, removeMarker bool, keepLogs bool, versionedHomesToKeep ...string) error { + // string + _va := make([]interface{}, len(versionedHomesToKeep)) + for _i := range versionedHomesToKeep { + _va[_i] = versionedHomesToKeep[_i] + } + var _ca []interface{} + _ca = append(_ca, log, topDirPath, removeMarker, keepLogs) + _ca = append(_ca, _va...) + ret := _mock.Called(_ca...) if len(ret) == 0 { panic("no return value specified for Cleanup") } var r0 error - if returnFunc, ok := ret.Get(0).(func(*logger.Logger, string, string, string, bool, bool) error); ok { - r0 = returnFunc(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) + if returnFunc, ok := ret.Get(0).(func(*logger.Logger, string, bool, bool, ...string) error); ok { + r0 = returnFunc(log, topDirPath, removeMarker, keepLogs, versionedHomesToKeep...) } else { r0 = ret.Error(0) } @@ -168,15 +176,15 @@ type mockInstallationModifier_Cleanup_Call struct { // Cleanup is a helper method to define mock.On call // - log *logger.Logger // - topDirPath string -// - currentVersionedHome string -// - currentHash string // - removeMarker bool // - keepLogs bool -func (_e *mockInstallationModifier_Expecter) Cleanup(log interface{}, topDirPath interface{}, currentVersionedHome interface{}, currentHash interface{}, removeMarker interface{}, keepLogs interface{}) *mockInstallationModifier_Cleanup_Call { - return &mockInstallationModifier_Cleanup_Call{Call: _e.mock.On("Cleanup", log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs)} +// - versionedHomesToKeep ...string +func (_e *mockInstallationModifier_Expecter) Cleanup(log interface{}, topDirPath interface{}, removeMarker interface{}, keepLogs interface{}, versionedHomesToKeep ...interface{}) *mockInstallationModifier_Cleanup_Call { + return &mockInstallationModifier_Cleanup_Call{Call: _e.mock.On("Cleanup", + append([]interface{}{log, topDirPath, removeMarker, keepLogs}, versionedHomesToKeep...)...)} } -func (_c *mockInstallationModifier_Cleanup_Call) Run(run func(log *logger.Logger, topDirPath string, currentVersionedHome string, currentHash string, removeMarker bool, keepLogs bool)) *mockInstallationModifier_Cleanup_Call { +func (_c *mockInstallationModifier_Cleanup_Call) Run(run func(log *logger.Logger, topDirPath string, removeMarker bool, keepLogs bool, versionedHomesToKeep ...string)) *mockInstallationModifier_Cleanup_Call { _c.Call.Run(func(args mock.Arguments) { var arg0 *logger.Logger if args[0] != nil { @@ -186,29 +194,28 @@ func (_c *mockInstallationModifier_Cleanup_Call) Run(run func(log *logger.Logger if args[1] != nil { arg1 = args[1].(string) } - var arg2 string + var arg2 bool if args[2] != nil { - arg2 = args[2].(string) + arg2 = args[2].(bool) } - var arg3 string + var arg3 bool if args[3] != nil { - arg3 = args[3].(string) + arg3 = args[3].(bool) } - var arg4 bool - if args[4] != nil { - arg4 = args[4].(bool) - } - var arg5 bool - if args[5] != nil { - arg5 = args[5].(bool) + var arg4 []string + variadicArgs := make([]string, len(args)-4) + for i, a := range args[4:] { + if a != nil { + variadicArgs[i] = a.(string) + } } + arg4 = variadicArgs run( arg0, arg1, arg2, arg3, - arg4, - arg5, + arg4..., ) }) return _c @@ -219,7 +226,7 @@ func (_c *mockInstallationModifier_Cleanup_Call) Return(err error) *mockInstalla return _c } -func (_c *mockInstallationModifier_Cleanup_Call) RunAndReturn(run func(log *logger.Logger, topDirPath string, currentVersionedHome string, currentHash string, removeMarker bool, keepLogs bool) error) *mockInstallationModifier_Cleanup_Call { +func (_c *mockInstallationModifier_Cleanup_Call) RunAndReturn(run func(log *logger.Logger, topDirPath string, removeMarker bool, keepLogs bool, versionedHomesToKeep ...string) error) *mockInstallationModifier_Cleanup_Call { _c.Call.Return(run) return _c } diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 11ac556ea0a..2308a7630d5 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "os" + "path/filepath" "runtime" "time" @@ -28,7 +29,6 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/cli" "github.com/elastic/elastic-agent/internal/pkg/config" - "github.com/elastic/elastic-agent/internal/pkg/release" "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/version" ) @@ -149,7 +149,7 @@ func WithRemoveMarker(removeMarker bool) upgrade.RollbackOption { } type installationModifier interface { - Cleanup(log *logger.Logger, topDirPath, currentVersionedHome, currentHash string, removeMarker, keepLogs bool) error + Cleanup(log *logger.Logger, topDirPath string, removeMarker, keepLogs bool, versionedHomesToKeep ...string) error Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string, opts ...upgrade.RollbackOption) error } @@ -198,7 +198,12 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher // if we're not within grace and marker is still there it might mean // that cleanup was not performed ok, cleanup everything except current version // hash is the same as hash of agent which initiated watcher. - if err := installModifier.Cleanup(log, paths.Top(), paths.VersionedHome(topDir), release.ShortCommit(), true, false); err != nil { + versionedHomesToKeep := make([]string, 0, len(marker.RollbacksAvailable)+1) + // current version needs to be kept + versionedHomesToKeep = append(versionedHomesToKeep, paths.VersionedHome(topDir)) + versionedHomesToKeep = appendAvailableRollbacks(log, marker, versionedHomesToKeep) + log.Infof("About to clean up upgrade. Keeping versioned homes: %v", versionedHomesToKeep) + if err := installModifier.Cleanup(log, paths.Top(), true, false, versionedHomesToKeep...); err != nil { log.Error("clean up of prior watcher run failed", err) } // exit nicely @@ -253,13 +258,31 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher // Why is this being skipped on Windows? The comment above is not clear. // issue: https://github.com/elastic/elastic-agent/issues/3027 removeMarker := !isWindows() - err = installModifier.Cleanup(log, topDir, marker.VersionedHome, marker.Hash, removeMarker, false) + newVersionedHome := marker.VersionedHome + if newVersionedHome == "" { + // the upgrade marker may have been created by an older version of agent where the versionedHome is always `data/elastic-agent-` + newVersionedHome = filepath.Join("data", fmt.Sprintf("elastic-agent-%s", marker.Hash[:6])) + } + versionedHomesToKeep := make([]string, 0, len(marker.RollbacksAvailable)+1) + versionedHomesToKeep = append(versionedHomesToKeep, newVersionedHome) + versionedHomesToKeep = appendAvailableRollbacks(log, marker, versionedHomesToKeep) + + err = installModifier.Cleanup(log, topDir, removeMarker, false, versionedHomesToKeep...) if err != nil { log.Error("cleanup after successful watch failed", err) } return err } +func appendAvailableRollbacks(log *logp.Logger, marker *upgrade.UpdateMarker, versionedHomesToKeep []string) []string { + // add any available rollbacks + for versionedHome, ra := range marker.RollbacksAvailable { + log.Debugf("Adding available rollback %s:%+v to the directories to keep during cleanup", versionedHome, ra) + versionedHomesToKeep = append(versionedHomesToKeep, versionedHome) + } + return versionedHomesToKeep +} + func rollback(log *logp.Logger, topDir string, client client.Client, installModifier installationModifier, versionedHome string) error { // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work log.Infof("rolling back to %s", versionedHome) diff --git a/internal/pkg/agent/cmd/watch_impl.go b/internal/pkg/agent/cmd/watch_impl.go index ebb9c04fcfb..3dfee99ed17 100644 --- a/internal/pkg/agent/cmd/watch_impl.go +++ b/internal/pkg/agent/cmd/watch_impl.go @@ -25,8 +25,8 @@ func (a upgradeAgentWatcher) Watch(ctx context.Context, tilGrace, errorCheckInte type upgradeInstallationModifier struct{} -func (a upgradeInstallationModifier) Cleanup(log *logger.Logger, topDirPath, currentVersionedHome, currentHash string, removeMarker, keepLogs bool) error { - return upgrade.Cleanup(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) +func (a upgradeInstallationModifier) Cleanup(log *logger.Logger, topDirPath string, removeMarker, keepLogs bool, versionedHomesToKeep ...string) error { + return upgrade.Cleanup(log, topDirPath, removeMarker, keepLogs, versionedHomesToKeep...) } func (a upgradeInstallationModifier) Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string, opts ...upgrade.RollbackOption) error { diff --git a/internal/pkg/agent/cmd/watch_test.go b/internal/pkg/agent/cmd/watch_test.go index b04ce04b00d..0ebce30753f 100644 --- a/internal/pkg/agent/cmd/watch_test.go +++ b/internal/pkg/agent/cmd/watch_test.go @@ -26,7 +26,6 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" - "github.com/elastic/elastic-agent/internal/pkg/release" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" @@ -144,7 +143,7 @@ func Test_watchCmd(t *testing.T) { expectedRemoveMarkerFlag := runtime.GOOS != "windows" installModifier.EXPECT(). - Cleanup(mock.Anything, topDir, "elastic-agent-4.5.6-newver", "newver", expectedRemoveMarkerFlag, false). + Cleanup(mock.Anything, topDir, expectedRemoveMarkerFlag, false, "elastic-agent-4.5.6-newver"). Return(nil) }, args: args{ @@ -272,7 +271,7 @@ func Test_watchCmd(t *testing.T) { // topdir, prevVersionedHome and prevHash are not taken from the upgrade marker, otherwise they would be // installModifier.EXPECT(). - Cleanup(mock.Anything, paths.Top(), paths.VersionedHome(topDir), release.ShortCommit(), true, false). + Cleanup(mock.Anything, paths.Top(), true, false, paths.VersionedHome(topDir)). Return(nil) }, args: args{ @@ -308,7 +307,7 @@ func Test_watchCmd(t *testing.T) { // topdir, prevVersionedHome and prevHash are not taken from the upgrade marker, otherwise they would be // installModifier.EXPECT(). - Cleanup(mock.Anything, paths.Top(), paths.VersionedHome(topDir), release.ShortCommit(), true, false). + Cleanup(mock.Anything, paths.Top(), true, false, paths.VersionedHome(topDir)). Return(nil) }, args: args{ @@ -352,7 +351,7 @@ func Test_watchCmd(t *testing.T) { require.NoError(t, err) installModifier.EXPECT(). - Cleanup(mock.Anything, paths.Top(), paths.VersionedHome(tmpDir), release.ShortCommit(), true, false). + Cleanup(mock.Anything, paths.Top(), true, false, paths.VersionedHome(tmpDir)). Return(nil) }, args: args{ diff --git a/testing/integration/ess/upgrade_rollback_test.go b/testing/integration/ess/upgrade_rollback_test.go index 71e94ff2cc2..9d75123cb69 100644 --- a/testing/integration/ess/upgrade_rollback_test.go +++ b/testing/integration/ess/upgrade_rollback_test.go @@ -44,7 +44,7 @@ agent.upgrade.watcher: const fastWatcherCfgWithRollbackWindow = ` agent.upgrade: watcher: - grace_period: 2m + grace_period: 1m error_check.interval: 5s rollback: window: 10m @@ -339,6 +339,8 @@ func TestFleetManagedUpgradeRollbackOnRestarts(t *testing.T) { } } +type rollbackTriggerFunc func(ctx context.Context, t *testing.T, client client.Client, startFixture, endFixture *atesting.Fixture) + // TestStandaloneUpgradeManualRollback tests the scenario where, after upgrading to a new version // of Agent, a manual rollback is triggered. It checks that the Agent is rolled back to the previous version. func TestStandaloneUpgradeManualRollback(t *testing.T) { @@ -349,12 +351,16 @@ func TestStandaloneUpgradeManualRollback(t *testing.T) { }) type fixturesSetupFunc func(t *testing.T) (from *atesting.Fixture, to *atesting.Fixture) + testcases := []struct { - name string - fixturesSetup fixturesSetupFunc + name string + fixturesSetup fixturesSetupFunc + agentConfig string + rollbackTrigger rollbackTriggerFunc }{ { - name: "upgrade to a repackaged agent built from the same commit", + name: "upgrade to a repackaged agent built from the same commit, rollback during grace period", + agentConfig: fastWatcherCfgWithRollbackWindow, fixturesSetup: func(t *testing.T) (from *atesting.Fixture, to *atesting.Fixture) { // Upgrade from the current build to the same build as Independent Agent Release. @@ -390,13 +396,84 @@ func TestStandaloneUpgradeManualRollback(t *testing.T) { return fromFixture, toFixture }, + rollbackTrigger: func(ctx context.Context, t *testing.T, client client.Client, startFixture, endFixture *atesting.Fixture) { + t.Logf("sending version=%s rollback=%v upgrade to agent", startFixture.Version(), true) + retVal, err := client.Upgrade(ctx, startFixture.Version(), true, "", false, false) + require.NoError(t, err, "error triggering manual rollback to version %s", startFixture.Version()) + t.Logf("received output %s from upgrade command", retVal) + }, }, - } + { + name: "upgrade to a repackaged agent built from the same commit, rollback after grace period", + agentConfig: fastWatcherCfgWithRollbackWindow, + fixturesSetup: func(t *testing.T) (from *atesting.Fixture, to *atesting.Fixture) { + // Upgrade from the current build to the same build as Independent Agent Release. - // set up start ficture with a rollback window of 1h - rollbackWindowConfig := ` -agent.upgrade.rollback.window: 1h -` + // Start from the build under test. + fromFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + // Create a new package with a different version (IAR-style) + // modify the version with the "+buildYYYYMMDDHHMMSS" + currentVersion, err := version.ParseVersion(define.Version()) + require.NoErrorf(t, err, "define.Version() %q is not parsable.", define.Version()) + + newVersionBuildMetadata := "build" + time.Now().Format("20060102150405") + parsedNewVersion := version.NewParsedSemVer(currentVersion.Major(), currentVersion.Minor(), currentVersion.Patch(), "", newVersionBuildMetadata) + + err = fromFixture.EnsurePrepared(t.Context()) + require.NoErrorf(t, err, "fixture should be prepared") + + // retrieve the compressed package file location + srcPackage, err := fromFixture.SrcPackage(t.Context()) + require.NoErrorf(t, err, "error retrieving start fixture source package") + + versionForFixture, repackagedArchiveFile, err := repackageArchive(t, srcPackage, newVersionBuildMetadata, currentVersion, parsedNewVersion) + require.NoError(t, err, "error repackaging the archive built from the same commit") + + repackagedLocalFetcher := atesting.LocalFetcher(filepath.Dir(repackagedArchiveFile)) + toFixture, err := atesting.NewFixture( + t, + versionForFixture.String(), + atesting.WithFetcher(repackagedLocalFetcher), + ) + require.NoError(t, err) + + return fromFixture, toFixture + }, + rollbackTrigger: func(ctx context.Context, t *testing.T, client client.Client, startFixture, endFixture *atesting.Fixture) { + // trim -SNAPSHOT at the end of the fixture version as that is reported as a separate flag + expectedVersion := endFixture.Version() + expectedSnapshot := false + if strings.HasSuffix(expectedVersion, "-SNAPSHOT") { + expectedVersion = strings.TrimSuffix(endFixture.Version(), "-SNAPSHOT") + expectedSnapshot = true + } + + // It will take at least 2 minutes before the agent exits the grace period (see fastWatcherCfgWithRollbackWindow) + // let's shoot for up to 4 minutes to exit grace period, checking every 10 seconds + require.EventuallyWithT(t, func(collect *assert.CollectT) { + state, err := client.State(ctx) + require.NoError(collect, err) + t.Logf("checking agent state: %+v", state) + require.NotNil(collect, state) + assert.Nil(collect, state.UpgradeDetails) + assert.Equal(t, cproto.State_HEALTHY, state.State) + assert.Equal(collect, expectedVersion, state.Info.Version) + assert.Equal(collect, expectedSnapshot, state.Info.Snapshot) + if runtime.GOOS != "windows" { + // on windows the update marker is not removed when cleaning up + assert.NoFileExists(collect, filepath.Join(startFixture.WorkDir(), "data", ".update-marker")) + } + }, 4*time.Minute, 10*time.Second) + t.Log("elastic agent is out of grace period.") + t.Logf("sending version=%s rollback=%v upgrade to agent", startFixture.Version(), true) + retVal, err := client.Upgrade(ctx, startFixture.Version(), true, "", false, false) + require.NoError(t, err, "error triggering manual rollback to version %s", startFixture.Version()) + t.Logf("received output %s from upgrade command", retVal) + }, + }, + } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { @@ -404,9 +481,11 @@ agent.upgrade.rollback.window: 1h defer cancel() from, to := tc.fixturesSetup(t) - err := from.Configure(ctx, []byte(rollbackWindowConfig)) - require.NoError(t, err, "error setting up rollback window") - standaloneManualRollbackTest(ctx, t, from, to) + err := from.Configure(ctx, []byte(tc.agentConfig)) + require.NoError(t, err, "error configuring starting fixture") + standaloneRollbackTest( + ctx, t, from, to, fastWatcherCfgWithRollbackWindow, fmt.Sprintf(details.ReasonManualRollbackPattern, from.Version()), + tc.rollbackTrigger) }) } @@ -491,23 +570,12 @@ func managedRollbackRestartTest(ctx context.Context, t *testing.T, info *define. func standaloneRollbackRestartTest(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, endFixture *atesting.Fixture) { standaloneRollbackTest(ctx, t, startFixture, endFixture, reallyFastWatcherCfg, details.ReasonWatchFailed, - func(t *testing.T, _ client.Client) { + func(ctx context.Context, t *testing.T, _ client.Client, _ *atesting.Fixture, _ *atesting.Fixture) { restartAgentNTimes(t, 3, 10*time.Second) }) } -func standaloneManualRollbackTest(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, endFixture *atesting.Fixture) { - standaloneRollbackTest(ctx, t, startFixture, endFixture, fastWatcherCfgWithRollbackWindow, fmt.Sprintf(details.ReasonManualRollbackPattern, startFixture.Version()), - func(t *testing.T, client client.Client) { - t.Logf("sending version=%s rollback=%v upgrade to agent", startFixture.Version(), true) - retVal, err := client.Upgrade(ctx, startFixture.Version(), true, "", false, false) - require.NoError(t, err, "error triggering manual rollback to version %s", startFixture.Version()) - t.Logf("received output %s from upgrade command", retVal) - }, - ) -} - -func standaloneRollbackTest(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, endFixture *atesting.Fixture, customConfig string, rollbackReason string, rollbackTrigger func(t *testing.T, client client.Client)) { +func standaloneRollbackTest(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, endFixture *atesting.Fixture, customConfig string, rollbackReason string, rollbackTrigger rollbackTriggerFunc) { startVersionInfo, err := startFixture.ExecVersion(ctx) require.NoError(t, err, "failed to get start agent build version info") @@ -540,7 +608,7 @@ func standaloneRollbackTest(ctx context.Context, t *testing.T, startFixture *ate defer elasticAgentClient.Disconnect() // A few seconds after the upgrade, trigger a rollback using the passed trigger - rollbackTrigger(t, elasticAgentClient) + rollbackTrigger(ctx, t, elasticAgentClient, startFixture, endFixture) // wait for the agent to be healthy and back at the start version err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary, 2*time.Minute, 10*time.Second, t)