Skip to content

Commit 411205b

Browse files
committed
Implement manual rollback from list of agent installs
1 parent b94903d commit 411205b

File tree

5 files changed

+287
-16
lines changed

5 files changed

+287
-16
lines changed

internal/pkg/agent/application/upgrade/manual_rollback.go

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@ import (
99
"errors"
1010
"fmt"
1111
"os"
12+
"path/filepath"
1213
"time"
1314

1415
"github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock"
1516
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
1617
"github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec"
18+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
1719
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
20+
"github.com/elastic/elastic-agent/internal/pkg/release"
1821
v1 "github.com/elastic/elastic-agent/pkg/api/v1"
1922
"github.com/elastic/elastic-agent/pkg/core/logger"
2023
"github.com/elastic/elastic-agent/pkg/version"
24+
agtversion "github.com/elastic/elastic-agent/version"
2125
)
2226

2327
func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) {
@@ -37,7 +41,7 @@ func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string,
3741

3842
if errors.Is(err, os.ErrNotExist) {
3943
// there is no upgrade marker, we need to extract available rollbacks from agent installs
40-
watcherExecutable, versionedHomeToRollbackTo, err = rollbackUsingAgentInstalls()
44+
watcherExecutable, versionedHomeToRollbackTo, err = rollbackUsingAgentInstalls(ctx, u.log, u.watcherHelper, u.installDescriptorSource, topDir, now, version, u.markUpgrade)
4145
} else {
4246
watcherExecutable, versionedHomeToRollbackTo, err = rollbackUsingUpgradeMarker(ctx, u.log, u.watcherHelper, topDir, now, version)
4347
}
@@ -56,10 +60,69 @@ func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string,
5660
return nil, nil
5761
}
5862

59-
func rollbackUsingAgentInstalls() (string, string, error) {
63+
func rollbackUsingAgentInstalls(ctx context.Context, log *logger.Logger, watcherHelper WatcherHelper, source installDescriptorSource, topDir string, now time.Time, rollbackVersion string, markUpgrade markUpgradeFunc) (string, string, error) {
6064
//FIXME implement
6165
//panic("Not implemented")
62-
return "", "", errors.Join(errors.New("not implemented"), os.ErrNotExist)
66+
//return "", "", errors.Join(errors.New("not implemented"), os.ErrNotExist)
67+
68+
// read the available installs
69+
installDescriptor, err := source.GetInstallDesc()
70+
if err != nil {
71+
return "", "", fmt.Errorf("retrieving agent installs: %w", err)
72+
}
73+
// check for the version we want to rollback to
74+
found := false
75+
var targetInstall v1.AgentInstallDesc
76+
for _, i := range installDescriptor.AgentInstalls {
77+
if i.TTL == nil {
78+
// valid rollbacks should have a TTL associated
79+
continue
80+
}
81+
if i.Version == rollbackVersion && now.Before(*i.TTL) {
82+
// found a valid target
83+
found = true
84+
targetInstall = i
85+
break
86+
}
87+
}
88+
89+
if !found {
90+
return "", "", fmt.Errorf("version %q not listed among the available rollbacks: %w", rollbackVersion, ErrNoRollbacksAvailable)
91+
}
92+
93+
prevAgentParsedVersion, err := version.ParseVersion(targetInstall.Version)
94+
if err != nil {
95+
return "", "", fmt.Errorf("parsing version of target install %+v: %w", targetInstall, err)
96+
}
97+
98+
// write out a fake upgrade marker to make the upgrade details state happy
99+
relCurVersionedHome, err := filepath.Rel(paths.Top(), paths.Home())
100+
if err != nil {
101+
return "", "", fmt.Errorf("getting current install home path %q relative to top %q: %w", paths.Home(), paths.Top(), err)
102+
}
103+
curAgentInstall := agentInstall{
104+
parsedVersion: agtversion.GetParsedAgentPackageVersion(),
105+
version: release.VersionWithSnapshot(),
106+
hash: release.Commit(),
107+
versionedHome: relCurVersionedHome,
108+
}
109+
110+
prevAgentInstall := agentInstall{
111+
parsedVersion: prevAgentParsedVersion,
112+
version: targetInstall.Version,
113+
hash: targetInstall.Hash,
114+
versionedHome: targetInstall.VersionedHome,
115+
}
116+
117+
upgradeDetails := details.NewDetails(release.VersionWithSnapshot(), details.StateRequested, "" /*action.ID*/)
118+
err = markUpgrade(log, paths.DataFrom(topDir), now, curAgentInstall, prevAgentInstall, nil /*action*/, upgradeDetails, nil)
119+
if err != nil {
120+
return "", "", fmt.Errorf("creating upgrade marker: %w", err)
121+
}
122+
123+
// return watcher executable and versionedHome to rollback to
124+
watcherExecutable := watcherHelper.SelectWatcherExecutable(topDir, prevAgentInstall, curAgentInstall)
125+
return watcherExecutable, targetInstall.VersionedHome, nil
63126
}
64127

65128
func rollbackUsingUpgradeMarker(ctx context.Context, log *logger.Logger, watcherHelper WatcherHelper, topDir string, now time.Time, version string) (string, string, error) {

internal/pkg/agent/application/upgrade/manual_rollback_test.go

Lines changed: 159 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
package upgrade
66

77
import (
8-
"io/fs"
8+
"fmt"
99
"os"
1010
"os/exec"
1111
"path/filepath"
@@ -15,14 +15,19 @@ import (
1515
"github.com/stretchr/testify/assert"
1616
"github.com/stretchr/testify/mock"
1717
"github.com/stretchr/testify/require"
18+
"gopkg.in/yaml.v3"
1819

1920
"github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock"
2021
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
2122
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact"
23+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
2224
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
25+
"github.com/elastic/elastic-agent/internal/pkg/release"
26+
v1 "github.com/elastic/elastic-agent/pkg/api/v1"
2327
"github.com/elastic/elastic-agent/pkg/core/logger/loggertest"
2428
"github.com/elastic/elastic-agent/pkg/version"
2529
"github.com/elastic/elastic-agent/testing/mocks/internal_/pkg/agent/application/info"
30+
agtversion "github.com/elastic/elastic-agent/version"
2631
)
2732

2833
func TestManualRollback(t *testing.T) {
@@ -84,14 +89,28 @@ func TestManualRollback(t *testing.T) {
8489
versionedHome: "data/elastic-agent-4.5.6-newver",
8590
}
8691

92+
agentInstallCurrent := agentInstall{
93+
parsedVersion: agtversion.GetParsedAgentPackageVersion(),
94+
version: release.VersionWithSnapshot(),
95+
hash: release.Commit(),
96+
// Versioned home should contain the version but since the path does not really exist we fallback to the legacy format with just the hash
97+
// versionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s-%s", release.VersionWithSnapshot(), release.ShortCommit())),
98+
versionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())),
99+
}
100+
87101
// this is the updated_on timestamp in the example
88102
nowBeforeTTL, err := time.Parse(time.RFC3339, `2025-07-11T10:11:12Z`)
89103
require.NoError(t, err, "error parsing nowBeforeTTL")
90104

91105
// the update marker yaml assumes a 7d TTL for rollbacks, let's make an extra day pass
92106
nowAfterTTL := nowBeforeTTL.Add(8 * 24 * time.Hour)
93107

94-
type setupF func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper)
108+
// save the current timestamp, useful for TTL-based testing
109+
aMomentInTime := time.Now()
110+
aMomentTomorrow := aMomentInTime.Add(24 * time.Hour)
111+
aMomentAgo := aMomentInTime.Add(-1 * time.Second)
112+
113+
type setupF func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource)
95114
type postRollbackAssertionsF func(t *testing.T, topDir string)
96115
type testcase struct {
97116
name string
@@ -107,7 +126,7 @@ func TestManualRollback(t *testing.T) {
107126
testcases := []testcase{
108127
{
109128
name: "no rollback version - rollback fails",
110-
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper) {
129+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
111130
//do not setup anything here, let the rollback fail
112131
},
113132
artifactSettings: artifact.DefaultConfig(),
@@ -119,21 +138,34 @@ func TestManualRollback(t *testing.T) {
119138
additionalAsserts: nil,
120139
},
121140
{
122-
name: "no update marker - rollback fails",
123-
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper) {
124-
//do not setup anything here, let the rollback fail
141+
name: "no update marker, no other installs - rollback fails",
142+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
143+
installSource.EXPECT().GetInstallDesc().Return(
144+
&v1.InstallDescriptor{
145+
AgentInstalls: []v1.AgentInstallDesc{
146+
{
147+
Version: "1.2.3",
148+
Hash: "oldver",
149+
VersionedHome: "data/elastic-agent-1.2.3-oldver",
150+
Flavor: "strawberry",
151+
Active: true,
152+
},
153+
},
154+
},
155+
nil,
156+
)
125157
},
126158
artifactSettings: artifact.DefaultConfig(),
127159
upgradeSettings: configuration.DefaultUpgradeConfig(),
128160
version: "1.2.3",
129161
wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
130-
return assert.ErrorIs(t, err, fs.ErrNotExist)
162+
return assert.ErrorIs(t, err, ErrNoRollbacksAvailable)
131163
},
132164
additionalAsserts: nil,
133165
},
134166
{
135167
name: "update marker is malformed - rollback fails",
136-
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper) {
168+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
137169
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte("this is not a proper YAML file"), 0600)
138170
require.NoError(t, err, "error setting up update marker")
139171
locker := filelock.NewAppLocker(topDir, "watcher.lock")
@@ -153,7 +185,7 @@ func TestManualRollback(t *testing.T) {
153185
},
154186
{
155187
name: "update marker ok but rollback available is empty - error",
156-
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper) {
188+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
157189
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456NoRollbackAvailable), 0600)
158190
require.NoError(t, err, "error setting up update marker")
159191
locker := filelock.NewAppLocker(topDir, "watcher.lock")
@@ -182,7 +214,7 @@ func TestManualRollback(t *testing.T) {
182214
},
183215
{
184216
name: "update marker ok but version is not available for rollback - error",
185-
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper) {
217+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
186218
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600)
187219
require.NoError(t, err, "error setting up update marker")
188220
locker := filelock.NewAppLocker(topDir, "watcher.lock")
@@ -211,7 +243,7 @@ func TestManualRollback(t *testing.T) {
211243
},
212244
{
213245
name: "update marker ok but rollback is expired - error",
214-
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper) {
246+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
215247
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600)
216248
require.NoError(t, err, "error setting up update marker")
217249
locker := filelock.NewAppLocker(topDir, "watcher.lock")
@@ -241,7 +273,7 @@ func TestManualRollback(t *testing.T) {
241273
},
242274
{
243275
name: "update marker ok - takeover watcher, persist rollback and restart most recent watcher",
244-
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper) {
276+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
245277
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600)
246278
require.NoError(t, err, "error setting up update marker")
247279
locker := filelock.NewAppLocker(topDir, "watcher.lock")
@@ -268,6 +300,120 @@ func TestManualRollback(t *testing.T) {
268300
assert.NotEmpty(t, marker.RollbacksAvailable)
269301
},
270302
},
303+
{
304+
name: "no update marker, available install for rollback with valid TTL",
305+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
306+
installSource.EXPECT().GetInstallDesc().Return(
307+
&v1.InstallDescriptor{
308+
AgentInstalls: []v1.AgentInstallDesc{
309+
{
310+
Version: release.VersionWithSnapshot(),
311+
Hash: release.Commit(),
312+
// Versioned home should contain the version but since the path does not really exist we fallback to the legacy format with just the hash
313+
//VersionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s-%s", release.VersionWithSnapshot(), release.ShortCommit())),
314+
VersionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())),
315+
Flavor: "basic",
316+
Active: true,
317+
},
318+
{
319+
// old install is still valid for the next 24 hours
320+
OptionalTTLItem: v1.OptionalTTLItem{TTL: &aMomentTomorrow},
321+
Version: "1.2.3",
322+
Hash: "oldver",
323+
VersionedHome: "data/elastic-agent-1.2.3-oldver",
324+
Flavor: "basic",
325+
Active: false,
326+
},
327+
},
328+
},
329+
nil,
330+
)
331+
newerWatcherExecutable := filepath.Join(topDir, "data", fmt.Sprintf("elastic-agent-%s-%s", release.VersionWithSnapshot(), release.ShortCommit()), "elastic-agent")
332+
watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstallCurrent).Return(newerWatcherExecutable)
333+
watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable, "watch", "--rollback", "data/elastic-agent-1.2.3-oldver").
334+
Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for rollbacksies"}, Process: &os.Process{Pid: 123}}, nil)
335+
},
336+
artifactSettings: artifact.DefaultConfig(),
337+
upgradeSettings: &configuration.UpgradeConfig{
338+
Rollback: &configuration.UpgradeRollbackConfig{
339+
Window: 24 * time.Hour,
340+
},
341+
},
342+
now: aMomentInTime,
343+
version: "1.2.3",
344+
wantErr: assert.NoError,
345+
additionalAsserts: func(t *testing.T, topDir string) {
346+
actualMarkerFilePath := filepath.Join(topDir, "data", markerFilename)
347+
require.FileExists(t, actualMarkerFilePath, "marker file must have been created")
348+
actualMarkerFileBytes, errReadMarkerFile := os.ReadFile(actualMarkerFilePath)
349+
require.NoError(t, errReadMarkerFile, "marker file should be readable")
350+
351+
expectedUpdateMarker := &UpdateMarker{
352+
Version: release.VersionWithSnapshot(),
353+
Hash: release.Commit(),
354+
VersionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())),
355+
UpdatedOn: aMomentInTime,
356+
PrevVersion: "1.2.3",
357+
PrevHash: "oldver",
358+
PrevVersionedHome: "data/elastic-agent-1.2.3-oldver",
359+
Details: &details.Details{
360+
TargetVersion: release.VersionWithSnapshot(),
361+
State: details.StateRequested,
362+
},
363+
RollbacksAvailable: nil,
364+
}
365+
366+
expectedMarkerBytes, err := yaml.Marshal(newMarkerSerializer(expectedUpdateMarker))
367+
require.NoError(t, err, "error marshalling expected update marker")
368+
require.YAMLEq(t, string(expectedMarkerBytes), string(actualMarkerFileBytes))
369+
},
370+
},
371+
{
372+
name: "no update marker, available install for rollback with expired TTL",
373+
setup: func(t *testing.T, topDir string, agent *info.Agent, watcherHelper *MockWatcherHelper, installSource *mockInstallDescriptorSource) {
374+
installSource.EXPECT().GetInstallDesc().Return(
375+
&v1.InstallDescriptor{
376+
AgentInstalls: []v1.AgentInstallDesc{
377+
{
378+
Version: release.VersionWithSnapshot(),
379+
Hash: release.Commit(),
380+
// Versioned home should contain the version but since the path does not really exist we fallback to the legacy format with just the hash
381+
//VersionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s-%s", release.VersionWithSnapshot(), release.ShortCommit())),
382+
VersionedHome: filepath.Join("data", fmt.Sprintf("elastic-agent-%s", release.ShortCommit())),
383+
Flavor: "basic",
384+
Active: true,
385+
},
386+
{
387+
// old install expired a second ago
388+
OptionalTTLItem: v1.OptionalTTLItem{TTL: &aMomentAgo},
389+
Version: "1.2.3",
390+
Hash: "oldver",
391+
VersionedHome: "data/elastic-agent-1.2.3-oldver",
392+
Flavor: "basic",
393+
Active: false,
394+
},
395+
},
396+
},
397+
nil,
398+
)
399+
},
400+
artifactSettings: artifact.DefaultConfig(),
401+
upgradeSettings: &configuration.UpgradeConfig{
402+
Rollback: &configuration.UpgradeRollbackConfig{
403+
Window: 24 * time.Hour,
404+
},
405+
},
406+
now: aMomentInTime,
407+
version: "1.2.3",
408+
wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
409+
return assert.ErrorIs(t, err, ErrNoRollbacksAvailable, i...)
410+
},
411+
additionalAsserts: func(t *testing.T, topDir string) {
412+
actualMarkerFilePath := filepath.Join(topDir, "data", markerFilename)
413+
require.NoFileExists(t, actualMarkerFilePath, "marker file must not be created")
414+
415+
},
416+
},
271417
}
272418

273419
for _, tc := range testcases {
@@ -281,7 +427,7 @@ func TestManualRollback(t *testing.T) {
281427
require.NoError(t, err, "error creating data directory in topDir %q", topDir)
282428

283429
if tc.setup != nil {
284-
tc.setup(t, topDir, mockAgentInfo, mockWatcherHelper)
430+
tc.setup(t, topDir, mockAgentInfo, mockWatcherHelper, mockInstallSource)
285431
}
286432

287433
upgrader, err := NewUpgrader(log, tc.artifactSettings, tc.upgradeSettings, mockAgentInfo, mockWatcherHelper, mockInstallSource)

0 commit comments

Comments
 (0)