Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .mockery.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ filename: mocks.go
template-data:
unroll-variadic: true
packages:
github.com/elastic/elastic-agent/internal/pkg/agent/application:
interfaces:
rollbacksSource: {}
github.com/elastic/elastic-agent/internal/pkg/agent/application/actions/handlers:
interfaces:
Uploader: {}
Expand Down
92 changes: 92 additions & 0 deletions internal/pkg/agent/application/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@ package application
import (
"context"
"fmt"
"os"
"path/filepath"
"time"

"go.elastic.co/apm/v2"

componentmonitoring "github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/component"
"github.com/elastic/elastic-agent/internal/pkg/agent/install"

"github.com/elastic/go-ucfg"

Expand Down Expand Up @@ -48,6 +51,11 @@ import (
"github.com/elastic/elastic-agent/version"
)

// rollbacksSource is the storage abstraction used at startup to read and
// rewrite the set of agent installs that are still available for rollback.
// The map handed back and forth is keyed by the install's versioned home
// (joined with the top directory by the caller to obtain an absolute path).
type rollbacksSource interface {
	// Get returns the currently recorded TTL markers, keyed by versioned home.
	Get() (map[string]upgrade.TTLMarker, error)
	// Set replaces the recorded TTL markers with the given map.
	Set(map[string]upgrade.TTLMarker) error
}

// CfgOverrider allows for application driven overrides of configuration read from disk.
// The function receives a pointer to the configuration and returns nothing, so any
// override has to be applied by modifying the pointed-to value.
type CfgOverrider func(cfg *configuration.Configuration)

Expand Down Expand Up @@ -131,6 +139,10 @@ func New(
isMonitoringSupported := !disableMonitoring && cfg.Settings.V1MonitoringEnabled

availableRollbacksSource := upgrade.NewTTLMarkerRegistry(log, paths.Top())
if platform.OS != component.Container {
// If we are not running in a container, check and normalize the install descriptor before we start the agent
normalizeInstallDescriptorAtStartup(log, paths.Top(), time.Now(), initialUpdateMarker, availableRollbacksSource)
}
upgrader, err := upgrade.NewUpgrader(log, cfg.Settings.DownloadConfig, cfg.Settings.Upgrade, agentInfo, new(upgrade.AgentWatcherHelper), availableRollbacksSource)
if err != nil {
return nil, nil, nil, fmt.Errorf("failed to create upgrader: %w", err)
Expand Down Expand Up @@ -296,6 +308,86 @@ func New(
return coord, configMgr, varsManager, nil
}

// normalizeInstallDescriptorAtStartup reconciles the recorded rollback TTL markers with
// what is actually on disk under topDir. It performs two passes:
//
//  1. If initialUpdateMarker reports a rollback state, the entry keyed by the marker's
//     PrevVersionedHome is dropped from the rollback source.
//  2. Every remaining entry is inspected:
//     - an entry pointing at the currently running home is left alone (with a warning);
//     - an entry whose versioned home no longer exists on disk is dropped;
//     - an entry whose ValidUntil is before now has its directory removed and, if the
//     removal succeeds, is dropped as well.
//
// TODO check TTLs of installs to schedule delayed cleanup while the agent is running
//
// This function never returns an error: every failure is logged as a warning and must be
// treated as non-fatal by the caller.
func normalizeInstallDescriptorAtStartup(log *logger.Logger, topDir string, now time.Time, initialUpdateMarker *upgrade.UpdateMarker, rollbackSource rollbacksSource) {
	// Pass 1: coming off a rollback, forget the TTL marker of the home we are now running from.
	rolledBack := initialUpdateMarker != nil &&
		initialUpdateMarker.Details != nil &&
		initialUpdateMarker.Details.State == details.StateRollback
	if rolledBack {
		available, getErr := rollbackSource.Get()
		if getErr != nil {
			log.Warnf("Error getting available rollbacks from rollbackSource during startup check: %s", getErr)
			return
		}

		delete(available, initialUpdateMarker.PrevVersionedHome)
		if setErr := rollbackSource.Set(available); setErr != nil {
			log.Warnf("Error removing install descriptor from installDescriptorSource during startup check: %s", setErr)
			return
		}
	}

	// Pass 2: re-read the markers and look for entries that need cleaning up.
	available, getErr := rollbackSource.Get()
	if getErr != nil {
		log.Warnf("Error getting available rollbacks during startup check: %s", getErr)
		return
	}

	var staleHomes []string
	for relHome, marker := range available {
		absHome := filepath.Join(topDir, relHome)

		// Never touch the install we are running from.
		if absHome == paths.HomeFrom(topDir) {
			log.Warnf("Found a TTL marker for the currently running agent at %s. Skipping cleanup...", relHome)
			continue
		}

		_, statErr := os.Stat(absHome)
		switch {
		case errors.Is(statErr, os.ErrNotExist):
			// Directory is already gone: only the descriptor needs removing.
			log.Warnf("Versioned home %s corresponding to agent install descriptor %+v is not found on disk", absHome, marker)
			staleHomes = append(staleHomes, relHome)
			continue
		case statErr != nil:
			// Unknown state on disk: keep the descriptor and move on.
			log.Warnf("error checking versioned home %s for agent install: %s", absHome, statErr.Error())
			continue
		}

		if !now.After(marker.ValidUntil) {
			// Still within its TTL, nothing to do.
			continue
		}

		// The install directory exists but its TTL has elapsed: delete the files first and
		// drop the descriptor only once the directory is really gone.
		log.Infof("agent install descriptor %+v is expired, removing directory %q", marker, absHome)
		if rmErr := install.RemoveBut(absHome, true); rmErr != nil {
			log.Warnf("Error removing directory %q: %s", absHome, rmErr)
			continue
		}
		log.Infof("Directory %q was removed", absHome)
		staleHomes = append(staleHomes, relHome)
	}

	if len(staleHomes) == 0 {
		return
	}

	log.Infof("removing install descriptor(s) for %v", staleHomes)
	for _, relHome := range staleHomes {
		delete(available, relHome)
	}
	if setErr := rollbackSource.Set(available); setErr != nil {
		log.Warnf("Error removing install descriptor(s): %s", setErr)
	}
}

func mergeFleetConfig(ctx context.Context, rawConfig *config.Config) (storage.Store, *configuration.Configuration, error) {
path := paths.AgentConfigFile()
store, err := storage.NewEncryptedDiskStore(ctx, path)
Expand Down
191 changes: 191 additions & 0 deletions internal/pkg/agent/application/application_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ package application
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"testing"
"time"

Expand All @@ -15,6 +18,9 @@ import (

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
"github.com/elastic/elastic-agent/internal/pkg/config"
"github.com/elastic/elastic-agent/internal/pkg/testutils"
"github.com/elastic/elastic-agent/pkg/core/logger/loggertest"
Expand Down Expand Up @@ -302,3 +308,188 @@ func TestInjectOutputOverrides(t *testing.T) {
})
}
}

// Test_normalizeInstallDescriptorAtStartup drives normalizeInstallDescriptorAtStartup against a
// temporary top directory and a mocked rollbacksSource. Each case's setup function creates the
// fake installs it needs, registers the expected Get/Set calls on the mock and returns the
// update marker (possibly nil) to pass in; postNormalizeAssertions, when set, inspects the
// directory tree afterwards.
func Test_normalizeInstallDescriptorAtStartup(t *testing.T) {

	now := time.Now()
	tomorrow := now.Add(24 * time.Hour)
	yesterday := now.Add(-24 * time.Hour)

	tests := []struct {
		name                    string
		setup                   func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource)
		postNormalizeAssertions func(t *testing.T, topDir string, initialUpdateMarker *upgrade.UpdateMarker)
	}{
		{
			name: "happy path: single install, no rollbacks, no modifications needed",
			setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) {
				mockRollbackSource := newMockRollbacksSource(t)
				mockRollbackSource.EXPECT().Get().Return(nil, nil)
				return nil, mockRollbackSource
			},

			postNormalizeAssertions: nil,
		},
		{
			name: "Agent was manually rolled back: rolled back install is removed from the list",
			setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) {
				newAgentInstallPath := createFakeAgentInstall(t, topDir, "4.5.6", "newversionhash", true)
				oldAgentInstallPath := createFakeAgentInstall(t, topDir, "1.2.3", "oldversionhash", true)

				mockRollbackSource := newMockRollbacksSource(t)
				mockRollbackSource.EXPECT().Get().Return(map[string]upgrade.TTLMarker{
					oldAgentInstallPath: {
						Version:    "1.2.3",
						Hash:       "oldversionhash",
						ValidUntil: tomorrow,
					},
				}, nil)

				updateMarker := &upgrade.UpdateMarker{
					Version:           "4.5.6",
					Hash:              "newversionhash",
					VersionedHome:     newAgentInstallPath,
					UpdatedOn:         now,
					PrevVersion:       "1.2.3",
					PrevHash:          "oldversionhash",
					PrevVersionedHome: oldAgentInstallPath,
					Acked:             false,
					Action:            nil,
					Details: &details.Details{
						TargetVersion: "4.5.6",
						State:         details.StateRollback,
						ActionID:      "",
						Metadata: details.Metadata{
							Reason: details.ReasonManualRollbackPattern,
						},
					},
				}

				// expect code to clear the rollback
				mockRollbackSource.EXPECT().Set(map[string]upgrade.TTLMarker{}).Return(nil)
				return updateMarker, mockRollbackSource
			},
			postNormalizeAssertions: nil,
		},
		{
			name: "Entries not having a matching install directory will be removed from the list",
			setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) {
				_ = createFakeAgentInstall(t, topDir, "4.5.6", "newversionhash", true)
				oldAgentInstallPath := createFakeAgentInstall(t, topDir, "1.2.3", "oldversionhash", true)

				mockRollbackSource := newMockRollbacksSource(t)
				nonExistingVersionedHome := filepath.Join("data", "thisdirectorydoesnotexist")
				mockRollbackSource.EXPECT().Get().Return(map[string]upgrade.TTLMarker{
					oldAgentInstallPath: {
						Version:    "1.2.3",
						Hash:       "oldversionhash",
						ValidUntil: tomorrow,
					},
					nonExistingVersionedHome: {
						Version:    "0.0.0",
						Hash:       "nonExistingHash",
						ValidUntil: tomorrow,
					},
				}, nil)

				// only the entry backed by a directory on disk must survive
				mockRollbackSource.EXPECT().Set(map[string]upgrade.TTLMarker{
					oldAgentInstallPath: {
						Version:    "1.2.3",
						Hash:       "oldversionhash",
						ValidUntil: tomorrow,
					},
				}).Return(nil)
				return nil, mockRollbackSource
			},
			postNormalizeAssertions: nil,
		},
		{
			name: "Expired installs still existing on disk will be removed from the install list and removed from disk",
			setup: func(t *testing.T, topDir string) (*upgrade.UpdateMarker, rollbacksSource) {
				_ = createFakeAgentInstall(t, topDir, "4.5.6", "newversionhash", true)
				oldAgentInstallPath := createFakeAgentInstall(t, topDir, "1.2.3", "oldversionhash", true)

				// assert that the versionedHome of the old install is the same we check in postNormalizeAssertions
				// (expected value first, actual value second, so a failure is reported the right way round)
				assert.Equal(t, filepath.Join("data", "elastic-agent-1.2.3-oldver"), oldAgentInstallPath,
					"Unexpected old install versioned home. Post normalize assertions may not be working")

				mockRollbackSource := newMockRollbacksSource(t)
				mockRollbackSource.EXPECT().Get().Return(
					map[string]upgrade.TTLMarker{
						oldAgentInstallPath: {
							Version:    "1.2.3",
							Hash:       "oldver",
							ValidUntil: yesterday,
						},
					},
					nil,
				)
				// expect removal of the existing ttlmarker
				mockRollbackSource.EXPECT().Set(map[string]upgrade.TTLMarker{}).Return(nil)
				return nil, mockRollbackSource
			},
			postNormalizeAssertions: func(t *testing.T, topDir string, _ *upgrade.UpdateMarker) {
				assert.NoDirExists(t, filepath.Join(topDir, "data", "elastic-agent-1.2.3-oldver"))
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			logger, _ := loggertest.New(t.Name())
			tmpDir := t.TempDir()
			updateMarker, installSource := tt.setup(t, tmpDir)
			normalizeInstallDescriptorAtStartup(logger, tmpDir, now, updateMarker, installSource)
			if tt.postNormalizeAssertions != nil {
				tt.postNormalizeAssertions(t, tmpDir, updateMarker)
			}
		})
	}
}

// createFakeAgentInstall (copied from the upgrade package tests) creates a mock agent install
// within topDir and returns the path of the created versioned home RELATIVE to topDir, to
// mirror what step_unpack returns.
//
// The versioned home is named "elastic-agent-<hash prefix>", or
// "elastic-agent-<version>-<hash prefix>" when useVersionInPath is true; the hash prefix is
// the first upgrade.HashLen characters of hash, so hash must be at least that long.
// Inside the versioned home it creates the binary directory, "components", "logs" and "run"
// directories, a placeholder agent executable and a placeholder log file.
// Any filesystem error fails the calling test immediately.
func createFakeAgentInstall(t *testing.T, topDir, version, hash string, useVersionInPath bool) string {
	// report failures at the caller's line rather than inside this helper
	t.Helper()

	// create versioned home
	versionedHome := fmt.Sprintf("elastic-agent-%s", hash[:upgrade.HashLen])
	if useVersionInPath {
		// use the version passed as parameter
		versionedHome = fmt.Sprintf("elastic-agent-%s-%s", version, hash[:upgrade.HashLen])
	}
	relVersionedHomePath := filepath.Join("data", versionedHome)
	absVersionedHomePath := filepath.Join(topDir, relVersionedHomePath)

	// recalculate the binary path and launch a mkDirAll to account for MacOS weirdness
	// (the extra nesting of elastic agent binary within versionedHome)
	absVersionedHomeBinaryPath := paths.BinaryPath(absVersionedHomePath, "")
	err := os.MkdirAll(absVersionedHomeBinaryPath, 0o750)
	require.NoError(t, err, "error creating fake install versioned home directory (including binary path) %q", absVersionedHomeBinaryPath)

	// place a few directories in the fake install
	absComponentsDirPath := filepath.Join(absVersionedHomePath, "components")
	err = os.MkdirAll(absComponentsDirPath, 0o750)
	require.NoError(t, err, "error creating fake install components directory %q", absComponentsDirPath)

	absLogsDirPath := filepath.Join(absVersionedHomePath, "logs")
	err = os.MkdirAll(absLogsDirPath, 0o750)
	require.NoError(t, err, "error creating fake install logs directory %q", absLogsDirPath)

	absRunDirPath := filepath.Join(absVersionedHomePath, "run")
	err = os.MkdirAll(absRunDirPath, 0o750)
	require.NoError(t, err, "error creating fake install run directory %q", absRunDirPath)

	// put some placeholder for files
	agentExecutableName := upgrade.AgentName
	if runtime.GOOS == "windows" {
		agentExecutableName += ".exe"
	}
	err = os.WriteFile(paths.BinaryPath(absVersionedHomePath, agentExecutableName), []byte(fmt.Sprintf("Placeholder for agent %s", version)), 0o750)
	require.NoErrorf(t, err, "error writing elastic agent binary placeholder %q", agentExecutableName)
	fakeLogPath := filepath.Join(absLogsDirPath, "fakelog.ndjson")
	err = os.WriteFile(fakeLogPath, []byte(fmt.Sprintf("Sample logs for agent %s", version)), 0o750)
	require.NoErrorf(t, err, "error writing fake log placeholder %q", fakeLogPath)

	// return the path relative to top exactly like the step_unpack does
	return relVersionedHomePath
}
Loading