Skip to content

Commit 6b064a4

Browse files
authored
Perform atomic writes during a config apply (#1265)
1 parent a034d5b commit 6b064a4

File tree

11 files changed

+487
-116
lines changed

11 files changed

+487
-116
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ BINARY_NAME := nginx-agent
4545
PROJECT_DIR = cmd/agent
4646
PROJECT_FILE = main.go
4747
COLLECTOR_PATH ?= /etc/nginx-agent/opentelemetry-collector-agent.yaml
48-
MANIFEST_DIR ?= /var/lib/nginx-agent
48+
LIB_DIR ?= /var/lib/nginx-agent
4949
DIRS = $(BUILD_DIR) $(TEST_BUILD_DIR) $(BUILD_DIR)/$(DOCS_DIR) $(BUILD_DIR)/$(DOCS_DIR)/$(PROTO_DIR)
5050
$(shell mkdir -p $(DIRS))
5151

@@ -202,7 +202,7 @@ run: build ## Run code
202202

203203
dev: ## Run agent executable
204204
@echo "🚀 Running App"
205-
NGINX_AGENT_COLLECTOR_CONFIG_PATH=$(COLLECTOR_PATH) NGINX_AGENT_MANIFEST_DIR=$(MANIFEST_DIR) $(GORUN) -ldflags=$(DEBUG_LDFLAGS) $(PROJECT_DIR)/$(PROJECT_FILE)
205+
NGINX_AGENT_COLLECTOR_CONFIG_PATH=$(COLLECTOR_PATH) NGINX_AGENT_LIB_DIR=$(LIB_DIR) $(GORUN) -ldflags=$(DEBUG_LDFLAGS) $(PROJECT_DIR)/$(PROJECT_FILE)
206206

207207
race-condition-dev: ## Run agent executable with race condition detection
208208
@echo "🏎️ Running app with race condition detection enabled"

internal/config/config.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func ResolveConfig() (*Config, error) {
121121
Watchers: resolveWatchers(),
122122
Features: viperInstance.GetStringSlice(FeaturesKey),
123123
Labels: resolveLabels(),
124-
ManifestDir: viperInstance.GetString(ManifestDirPathKey),
124+
LibDir: viperInstance.GetString(LibDirPathKey),
125125
}
126126

127127
defaultCollector(collector, config)
@@ -380,9 +380,9 @@ func registerFlags() {
380380
"If the default path doesn't exist, log messages are output to stdout/stderr.",
381381
)
382382
fs.String(
383-
ManifestDirPathKey,
384-
DefManifestDir,
385-
"Specifies the path to the directory containing the manifest files",
383+
LibDirPathKey,
384+
DefLibDir,
385+
"Specifies the path to the nginx-agent lib directory",
386386
)
387387

388388
fs.StringSlice(AllowedDirectoriesKey,

internal/config/defaults.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ const (
110110
DefCollectorExtensionsHealthTLServerNameKey = ""
111111

112112
// File defaults
113-
DefManifestDir = "/var/lib/nginx-agent"
113+
DefLibDir = "/var/lib/nginx-agent"
114114
)
115115

116116
func DefaultFeatures() []string {

internal/config/flags.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const (
2424
InstanceWatcherMonitoringFrequencyKey = "watchers_instance_watcher_monitoring_frequency"
2525
InstanceHealthWatcherMonitoringFrequencyKey = "watchers_instance_health_watcher_monitoring_frequency"
2626
FileWatcherKey = "watchers_file_watcher"
27-
ManifestDirPathKey = "manifest_dir"
27+
LibDirPathKey = "lib_dir"
2828
)
2929

3030
var (

internal/config/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ type (
4848
Version string `yaml:"-"`
4949
Path string `yaml:"-"`
5050
UUID string `yaml:"-"`
51-
ManifestDir string `yaml:"-"`
51+
LibDir string `yaml:"-"`
5252
AllowedDirectories []string `yaml:"allowed_directories" mapstructure:"allowed_directories"`
5353
Features []string `yaml:"features" mapstructure:"features"`
5454
}

internal/file/file_manager_service.go

Lines changed: 117 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313
"fmt"
1414
"log/slog"
1515
"os"
16+
"path"
17+
"path/filepath"
1618
"sync"
1719

1820
"google.golang.org/grpc"
@@ -38,11 +40,11 @@ const (
3840

3941
type (
4042
fileOperator interface {
41-
Write(ctx context.Context, fileContent []byte, file *mpi.FileMeta) error
42-
CreateFileDirectories(ctx context.Context, fileMeta *mpi.FileMeta, filePermission os.FileMode) error
43+
Write(ctx context.Context, fileContent []byte, fileName, filePermissions string) error
44+
CreateFileDirectories(ctx context.Context, fileName string) error
4345
WriteChunkedFile(
4446
ctx context.Context,
45-
file *mpi.File,
47+
fileName, filePermissions string,
4648
header *mpi.FileDataChunkHeader,
4749
stream grpc.ServerStreamingClient[mpi.FileDataChunk],
4850
) error
@@ -54,20 +56,22 @@ type (
5456
) (mpi.FileDataChunk_Content, error)
5557
WriteManifestFile(ctx context.Context, updatedFiles map[string]*model.ManifestFile,
5658
manifestDir, manifestPath string) (writeError error)
59+
MoveFile(ctx context.Context, sourcePath, destPath string) error
5760
}
5861

5962
fileServiceOperatorInterface interface {
60-
File(ctx context.Context, file *mpi.File, fileActions map[string]*model.FileCache) error
63+
File(ctx context.Context, file *mpi.File, tempFilePath, expectedHash string) error
6164
UpdateOverview(ctx context.Context, instanceID string, filesToUpdate []*mpi.File, configPath string,
6265
iteration int) error
63-
ChunkedFile(ctx context.Context, file *mpi.File) error
66+
ChunkedFile(ctx context.Context, file *mpi.File, tempFilePath, expectedHash string) error
6467
IsConnected() bool
6568
UpdateFile(
6669
ctx context.Context,
6770
instanceID string,
6871
fileToUpdate *mpi.File,
6972
) error
7073
SetIsConnected(isConnected bool)
74+
MoveFilesFromTempDirectory(ctx context.Context, fileAction *model.FileCache, tempDir string) error
7175
UpdateClient(ctx context.Context, fileServiceClient mpi.FileServiceClient)
7276
}
7377

@@ -118,8 +122,8 @@ func NewFileManagerService(fileServiceClient mpi.FileServiceClient, agentConfig
118122
rollbackFileContents: make(map[string][]byte),
119123
currentFilesOnDisk: make(map[string]*mpi.File),
120124
previousManifestFiles: make(map[string]*model.ManifestFile),
121-
manifestFilePath: agentConfig.ManifestDir + "/manifest.json",
122125
rollbackManifest: true,
126+
manifestFilePath: agentConfig.LibDir + "/manifest.json",
123127
manifestLock: manifestLock,
124128
}
125129
}
@@ -169,7 +173,12 @@ func (fms *FileManagerService) ConfigApply(ctx context.Context,
169173
fms.rollbackFileContents = fileContent
170174
fms.fileActions = diffFiles
171175

172-
fileErr := fms.executeFileActions(ctx)
176+
tempDir, tempDirError := fms.createTempConfigDirectory(ctx)
177+
if tempDirError != nil {
178+
return model.Error, tempDirError
179+
}
180+
181+
fileErr := fms.executeFileActions(ctx, tempDir)
173182
if fileErr != nil {
174183
fms.rollbackManifest = false
175184
return model.RollbackRequired, fileErr
@@ -208,15 +217,16 @@ func (fms *FileManagerService) Rollback(ctx context.Context, instanceID string)
208217

209218
continue
210219
case model.Delete, model.Update:
211-
content := fms.rollbackFileContents[fileAction.File.GetFileMeta().GetName()]
212-
err := fms.fileOperator.Write(ctx, content, fileAction.File.GetFileMeta())
220+
fileMeta := fileAction.File.GetFileMeta()
221+
content := fms.rollbackFileContents[fileMeta.GetName()]
222+
err := fms.fileOperator.Write(ctx, content, fileMeta.GetName(), fileMeta.GetPermissions())
213223
if err != nil {
214224
return err
215225
}
216226

217227
// currentFilesOnDisk needs to be updated after rollback action is performed
218-
fileAction.File.GetFileMeta().Hash = files.GenerateHash(content)
219-
fms.currentFilesOnDisk[fileAction.File.GetFileMeta().GetName()] = fileAction.File
228+
fileMeta.Hash = files.GenerateHash(content)
229+
fms.currentFilesOnDisk[fileMeta.GetName()] = fileAction.File
220230
case model.Unchanged:
221231
fallthrough
222232
default:
@@ -226,8 +236,9 @@ func (fms *FileManagerService) Rollback(ctx context.Context, instanceID string)
226236

227237
if fms.rollbackManifest {
228238
slog.DebugContext(ctx, "Rolling back manifest file", "manifest_previous", fms.previousManifestFiles)
229-
manifestFileErr := fms.fileOperator.WriteManifestFile(ctx, fms.previousManifestFiles,
230-
fms.agentConfig.ManifestDir, fms.manifestFilePath)
239+
manifestFileErr := fms.fileOperator.WriteManifestFile(
240+
ctx, fms.previousManifestFiles, fms.agentConfig.LibDir, fms.manifestFilePath,
241+
)
231242
if manifestFileErr != nil {
232243
return manifestFileErr
233244
}
@@ -443,7 +454,7 @@ func (fms *FileManagerService) UpdateManifestFile(ctx context.Context,
443454
updatedFiles = manifestFiles
444455
}
445456

446-
return fms.fileOperator.WriteManifestFile(ctx, updatedFiles, fms.agentConfig.ManifestDir, fms.manifestFilePath)
457+
return fms.fileOperator.WriteManifestFile(ctx, updatedFiles, fms.agentConfig.LibDir, fms.manifestFilePath)
447458
}
448459

449460
func (fms *FileManagerService) manifestFile() (map[string]*model.ManifestFile, map[string]*mpi.File, error) {
@@ -474,37 +485,103 @@ func (fms *FileManagerService) manifestFile() (map[string]*model.ManifestFile, m
474485
return manifestFiles, fileMap, nil
475486
}
476487

477-
func (fms *FileManagerService) executeFileActions(ctx context.Context) error {
488+
func (fms *FileManagerService) executeFileActions(ctx context.Context, tempDir string) (actionError error) {
489+
// Download files to temporary location
490+
downloadError := fms.downloadUpdatedFilesToTempLocation(ctx, tempDir)
491+
if downloadError != nil {
492+
return downloadError
493+
}
494+
495+
// Remove temp files if there is a failure moving or deleting files
496+
actionError = fms.moveOrDeleteFiles(ctx, tempDir, actionError)
497+
if actionError != nil {
498+
fms.deleteTempFiles(ctx, tempDir)
499+
}
500+
501+
return actionError
502+
}
503+
504+
func (fms *FileManagerService) downloadUpdatedFilesToTempLocation(
505+
ctx context.Context, tempDir string,
506+
) (updateError error) {
507+
for _, fileAction := range fms.fileActions {
508+
if fileAction.Action == model.Add || fileAction.Action == model.Update {
509+
tempFilePath := filepath.Join(tempDir, fileAction.File.GetFileMeta().GetName())
510+
511+
slog.DebugContext(
512+
ctx,
513+
"Downloading file to temp location",
514+
"file", tempFilePath,
515+
)
516+
517+
updateErr := fms.fileUpdate(ctx, fileAction.File, tempFilePath)
518+
if updateErr != nil {
519+
updateError = updateErr
520+
break
521+
}
522+
}
523+
}
524+
525+
// Remove temp files if there is an error downloading any files
526+
if updateError != nil {
527+
fms.deleteTempFiles(ctx, tempDir)
528+
}
529+
530+
return updateError
531+
}
532+
533+
func (fms *FileManagerService) moveOrDeleteFiles(ctx context.Context, tempDir string, actionError error) error {
534+
actionsLoop:
478535
for _, fileAction := range fms.fileActions {
479536
switch fileAction.Action {
480537
case model.Delete:
481-
slog.DebugContext(ctx, "File action, deleting file", "file", fileAction.File.GetFileMeta().GetName())
538+
slog.DebugContext(ctx, "Deleting file", "file", fileAction.File.GetFileMeta().GetName())
482539
if err := os.Remove(fileAction.File.GetFileMeta().GetName()); err != nil && !os.IsNotExist(err) {
483-
return fmt.Errorf("error deleting file: %s error: %w",
540+
actionError = fmt.Errorf("error deleting file: %s error: %w",
484541
fileAction.File.GetFileMeta().GetName(), err)
542+
543+
break actionsLoop
485544
}
486545

487546
continue
488547
case model.Add, model.Update:
489-
slog.DebugContext(ctx, "File action, add or update file", "file", fileAction.File.GetFileMeta().GetName())
490-
updateErr := fms.fileUpdate(ctx, fileAction.File)
491-
if updateErr != nil {
492-
return updateErr
548+
err := fms.fileServiceOperator.MoveFilesFromTempDirectory(ctx, fileAction, tempDir)
549+
if err != nil {
550+
actionError = err
551+
552+
break actionsLoop
493553
}
494554
case model.Unchanged:
495555
slog.DebugContext(ctx, "File unchanged")
496556
}
497557
}
498558

499-
return nil
559+
return actionError
500560
}
501561

502-
func (fms *FileManagerService) fileUpdate(ctx context.Context, file *mpi.File) error {
562+
func (fms *FileManagerService) deleteTempFiles(ctx context.Context, tempDir string) {
563+
for _, fileAction := range fms.fileActions {
564+
if fileAction.Action == model.Add || fileAction.Action == model.Update {
565+
tempFile := path.Join(tempDir, fileAction.File.GetFileMeta().GetName())
566+
if err := os.Remove(tempFile); err != nil && !os.IsNotExist(err) {
567+
slog.ErrorContext(
568+
ctx, "Error deleting temp file",
569+
"file", fileAction.File.GetFileMeta().GetName(),
570+
"error", err,
571+
)
572+
}
573+
}
574+
}
575+
}
576+
577+
func (fms *FileManagerService) fileUpdate(ctx context.Context, file *mpi.File, tempFilePath string) error {
578+
expectedHash := fms.fileActions[file.GetFileMeta().GetName()].File.GetFileMeta().GetHash()
579+
503580
if file.GetFileMeta().GetSize() <= int64(fms.agentConfig.Client.Grpc.MaxFileSize) {
504-
return fms.fileServiceOperator.File(ctx, file, fms.fileActions)
581+
return fms.fileServiceOperator.File(ctx, file, tempFilePath, expectedHash)
505582
}
506583

507-
return fms.fileServiceOperator.ChunkedFile(ctx, file)
584+
return fms.fileServiceOperator.ChunkedFile(ctx, file, tempFilePath, expectedHash)
508585
}
509586

510587
func (fms *FileManagerService) checkAllowedDirectory(checkFiles []*mpi.File) error {
@@ -566,6 +643,21 @@ func (fms *FileManagerService) convertToFile(manifestFile *model.ManifestFile) *
566643
}
567644
}
568645

646+
func (fms *FileManagerService) createTempConfigDirectory(ctx context.Context) (string, error) {
647+
tempDir, tempDirError := os.MkdirTemp(fms.agentConfig.LibDir, "config")
648+
if tempDirError != nil {
649+
return "", fmt.Errorf("failed creating temp config directory: %w", tempDirError)
650+
}
651+
defer func(path string) {
652+
err := os.RemoveAll(path)
653+
if err != nil {
654+
slog.ErrorContext(ctx, "error removing temp config directory", "path", path, "err", err)
655+
}
656+
}(tempDir)
657+
658+
return tempDir, nil
659+
}
660+
569661
// ConvertToMapOfFiles converts a list of files to a map of file caches (file and action) with the file name as the key
570662
func ConvertToMapOfFileCache(convertFiles []*mpi.File) map[string]*model.FileCache {
571663
filesMap := make(map[string]*model.FileCache)

0 commit comments

Comments
 (0)