Skip to content

Commit 9fb9e89

Browse files
priyansh17blackpiglet
authored andcommitted
fix: improve error handling for discarded hooks in WaitExecHookHandler
Signed-off-by: Priyansh Choudhary <im1706@gmail.com>
1 parent 038d780 commit 9fb9e89

3 files changed

Lines changed: 27 additions & 6 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix multi hook tracker to discard hooks when not executable

internal/hook/wait_exec_hook_handler.go

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,38 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
8282
return nil
8383
}
8484

85+
var errors []error
8586
// If hooks are defined for a container that does not exist in the pod log a warning and discard
8687
// those hooks to avoid waiting for a container that will never become ready. After that if
8788
// there are no hooks left to be executed return immediately.
88-
for containerName := range byContainer {
89+
// Discarded hooks must be recorded as failed in the hook tracker, otherwise the tracker
90+
// will never reach completion and the restore finalizer will poll forever waiting on hooks
91+
// that can never run.
92+
for containerName, hooks := range byContainer {
8993
if !podHasContainer(pod, containerName) {
90-
log.Warningf("Pod %s does not have container %s: discarding post-restore exec hooks", kube.NamespaceAndName(pod), containerName)
94+
discardErr := fmt.Errorf("pod %s does not have container %s: discarding post-restore exec hooks", kube.NamespaceAndName(pod), containerName)
95+
log.Warning(discardErr)
96+
for i, hook := range hooks {
97+
hookLog := log.WithFields(
98+
logrus.Fields{
99+
"hookSource": hook.HookSource,
100+
"hookType": "exec",
101+
"hookPhase": "post",
102+
"hookName": hook.HookName,
103+
"container": hook.Hook.Container,
104+
},
105+
)
106+
errTracker := multiHookTracker.Record(restoreName, pod.Namespace, pod.Name, hook.Hook.Container, hook.HookSource, hook.HookName, HookPhase(""), i, true, discardErr)
107+
if errTracker != nil {
108+
hookLog.WithError(errTracker).Warn("Error recording the discarded hook in hook tracker")
109+
}
110+
}
111+
errors = append(errors, discardErr)
91112
delete(byContainer, containerName)
92113
}
93114
}
94115
if len(byContainer) == 0 {
95-
return nil
116+
return errors
96117
}
97118

98119
// Every hook in every container can have its own wait timeout. Rather than setting up separate
@@ -108,8 +129,6 @@ func (e *DefaultWaitExecHookHandler) HandleHooks(
108129
}
109130
waitStart := time.Now()
110131

111-
var errors []error
112-
113132
// The first time this handler is called after a container starts running it will execute all
114133
// pending hooks for that container. Subsequent invocations of this handler will never execute
115134
// hooks in that container. It uses the byContainer map to keep track of which containers have

pkg/controller/restore_finalizer_controller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,8 @@ func (ctx *finalizerContext) WaitRestoreExecHook() (errs results.Result) {
561561

562562
// wait for restore exec hooks to finish
563563
err := wait.PollUntilContextCancel(context.Background(), 1*time.Second, true, func(context.Context) (bool, error) {
564-
log.Debug("Checking the progress of hooks execution")
564+
attempted, failed := ctx.multiHookTracker.Stat(ctx.restore.Name)
565+
log.Debugf("Checking the progress of hooks execution: attempted=%d, failed=%d", attempted, failed)
565566
if ctx.multiHookTracker.IsComplete(ctx.restore.Name) {
566567
return true, nil
567568
}

0 commit comments

Comments
 (0)