6
6
"github.com/buildkite/agent-stack-k8s/v2/internal/controller/config"
7
7
8
8
"go.uber.org/zap"
9
+
9
10
v1 "k8s.io/api/core/v1"
10
11
kerrors "k8s.io/apimachinery/pkg/api/errors"
11
12
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -16,9 +17,18 @@ import (
16
17
"k8s.io/utils/ptr"
17
18
)
18
19
20
// defaultTermGracePeriodSeconds is the number of seconds used as the
// ActiveDeadlineSeconds value that cleanupSidecars sets on a job (as described
// in that function's doc comment).
+ const defaultTermGracePeriodSeconds = 60
21
+
19
22
type completionsWatcher struct {
20
23
logger * zap.Logger
21
24
k8s kubernetes.Interface
25
+
26
+ // This is the context passed to RegisterInformer.
27
+ // It's being stored here (grrrr!) because the k8s ResourceEventHandler
28
+ // interface doesn't have context args. (Working around an interface in a
29
+ // library outside of our control is a carve-out from the usual rule.)
30
+ // The context is needed to ensure goroutines are cleaned up.
31
+ resourceEventHandlerCtx context.Context
22
32
}
23
33
24
34
func NewPodCompletionWatcher (logger * zap.Logger , k8s kubernetes.Interface ) * completionsWatcher {
@@ -30,14 +40,12 @@ func NewPodCompletionWatcher(logger *zap.Logger, k8s kubernetes.Interface) *comp
30
40
}
31
41
32
42
// Creates a Pods informer and registers the handler on it
33
- func (w * completionsWatcher ) RegisterInformer (
34
- ctx context.Context ,
35
- factory informers.SharedInformerFactory ,
36
- ) error {
43
+ func (w * completionsWatcher ) RegisterInformer (ctx context.Context , factory informers.SharedInformerFactory ) error {
37
44
informer := factory .Core ().V1 ().Pods ().Informer ()
38
45
if _ , err := informer .AddEventHandler (w ); err != nil {
39
46
return err
40
47
}
48
+ w .resourceEventHandlerCtx = ctx // see note on field
41
49
go factory .Start (ctx .Done ())
42
50
return nil
43
51
}
@@ -49,7 +57,7 @@ func (w *completionsWatcher) OnDelete(obj any) {}
49
57
// OnAdd handles a pod-added notification. It increments the OnAdd event
// counter and then runs cleanupSidecars on the pod, using the context that
// RegisterInformer stored on w.
//
// obj must be a *v1.Pod; any other type panics on the type assertion.
// isInInitialList is not used.
func (w *completionsWatcher) OnAdd(obj any, isInInitialList bool) {
	completionWatcherOnAddEventCounter.Inc()

	ctx := w.resourceEventHandlerCtx
	w.cleanupSidecars(ctx, obj.(*v1.Pod))
}
54
62
55
63
func (w * completionsWatcher ) OnUpdate (old any , new any ) {
@@ -62,15 +70,15 @@ func (w *completionsWatcher) OnUpdate(old any, new any) {
62
70
}
63
71
64
72
newPod := new .(* v1.Pod )
65
- w .cleanupSidecars (newPod )
73
+ w .cleanupSidecars (w . resourceEventHandlerCtx , newPod )
66
74
}
67
75
68
76
// cleanupSidecars first checks if the container status of the agent container
69
77
// in the pod is Terminated. If so, it ensures the job is cleaned up by updating
70
78
// it with an ActiveDeadlineSeconds value (defaultTermGracePeriodSeconds).
71
79
// (So this is not actually sidecar-specific, but is needed because sidecars
72
80
// would otherwise cause the pod to continue running.)
73
- func (w * completionsWatcher ) cleanupSidecars (pod * v1.Pod ) {
81
+ func (w * completionsWatcher ) cleanupSidecars (ctx context. Context , pod * v1.Pod ) {
74
82
terminated := getTermination (pod )
75
83
if terminated == nil {
76
84
return
@@ -82,7 +90,6 @@ func (w *completionsWatcher) cleanupSidecars(pod *v1.Pod) {
82
90
)
83
91
84
92
if err := retry .RetryOnConflict (retry .DefaultRetry , func () error {
85
- ctx := context .TODO ()
86
93
job , err := w .k8s .BatchV1 ().Jobs (pod .Namespace ).Get (ctx , pod .Labels ["job-name" ], metav1.GetOptions {})
87
94
if err != nil {
88
95
return err
0 commit comments