@@ -19,6 +19,9 @@ import (
1919 "github.com/castai/cluster-controller/internal/waitext"
2020)
2121
22+ // gcCompletedActionAfterTimes specifies how many GC passes a completed action survives in the store; it is removed on the following pass.
23+ const gcCompletedActionAfterTimes = 2
24+
2225type Config struct {
2326 PollWaitInterval time.Duration // How long to wait until the next long polling request.
2427 PollTimeout time.Duration // Hard timeout. Normally the server should return an empty result before this timeout.
@@ -45,7 +48,7 @@ func NewService(
4548 k8sVersion : k8sVersion ,
4649 castAIClient : castaiClient ,
4750 startedActions : make (map [string ]struct {}),
48- completedActions : make (map [string ]time. Time ),
51+ completedActions : make (map [string ]int8 ),
4952 actionHandlers : actionHandlers ,
5053 healthCheck : healthCheck ,
5154 }
@@ -61,10 +64,11 @@ type Controller struct {
6164 actionHandlers actions.ActionHandlers
6265
6366 startedActionsWg sync.WaitGroup
64- startedActions map [string ]struct {}
65- completedActions map [string ]time.Time
66- startedActionsMu sync.Mutex
67- healthCheck * health.HealthzProvider
67+ actionsMu sync.Mutex
68+ startedActions map [string ]struct {} // protected by actionsMu
69+ completedActions map [string ]int8 // protected by actionsMu
70+
71+ healthCheck * health.HealthzProvider
6872}
6973
7074func (s * Controller ) Run (ctx context.Context ) {
@@ -170,21 +174,21 @@ func (s *Controller) handleActions(ctx context.Context, clusterActions []*castai
170174}
171175
172176func (s * Controller ) finishProcessing (actionID string , ackErr error ) {
173- s .startedActionsMu .Lock ()
174- defer s .startedActionsMu .Unlock ()
177+ s .actionsMu .Lock ()
178+ defer s .actionsMu .Unlock ()
175179
176180 s .startedActionsWg .Done ()
177181 delete (s .startedActions , actionID )
178182
179183 if ackErr == nil {
180- // only mark the action as completed if it was succesfully acknowledged so it can be retried quickly if not and still requested.
181- s .completedActions [actionID ] = time . Now ()
184+ // only mark the action as completed if it was successfully acknowledged so it can be retried quickly if not and still requested.
185+ s .completedActions [actionID ] = gcCompletedActionAfterTimes + 1
182186 }
183187}
184188
185189func (s * Controller ) startProcessing (actionID string ) bool {
186- s .startedActionsMu .Lock ()
187- defer s .startedActionsMu .Unlock ()
190+ s .actionsMu .Lock ()
191+ defer s .actionsMu .Unlock ()
188192
189193 if _ , ok := s .startedActions [actionID ]; ok {
190194 return false
@@ -261,17 +265,16 @@ func (s *Controller) ackAction(ctx context.Context, action *castai.ClusterAction
261265}
262266
263267func (s * Controller ) gcCompletedActions () {
264- expireDuration := (s .cfg .PollTimeout + s .cfg .PollWaitInterval ) * 2
265- now := time .Now ()
266-
267- s .startedActionsMu .Lock ()
268- defer s .startedActionsMu .Unlock ()
268+ s .actionsMu .Lock ()
269+ defer s .actionsMu .Unlock ()
269270
270- for actionID , completedAt := range s .completedActions {
271- if now .Before (completedAt .Add (expireDuration )) {
271+ for actionID , timesVisited := range s .completedActions {
272+ timesVisited --
273+ if timesVisited <= 0 {
274+ delete (s .completedActions , actionID )
272275 continue
273276 }
274- delete ( s .completedActions , actionID )
277+ s .completedActions [ actionID ] = timesVisited
275278 }
276279}
277280
0 commit comments