@@ -20,26 +20,24 @@ import (
2020 "github.com/castai/cluster-controller/castai"
2121)
2222
23- var (
24- errPodPresent = errors .New ("pod is still present" )
25- )
26-
// drainNodeConfig groups the timeouts and retry settings used by the drain node handler.
type drainNodeConfig struct {
	// podsDeleteTimeout bounds the forced pod deletion phase as a whole.
	podsDeleteTimeout time.Duration
	// podDeleteRetries is the maximum number of retries when deleting a single pod.
	podDeleteRetries uint64
	// podDeleteRetryDelay is the constant delay between delete retries of a single pod.
	podDeleteRetryDelay time.Duration
	// podEvictRetryDelay is presumably the delay between evict retries of a single pod;
	// its consumer is not visible in this part of the file - TODO confirm against evictPod.
	podEvictRetryDelay time.Duration
	// podsTerminationWaitRetryDelay is the constant delay between checks for pods still present on the node.
	podsTerminationWaitRetryDelay time.Duration
}
3330
3431func newDrainNodeHandler (log logrus.FieldLogger , clientset kubernetes.Interface ) ActionHandler {
3532 return & drainNodeHandler {
3633 log : log ,
3734 clientset : clientset ,
3835 cfg : drainNodeConfig {
39- podsDeleteTimeout : 5 * time .Minute ,
40- podDeleteRetries : 5 ,
41- podDeleteRetryDelay : 5 * time .Second ,
42- podEvictRetryDelay : 5 * time .Second ,
36+ podsDeleteTimeout : 5 * time .Minute ,
37+ podDeleteRetries : 5 ,
38+ podDeleteRetryDelay : 5 * time .Second ,
39+ podEvictRetryDelay : 5 * time .Second ,
40+ podsTerminationWaitRetryDelay : 10 * time .Second ,
4341 },
4442 }
4543}
@@ -73,19 +71,10 @@ func (h *drainNodeHandler) Handle(ctx context.Context, data interface{}) error {
7371 return fmt .Errorf ("tainting node %q: %w" , req .NodeName , err )
7472 }
7573
76- allNodePods , err := h .listNodePods (ctx , node )
77- if err != nil {
78- return fmt .Errorf ("listing pods for node %q: %w" , req .NodeName , err )
79- }
80-
81- podsToEvict := lo .Filter (allNodePods .Items , func (pod v1.Pod , _ int ) bool {
82- return ! isDaemonSetPod (& pod ) && ! isStaticPod (& pod )
83- })
84-
8574 // First try to evict pods gracefully.
8675 evictCtx , evictCancel := context .WithTimeout (ctx , time .Duration (req .DrainTimeoutSeconds )* time .Second )
8776 defer evictCancel ()
88- err = h .evictPods (evictCtx , log , podsToEvict )
77+ err = h .evictNodePods (evictCtx , log , node )
8978 if err != nil && ! errors .Is (err , context .DeadlineExceeded ) {
9079 return err
9180 }
@@ -97,7 +86,7 @@ func (h *drainNodeHandler) Handle(ctx context.Context, data interface{}) error {
9786 // If force is set and evict timeout exceeded delete pods.
9887 deleteCtx , deleteCancel := context .WithTimeout (ctx , h .cfg .podsDeleteTimeout )
9988 defer deleteCancel ()
100- if err := h .deletePods (deleteCtx , log , podsToEvict ); err != nil {
89+ if err := h .deleteNodePods (deleteCtx , log , node ); err != nil {
10190 return err
10291 }
10392 }
@@ -107,74 +96,71 @@ func (h *drainNodeHandler) Handle(ctx context.Context, data interface{}) error {
10796 return nil
10897}
10998
110- func (h * drainNodeHandler ) deletePods (ctx context.Context , log logrus.FieldLogger , pods []v1.Pod ) error {
111- log .Infof ("forcefully deleting %d pods" , len (pods ))
112-
113- g , ctx := errgroup .WithContext (ctx )
114- for _ , pod := range pods {
115- pod := pod
116-
117- g .Go (func () error {
118- err := h .deletePod (ctx , pod )
119- if err != nil {
120- return err
121- }
122- return h .waitPodTerminated (ctx , log , pod )
123- })
99+ func (h * drainNodeHandler ) taintNode (ctx context.Context , node * v1.Node ) error {
100+ if node .Spec .Unschedulable {
101+ return nil
124102 }
125103
126- if err := g .Wait (); err != nil {
127- return fmt .Errorf ("deleting pods: %w" , err )
104+ err := patchNode (ctx , h .clientset , node , func (n * v1.Node ) error {
105+ n .Spec .Unschedulable = true
106+ return nil
107+ })
108+ if err != nil {
109+ return fmt .Errorf ("patching node unschedulable: %w" , err )
128110 }
129-
130111 return nil
131112}
132113
133- func (h * drainNodeHandler ) deletePod (ctx context.Context , pod v1.Pod ) error {
134- b := backoff .WithContext (backoff .WithMaxRetries (backoff .NewConstantBackOff (h .cfg .podDeleteRetryDelay ), h .cfg .podDeleteRetries ), ctx ) // nolint:gomnd
135- action := func () error {
136- err := h .clientset .CoreV1 ().Pods (pod .Namespace ).Delete (ctx , pod .Name , metav1.DeleteOptions {})
137- if err != nil {
138- // Pod is not found - ignore.
139- if apierrors .IsNotFound (err ) {
140- return nil
141- }
114+ func (h * drainNodeHandler ) evictNodePods (ctx context.Context , log logrus.FieldLogger , node * v1.Node ) error {
115+ pods , err := h .listNodePodsToEvict (ctx , node )
116+ if err != nil {
117+ return err
118+ }
142119
143- // Pod is misconfigured - stop retry.
144- if apierrors .IsInternalError (err ) {
145- return backoff .Permanent (err )
146- }
147- }
120+ log .Infof ("evicting %d pods" , len (pods ))
148121
149- // Other errors - retry.
122+ if err := h .sendPodsRequests (ctx , pods , h .evictPod ); err != nil {
123+ return fmt .Errorf ("sending evict pods requests: %w" , err )
124+ }
125+
126+ return h .waitNodePodsTerminated (ctx , node )
127+ }
128+
129+ func (h * drainNodeHandler ) deleteNodePods (ctx context.Context , log logrus.FieldLogger , node * v1.Node ) error {
130+ pods , err := h .listNodePodsToEvict (ctx , node )
131+ if err != nil {
150132 return err
151133 }
152- if err := backoff .Retry (action , b ); err != nil {
153- return fmt .Errorf ("deleting pod %s in namespace %s: %w" , pod .Name , pod .Namespace , err )
134+
135+ log .Infof ("forcefully deleting %d pods" , len (pods ))
136+
137+ if err := h .sendPodsRequests (ctx , pods , h .deletePod ); err != nil {
138+ return fmt .Errorf ("sending delete pods requests: %w" , err )
154139 }
155- return nil
140+
141+ return h .waitNodePodsTerminated (ctx , node )
156142}
157143
158- // taintNode to make it unshedulable.
159- func (h * drainNodeHandler ) taintNode (ctx context.Context , node * v1.Node ) error {
160- if node .Spec .Unschedulable {
161- return nil
162- }
144+ func (h * drainNodeHandler ) sendPodsRequests (ctx context.Context , pods []v1.Pod , f func (context.Context , v1.Pod ) error ) error {
145+ const batchSize = 5
163146
164- err := patchNode (ctx , h .clientset , node , func (n * v1.Node ) error {
165- n .Spec .Unschedulable = true
166- return nil
167- })
168- if err != nil {
169- return fmt .Errorf ("patching node unschedulable: %w" , err )
147+ for _ , batch := range lo .Chunk (pods , batchSize ) {
148+ g , ctx := errgroup .WithContext (ctx )
149+ for _ , pod := range batch {
150+ pod := pod
151+ g .Go (func () error { return f (ctx , pod ) })
152+ }
153+ if err := g .Wait (); err != nil {
154+ return err
155+ }
170156 }
157+
171158 return nil
172159}
173160
174- // listNodePods returns a list of all pods scheduled on the provided node.
175- func (h * drainNodeHandler ) listNodePods (ctx context.Context , node * v1.Node ) (* v1.PodList , error ) {
161+ func (h * drainNodeHandler ) listNodePodsToEvict (ctx context.Context , node * v1.Node ) ([]v1.Pod , error ) {
176162 var pods * v1.PodList
177- err := backoff .Retry (func () error {
163+ if err := backoff .Retry (func () error {
178164 p , err := h .clientset .CoreV1 ().Pods (metav1 .NamespaceAll ).List (ctx , metav1.ListOptions {
179165 FieldSelector : fields .SelectorFromSet (fields.Set {"spec.nodeName" : node .Name }).String (),
180166 })
@@ -183,58 +169,28 @@ func (h *drainNodeHandler) listNodePods(ctx context.Context, node *v1.Node) (*v1
183169 }
184170 pods = p
185171 return nil
186- }, defaultBackoff (ctx ))
187- return pods , err
188- }
189-
190- func (h * drainNodeHandler ) evictPods (ctx context.Context , log logrus.FieldLogger , pods []v1.Pod ) error {
191- log .Infof ("evicting %d pods" , len (pods ))
192-
193- g , ctx := errgroup .WithContext (ctx )
194- for _ , pod := range pods {
195- pod := pod
196-
197- g .Go (func () error {
198- err := h .evictPod (ctx , pod )
199- if err != nil {
200- return err
201- }
202- return h .waitPodTerminated (ctx , log , pod )
203- })
172+ }, defaultBackoff (ctx )); err != nil {
173+ return nil , fmt .Errorf ("listing node %v pods: %w" , node .Name , err )
204174 }
205175
206- if err := g . Wait (); err != nil {
207- return fmt . Errorf ( "evicting pods: %w" , err )
208- }
176+ podsToEvict := lo . Filter ( pods . Items , func ( pod v1. Pod , _ int ) bool {
177+ return ! isDaemonSetPod ( & pod ) && ! isStaticPod ( & pod )
178+ })
209179
210- return nil
180+ return podsToEvict , nil
211181}
212182
213- func (h * drainNodeHandler ) waitPodTerminated (ctx context.Context , log logrus.FieldLogger , pod v1.Pod ) error {
214- b := backoff .WithContext (backoff .NewConstantBackOff (5 * time .Second ), ctx ) // nolint:gomnd
215-
216- err := backoff .Retry (func () error {
217- p , err := h .clientset .CoreV1 ().Pods (pod .Namespace ).Get (ctx , pod .Name , metav1.GetOptions {})
218- if err != nil && apierrors .IsNotFound (err ) {
219- return nil
220- }
183+ func (h * drainNodeHandler ) waitNodePodsTerminated (ctx context.Context , node * v1.Node ) error {
184+ return backoff .Retry (func () error {
185+ pods , err := h .listNodePodsToEvict (ctx , node )
221186 if err != nil {
222- return err
187+ return fmt . Errorf ( "waiting for node %q pods to be terminated: %w" , node . Name , err )
223188 }
224- // replicaSets will recreate pods with equal name and namespace, therefore we compare UIDs
225- if p .GetUID () == pod .GetUID () {
226- return errPodPresent
189+ if len (pods ) > 0 {
190+ return fmt .Errorf ("waiting for %d pods to be terminated on node %v" , len (pods ), node .Name )
227191 }
228192 return nil
229- }, b )
230- if err != nil && errors .Is (err , errPodPresent ) {
231- log .Infof ("timeout waiting for pod %s in namespace %s to terminate" , pod .Name , pod .Namespace )
232- return nil
233- }
234- if err != nil {
235- return fmt .Errorf ("waiting for pod %s in namespace %s termination: %w" , pod .Name , pod .Namespace , err )
236- }
237- return nil
193+ }, backoff .WithContext (backoff .NewConstantBackOff (h .cfg .podsTerminationWaitRetryDelay ), ctx ))
238194}
239195
240196// evictPod from the k8s node. Error handling is based on eviction api documentation:
@@ -274,6 +230,31 @@ func (h *drainNodeHandler) evictPod(ctx context.Context, pod v1.Pod) error {
274230 return nil
275231}
276232
233+ func (h * drainNodeHandler ) deletePod (ctx context.Context , pod v1.Pod ) error {
234+ b := backoff .WithContext (backoff .WithMaxRetries (backoff .NewConstantBackOff (h .cfg .podDeleteRetryDelay ), h .cfg .podDeleteRetries ), ctx ) // nolint:gomnd
235+ action := func () error {
236+ err := h .clientset .CoreV1 ().Pods (pod .Namespace ).Delete (ctx , pod .Name , metav1.DeleteOptions {})
237+ if err != nil {
238+ // Pod is not found - ignore.
239+ if apierrors .IsNotFound (err ) {
240+ return nil
241+ }
242+
243+ // Pod is misconfigured - stop retry.
244+ if apierrors .IsInternalError (err ) {
245+ return backoff .Permanent (err )
246+ }
247+ }
248+
249+ // Other errors - retry.
250+ return err
251+ }
252+ if err := backoff .Retry (action , b ); err != nil {
253+ return fmt .Errorf ("deleting pod %s in namespace %s: %w" , pod .Name , pod .Namespace , err )
254+ }
255+ return nil
256+ }
257+
277258func isDaemonSetPod (p * v1.Pod ) bool {
278259 return isControlledBy (p , "DaemonSet" )
279260}
0 commit comments