From d761c4eb9fd09d73a2215ca30c125bebdc245efd Mon Sep 17 00:00:00 2001 From: omgrr Date: Fri, 12 Jan 2024 15:56:10 +0000 Subject: [PATCH 1/2] Reimplement retry-interval when draining nodes --- go.mod | 2 ++ go.sum | 4 ++++ pkg/service/nodes.go | 44 ++++++++++++++++++++++++--------------- pkg/service/nodes_test.go | 2 +- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index 2e0252ad..c78c5573 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/keikoproj/lifecycle-manager go 1.19 require ( + github.com/avast/retry-go/v4 v4.5.1 github.com/aws/aws-sdk-go v1.48.11 github.com/keikoproj/aws-sdk-go-cache v0.0.2 github.com/keikoproj/inverse-exp-backoff v0.0.4 @@ -22,6 +23,7 @@ require ( github.com/MakeNowJust/heredoc v1.0.0 // indirect github.com/PuerkitoBio/purell v1.1.1 // indirect github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect + github.com/avast/retry-go v3.0.0+incompatible // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect diff --git a/go.sum b/go.sum index 4beb79f9..4f09444b 100644 --- a/go.sum +++ b/go.sum @@ -9,6 +9,10 @@ github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= +github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= +github.com/avast/retry-go/v4 v4.5.1 h1:AxIx0HGi4VZ3I02jr78j5lZ3M6x1E0Ivxa6b0pUUh7o= +github.com/avast/retry-go/v4 v4.5.1/go.mod h1:/sipNsvNB3RRuT5iNcb6h73nw3IBmXJ/H3XrCQYSOpc= github.com/aws/aws-sdk-go v1.48.11 h1:9YbiSbaF/jWi+qLRl+J5dEhr2mcbDYHmKg2V7RBcD5M= github.com/aws/aws-sdk-go v1.48.11/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/pkg/service/nodes.go b/pkg/service/nodes.go index 17a4ad6a..13492c73 100644 --- a/pkg/service/nodes.go +++ b/pkg/service/nodes.go @@ -8,13 +8,13 @@ import ( "strings" "time" + retry "github.com/avast/retry-go" + "github.com/keikoproj/lifecycle-manager/pkg/log" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - drain "k8s.io/kubectl/pkg/drain" - - "github.com/keikoproj/lifecycle-manager/pkg/log" "k8s.io/client-go/kubernetes" + drain "k8s.io/kubectl/pkg/drain" ) func getNodeByInstance(k kubernetes.Interface, instanceID string) (v1.Node, bool) { @@ -76,20 +76,30 @@ func drainNode(kubeClient kubernetes.Interface, node *v1.Node, timeout, retryInt return nil } - for retryAttempts > 0 { - // create a copy of the node obj, since RunCordonOrUncordon() modifies the node obj - nodeCopy := node.DeepCopy() - err = drainNodeUtil(nodeCopy, int(timeout), kubeClient) - if err == nil { - log.Infof("drain succeeded, node %v", node.Name) - return nil - } - log.Errorf("failed to drain node %v, error: %v", node.Name, err) - retryAttempts -= 1 - if retryAttempts > 0 { - log.Infof("retrying drain, node %v", node.Name) - } - } + err = retry.Do( + func() error { + // create a copy of the node obj, since RunCordonOrUncordon() modifies the node obj + nodeCopy := node.DeepCopy() + err = drainNodeUtil(nodeCopy, int(timeout), kubeClient) + if err != nil { + log.Errorf("failed to drain node %v, error: %v", node.Name, err) + return err + } else { + log.Infof("drain succeeded, node %v", node.Name) + return nil + } + }, + retry.RetryIf(func(err error) bool { + if err != nil { + log.Infof("retrying drain, node %v", node.Name) + return true + } + + return false + }), + retry.Attempts(retryAttempts), + retry.Delay(time.Duration(retryInterval)*time.Second), + ) return err } diff --git a/pkg/service/nodes_test.go b/pkg/service/nodes_test.go index 047110d2..240d335f 100644 --- a/pkg/service/nodes_test.go +++ b/pkg/service/nodes_test.go @@ -192,7 +192,7 @@ func Test_DrainNodeNegative(t *testing.T) { }, } - err := drainNode(kubeClient, unjoinedNode, 10, 30, 3) + err := drainNode(kubeClient, unjoinedNode, 10, 0, 3) if err == nil { t.Fatalf("drainNode: expected error to have occured, %v", err) } From 89f486035b3293d7024096c39d57e63f16433dc8 Mon Sep 17 00:00:00 2001 From: omgrr Date: Fri, 12 Jan 2024 19:00:44 +0000 Subject: [PATCH 2/2] Fix reodering of packages Co-authored-by: Jerome Wisniewski --- pkg/service/nodes.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/service/nodes.go b/pkg/service/nodes.go index 13492c73..1d8434ea 100644 --- a/pkg/service/nodes.go +++ b/pkg/service/nodes.go @@ -9,12 +9,13 @@ import ( "time" retry "github.com/avast/retry-go" - "github.com/keikoproj/lifecycle-manager/pkg/log" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" drain "k8s.io/kubectl/pkg/drain" + + "github.com/keikoproj/lifecycle-manager/pkg/log" + "k8s.io/client-go/kubernetes" ) func getNodeByInstance(k kubernetes.Interface, instanceID string) (v1.Node, bool) {