@@ -2,13 +2,13 @@ package actions
22
33import (
44 "context"
5- "errors"
65 "fmt"
76 "reflect"
87 "time"
98
109 "github.com/sirupsen/logrus"
1110 corev1 "k8s.io/api/core/v1"
11+ apierrors "k8s.io/apimachinery/pkg/api/errors"
1212 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313 "k8s.io/client-go/kubernetes"
1414
@@ -39,7 +39,6 @@ func (h *CheckNodeStatusHandler) Handle(ctx context.Context, action *castai.Clus
3939 log := h .log .WithFields (logrus.Fields {
4040 "node_name" : req .NodeName ,
4141 "node_id" : req .NodeID ,
42- "provider_id" : req .ProviderId ,
4342 "node_status" : req .NodeStatus ,
4443 "type" : reflect .TypeOf (action .Data ().(* castai.ActionCheckNodeStatus )).String (),
4544 ActionIDLogField : action .ID ,
@@ -72,28 +71,42 @@ func (h *CheckNodeStatusHandler) checkNodeDeleted(ctx context.Context, log *logr
7271 b ,
7372 waitext .Forever ,
7473 func (ctx context.Context ) (bool , error ) {
75- n , err := getNodeByIDs ( ctx , h .clientset , req .NodeName , req . NodeID , req . ProviderId )
76- if n != nil {
77- return false , errNodeNotDeleted
74+ n , err := h .clientset . CoreV1 (). Nodes (). Get ( ctx , req .NodeName , metav1. GetOptions {} )
75+ if apierrors . IsNotFound ( err ) {
76+ return false , nil
7877 }
7978
80- if errors .Is (err , errNodeNotValid ) {
81- log .WithFields (map [string ]interface {}{
82- "node" : req .NodeName ,
83- "node_id" : req .NodeID ,
84- "provider_id" : req .ProviderId ,
85- }).Warnf ("node is not valid" )
86- return false , errNodeNotValid
87- }
79+ // If the node is nil, it has been deleted.
80+ // If the node ID label is set and doesn't match, the node name was reused, so the original node is deleted.
81+ // If the node ID label is set and matches, the node is not deleted.
82+ // If the node ID label is not set and the node is not nil, the node is not deleted (potentially corrupted state).
8883
89- if errors . Is ( err , errNodeNotFound ) {
84+ if n == nil {
9085 return false , nil
9186 }
9287
88+ currentNodeID , ok := n .Labels [castai .LabelNodeID ]
89+ if ! ok {
90+ log .Info ("node doesn't have castai node id label" )
91+ }
92+ if currentNodeID != "" {
93+ if currentNodeID != req .NodeID {
94+ log .Info ("node name was reused. Original node is deleted" )
95+ return false , nil
96+ }
97+ if currentNodeID == req .NodeID {
98+ return false , fmt .Errorf ("current node id is equal to requested node id: %v %w" , req .NodeID , errNodeNotDeleted )
99+ }
100+ }
101+
102+ if n != nil {
103+ return false , errNodeNotDeleted
104+ }
105+
93106 return true , err
94107 },
95108 func (err error ) {
96- log .Warnf ("check node %s status failed, will retry: %v" , req .NodeName , err )
109+ h . log .Warnf ("check node %s status failed, will retry: %v" , req .NodeName , err )
97110 },
98111 )
99112}
@@ -115,7 +128,7 @@ func (h *CheckNodeStatusHandler) checkNodeReady(ctx context.Context, _ *logrus.E
115128 defer watch .Stop ()
116129 for r := range watch .ResultChan () {
117130 if node , ok := r .Object .(* corev1.Node ); ok {
118- if isNodeReady (node , req .NodeID , req . ProviderId ) {
131+ if isNodeReady (node , req .NodeID ) {
119132 return nil
120133 }
121134 }
@@ -124,11 +137,13 @@ func (h *CheckNodeStatusHandler) checkNodeReady(ctx context.Context, _ *logrus.E
124137 return fmt .Errorf ("timeout waiting for node %s to become ready" , req .NodeName )
125138}
126139
127- func isNodeReady (node * corev1.Node , castNodeID , providerID string ) bool {
140+ func isNodeReady (node * corev1.Node , castNodeID string ) bool {
128141 // if node has castai node id label, check if it matches the one we are waiting for
129142 // if it doesn't match, we can skip this node.
130- if err := isNodeIDProviderIDValid (node , castNodeID , providerID ); err != nil {
131- return false
143+ if val , ok := node .Labels [castai .LabelNodeID ]; ok {
144+ if val != "" && val != castNodeID {
145+ return false
146+ }
132147 }
133148 for _ , cond := range node .Status .Conditions {
134149 if cond .Type == corev1 .NodeReady && cond .Status == corev1 .ConditionTrue && ! containsUninitializedNodeTaint (node .Spec .Taints ) {
0 commit comments