Skip to content

Commit d7f419c

Browse files
authored
wait for node to report address before removing taint (#158)
* address/assigner: return assigned address
* cmd/assignAddress: return assignedAddress
* cmd/run: wait for address to be reported before removing taint
1 parent ab70f62 commit d7f419c

File tree

9 files changed

+405
-61
lines changed

9 files changed

+405
-61
lines changed

cmd/main.go

+71-9
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ func prepareLogger(level string, json bool) *logrus.Entry {
8282
return log
8383
}
8484

85-
func assignAddress(c context.Context, log *logrus.Entry, client kubernetes.Interface, assigner address.Assigner, node *types.Node, cfg *config.Config) error {
85+
func assignAddress(c context.Context, log *logrus.Entry, client kubernetes.Interface, assigner address.Assigner, node *types.Node, cfg *config.Config) (string, error) {
8686
ctx, cancel := context.WithCancel(c)
8787
defer cancel()
8888

@@ -101,22 +101,23 @@ func assignAddress(c context.Context, log *logrus.Entry, client kubernetes.Inter
101101
"retry-counter": retryCounter,
102102
"retry-attempts": cfg.RetryAttempts,
103103
}).Debug("assigning static public IP address to node")
104-
err := func(ctx context.Context) error {
104+
assignedAddress, err := func(ctx context.Context) (string, error) {
105105
if err := lock.Lock(ctx); err != nil {
106-
return errors.Wrap(err, "failed to acquire lock")
106+
return "", errors.Wrap(err, "failed to acquire lock")
107107
}
108108
log.Debug("lock acquired")
109109
defer func() {
110110
lock.Unlock(ctx) //nolint:errcheck
111111
log.Debug("lock released")
112112
}()
113-
if err := assigner.Assign(ctx, node.Instance, node.Zone, cfg.Filter, cfg.OrderBy); err != nil {
114-
return err //nolint:wrapcheck
113+
assignedAddress, err := assigner.Assign(ctx, node.Instance, node.Zone, cfg.Filter, cfg.OrderBy)
114+
if err != nil {
115+
return "", err //nolint:wrapcheck
115116
}
116-
return nil
117+
return assignedAddress, nil
117118
}(c)
118119
if err == nil || errors.Is(err, address.ErrStaticIPAlreadyAssigned) {
119-
return nil
120+
return assignedAddress, nil
120121
}
121122

122123
log.WithError(err).WithFields(logrus.Fields{
@@ -130,7 +131,64 @@ func assignAddress(c context.Context, log *logrus.Entry, client kubernetes.Inter
130131
continue
131132
case <-ctx.Done():
132133
// If the context is done, return an error indicating that the operation was cancelled
133-
return errors.Wrap(ctx.Err(), "context cancelled while assigning addresses")
134+
return "", errors.Wrap(ctx.Err(), "context cancelled while assigning addresses")
135+
}
136+
}
137+
return "", errors.New("reached maximum number of retries")
138+
}
139+
140+
func waitForAddressToBeReported(c context.Context, log *logrus.Entry, explorer nd.Explorer, node *types.Node, assignedAddress string, cfg *config.Config) error {
141+
ctx, cancel := context.WithCancel(c)
142+
defer cancel()
143+
144+
// ticker for retry interval
145+
ticker := time.NewTicker(cfg.RetryInterval)
146+
defer ticker.Stop()
147+
148+
for retryCounter := 0; retryCounter <= cfg.RetryAttempts; retryCounter++ {
149+
log.WithFields(logrus.Fields{
150+
"node": node.Name,
151+
"instance": node.Instance,
152+
"address": assignedAddress,
153+
"retry-counter": retryCounter,
154+
"retry-attempts": cfg.RetryAttempts,
155+
}).Debug("Waiting for node to report assigned address")
156+
157+
nodeInfo, err := explorer.GetNode(ctx, node.Name)
158+
if err == nil {
159+
for _, ip := range nodeInfo.ExternalIPs {
160+
if ip.String() == assignedAddress {
161+
log.WithFields(logrus.Fields{
162+
"node": node.Name,
163+
"instance": node.Instance,
164+
"address": assignedAddress,
165+
"retry-counter": retryCounter,
166+
"retry-attempts": cfg.RetryAttempts,
167+
}).Info("Node is reporting assigned address")
168+
return nil
169+
}
170+
}
171+
log.WithError(err).WithFields(logrus.Fields{
172+
"node": node.Name,
173+
"instance": node.Instance,
174+
"address": assignedAddress,
175+
}).Warn("Node is not yet reporting the assigned address")
176+
} else {
177+
log.WithError(err).WithFields(logrus.Fields{
178+
"node": node.Name,
179+
"instance": node.Instance,
180+
"address": assignedAddress,
181+
}).Error("failed to check if node is reporting the assigned address")
182+
}
183+
184+
log.Infof("retrying after %v", cfg.RetryInterval)
185+
186+
select {
187+
case <-ticker.C:
188+
continue
189+
case <-ctx.Done():
190+
// If the context is done, return an error indicating that the operation was cancelled
191+
return errors.Wrap(ctx.Err(), "context cancelled while waiting for node to report assigned address")
134192
}
135193
}
136194
return errors.New("reached maximum number of retries")
@@ -169,12 +227,16 @@ func run(c context.Context, log *logrus.Entry, cfg *config.Config) error {
169227
return errors.Wrap(err, "initializing assigner")
170228
}
171229

172-
err = assignAddress(ctx, log, clientset, assigner, n, cfg)
230+
assignedAddress, err := assignAddress(ctx, log, clientset, assigner, n, cfg)
173231
if err != nil {
174232
return errors.Wrap(err, "assigning static public IP address")
175233
}
176234

177235
if cfg.TaintKey != "" {
236+
if err := waitForAddressToBeReported(ctx, log, explorer, n, assignedAddress, cfg); err != nil {
237+
return errors.Wrap(err, "waiting for node to report assigned address")
238+
}
239+
178240
logger := log.WithField("taint-key", cfg.TaintKey)
179241
tainter := nd.NewTainter(clientset)
180242

0 commit comments

Comments
 (0)