Skip to content

Commit fa5b3a0

Browse files
Add exponential backoff retry for default PtpOperatorConfig creation
The operator could fail to create the default PtpOperatorConfig on startup due to a race condition: the local webhook server is ready but the Kubernetes Service endpoints are not yet populated, causing the API server's validating webhook call to fail with "no endpoints available". Without retry, the default config is never created and the linuxptp-daemon DaemonSet is never spawned.

Wrap createDefaultOperatorConfig in wait.ExponentialBackoff (10 steps, 1s-30s cap) so transient webhook connectivity failures are retried gracefully. Use goroutine-local err variables to avoid a data race with mgr.Start, reuse the existing restConfig rather than calling ctrl.GetConfigOrDie() on each attempt, and check ctx.Done() so the retry loop terminates cleanly on manager shutdown.

Generated-by: Cursor
1 parent 158f56c commit fa5b3a0

File tree

1 file changed

+24
-8
lines changed

1 file changed

+24
-8
lines changed

main.go

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
"k8s.io/apimachinery/pkg/runtime"
4343
"k8s.io/apimachinery/pkg/types"
4444
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
45+
"k8s.io/apimachinery/pkg/util/wait"
4546
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
4647
ctrl "sigs.k8s.io/controller-runtime"
4748
"sigs.k8s.io/controller-runtime/pkg/cache"
@@ -246,17 +247,32 @@ func main() {
246247
}
247248

248249
go func() {
249-
// Wait until the webhook server is ready.
250250
setupLog.Info("waiting for validating webhook to be ready")
251-
err = waitForWebhookServer(checker)
252-
if err != nil {
251+
if err := waitForWebhookServer(checker); err != nil {
253252
setupLog.Error(err, "unable to create default PtpOperatorConfig due to webhook not ready")
254-
} else {
255-
// create default before the webhook are setup
256-
err = createDefaultOperatorConfig(ctrl.GetConfigOrDie())
257-
if err != nil {
258-
setupLog.Error(err, "unable to create default PtpOperatorConfig")
253+
return
254+
}
255+
256+
backoff := wait.Backoff{
257+
Steps: 10,
258+
Duration: 1 * time.Second,
259+
Factor: 2.0,
260+
Cap: 30 * time.Second,
261+
}
262+
263+
if err := wait.ExponentialBackoff(backoff, func() (bool, error) {
264+
select {
265+
case <-ctx.Done():
266+
return false, ctx.Err()
267+
default:
268+
}
269+
if err := createDefaultOperatorConfig(restConfig); err != nil {
270+
setupLog.Info("retrying default PtpOperatorConfig creation", "error", err)
271+
return false, nil
259272
}
273+
return true, nil
274+
}); err != nil {
275+
setupLog.Error(err, "unable to create default PtpOperatorConfig after retries")
260276
}
261277
}()
262278
setupLog.Info("starting manager")

0 commit comments

Comments
 (0)