From 075628d10238704bafe1ab90959aa0976968cd75 Mon Sep 17 00:00:00 2001 From: Matt Wise Date: Sun, 29 Mar 2026 11:47:25 -0700 Subject: [PATCH] feat(config): expose leader election timing parameters Allow configuring leaseDuration, renewDeadline, and retryPeriod via CLI flags and Helm values. Defaults remain unchanged (15s/10s/2s). Validates that renewDeadline < leaseDuration and retryPeriod < renewDeadline at startup. Refs #21 Co-Authored-By: Claude Opus 4.6 --- .../templates/deployment.yaml | 9 ++++++ charts/vigil-controller/values.yaml | 9 ++++++ cmd/main.go | 29 +++++++++++++++---- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/charts/vigil-controller/templates/deployment.yaml b/charts/vigil-controller/templates/deployment.yaml index d6c63de..444f44c 100644 --- a/charts/vigil-controller/templates/deployment.yaml +++ b/charts/vigil-controller/templates/deployment.yaml @@ -46,6 +46,15 @@ spec: {{- end }} {{- if .Values.controllerManager.leaderElection.enabled }} - --leader-elect + {{- with .Values.controllerManager.leaderElection.leaseDuration }} + - --leader-election-lease-duration={{ . }} + {{- end }} + {{- with .Values.controllerManager.leaderElection.renewDeadline }} + - --leader-election-renew-deadline={{ . }} + {{- end }} + {{- with .Values.controllerManager.leaderElection.retryPeriod }} + - --leader-election-retry-period={{ . }} + {{- end }} {{- end }} - --health-probe-bind-address={{ .Values.controllerManager.healthProbeBindAddress }} - --metrics-bind-address={{ .Values.controllerManager.metricsBindAddress }} diff --git a/charts/vigil-controller/values.yaml b/charts/vigil-controller/values.yaml index b78faa4..acbc3db 100644 --- a/charts/vigil-controller/values.yaml +++ b/charts/vigil-controller/values.yaml @@ -85,6 +85,15 @@ controllerManager: leaderElection: # -- Enable leader election enabled: true + # -- Duration that non-leader candidates will wait to force acquire leadership. + # Shorter values mean faster failover but more API server load. + leaseDuration: "15s" + # -- Duration the acting leader will retry refreshing leadership before giving up. + # Must be less than leaseDuration. + renewDeadline: "10s" + # -- Duration between leader election retry attempts. + # Must be less than renewDeadline. + retryPeriod: "2s" # -- Log level (debug, info, warn, error) logLevel: "info" # -- Health probe bind address diff --git a/cmd/main.go b/cmd/main.go index 64248d0..46774b3 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -38,6 +38,9 @@ func main() { var metricsAddr string var probeAddr string var enableLeaderElection bool + var leaseDuration time.Duration + var renewDeadline time.Duration + var retryPeriod time.Duration flag.StringVar(&configFile, "config", "/etc/vigil/config/config.yaml", "Path to the controller configuration file.") @@ -47,6 +50,12 @@ func main() { "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager.") + flag.DurationVar(&leaseDuration, "leader-election-lease-duration", 15*time.Second, + "Duration that non-leader candidates will wait to force acquire leadership.") + flag.DurationVar(&renewDeadline, "leader-election-renew-deadline", 10*time.Second, + "Duration the acting leader will retry refreshing leadership before giving up.") + flag.DurationVar(&retryPeriod, "leader-election-retry-period", 2*time.Second, + "Duration between leader election retry attempts.") opts := zap.Options{Development: true} opts.BindFlags(flag.CommandLine) @@ -54,6 +63,18 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + // Validate leader election timing constraints + if renewDeadline >= leaseDuration { + setupLog.Error(nil, "leader-election-renew-deadline must be less than leader-election-lease-duration", + "renew-deadline", renewDeadline, "lease-duration", leaseDuration) + os.Exit(1) + } + if retryPeriod >= renewDeadline { + setupLog.Error(nil, "leader-election-retry-period must be less than leader-election-renew-deadline", + "retry-period", retryPeriod, "renew-deadline", renewDeadline) + os.Exit(1) + } + // Load configuration cfg, err := config.Load(configFile) if err != nil { @@ -82,9 +103,9 @@ func main() { HealthProbeBindAddress: probeAddr, LeaderElection: enableLeaderElection, LeaderElectionID: "vigil-controller.nextdoor.com", - LeaseDuration: ptr(15 * time.Second), - RenewDeadline: ptr(10 * time.Second), - RetryPeriod: ptr(2 * time.Second), + LeaseDuration: &leaseDuration, + RenewDeadline: &renewDeadline, + RetryPeriod: &retryPeriod, LeaderElectionReleaseOnCancel: true, }) if err != nil { @@ -150,5 +171,3 @@ func main() { os.Exit(1) } } - -func ptr[T any](v T) *T { return &v }