@@ -6,13 +6,14 @@ import (
66 "fmt"
77 "io/ioutil"
88 "net/http"
9- _ "net/http/pprof"
9+ "net/http/pprof"
1010 "os"
1111 "time"
1212
1313 "github.com/google/uuid"
1414 "github.com/sirupsen/logrus"
1515 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
16+ "k8s.io/apiserver/pkg/server/healthz"
1617 "k8s.io/client-go/kubernetes"
1718 "k8s.io/client-go/rest"
1819 "k8s.io/client-go/tools/clientcmd"
@@ -112,16 +113,6 @@ func run(
112113 })
113114 log .Infof ("running castai-cluster-controller version %v" , binVersion )
114115
115- if cfg .PprofPort != 0 {
116- go func () {
117- addr := fmt .Sprintf (":%d" , cfg .PprofPort )
118- log .Infof ("starting pprof server on %s" , addr )
119- if err := http .ListenAndServe (addr , http .DefaultServeMux ); err != nil {
120- log .Errorf ("failed to start pprof http server: %v" , err )
121- }
122- }()
123- }
124-
125116 actionsConfig := actions.Config {
126117 PollWaitInterval : 5 * time .Second ,
127118 PollTimeout : 5 * time .Minute ,
@@ -139,44 +130,62 @@ func run(
139130 helmClient ,
140131 )
141132
133+ httpMux := http .NewServeMux ()
134+ var checks []healthz.HealthChecker
135+ var leaderHealthCheck * leaderelection.HealthzAdaptor
142136 if cfg .LeaderElection .Enabled {
143- lock , err := newLeaseLock (clientset , cfg .LeaderElection .LockName , cfg .LeaderElection .Namespace )
144- if err != nil {
145- return err
137+ leaderHealthCheck = leaderelection .NewLeaderHealthzAdaptor (time .Minute * 2 )
138+ checks = append (checks , leaderHealthCheck )
139+ }
140+ healthz .InstallHandler (httpMux , checks ... )
141+ installPprofHandlers (httpMux )
142+
143+ // Start http server for pprof and health checks handlers.
144+ go func () {
145+ addr := fmt .Sprintf (":%d" , cfg .PprofPort )
146+ log .Infof ("starting pprof server on %s" , addr )
147+
148+ if err := http .ListenAndServe (addr , httpMux ); err != nil {
149+ log .Errorf ("failed to start pprof http server: %v" , err )
146150 }
151+ }()
152+
153+ if cfg .LeaderElection .Enabled {
147154 // Run actions service with leader election. Blocks.
148- runWithLeaderElection (ctx , log , lock , svc .Run )
149- return nil
155+ return runWithLeaderElection (ctx , log , clientset , leaderHealthCheck , cfg .LeaderElection , svc .Run )
150156 }
151157
152158 // Run action service. Blocks.
153159 svc .Run (ctx )
154160 return nil
155161}
156162
157- func newLeaseLock (client kubernetes.Interface , lockName , lockNamespace string ) (* resourcelock.LeaseLock , error ) {
163+ func runWithLeaderElection (
164+ ctx context.Context ,
165+ log logrus.FieldLogger ,
166+ clientset kubernetes.Interface ,
167+ watchDog * leaderelection.HealthzAdaptor ,
168+ cfg config.LeaderElection ,
169+ runFunc func (ctx context.Context ),
170+ ) error {
158171 id , err := os .Hostname ()
159172 if err != nil {
160- return nil , fmt .Errorf ("failed to determine hostname used in leader ID: %w" , err )
173+ return fmt .Errorf ("failed to determine hostname used in leader ID: %w" , err )
161174 }
162175 id = id + "_" + uuid .New ().String ()
163176
164- return & resourcelock.LeaseLock {
165- LeaseMeta : metav1.ObjectMeta {
166- Name : lockName ,
167- Namespace : lockNamespace ,
168- },
169- Client : client .CoordinationV1 (),
170- LockConfig : resourcelock.ResourceLockConfig {
171- Identity : id ,
172- },
173- }, nil
174- }
175-
176- func runWithLeaderElection (ctx context.Context , log logrus.FieldLogger , lock * resourcelock.LeaseLock , runFunc func (ctx context.Context )) {
177177 // Start the leader election code loop
178178 leaderelection .RunOrDie (ctx , leaderelection.LeaderElectionConfig {
179- Lock : lock ,
179+ Lock : & resourcelock.LeaseLock {
180+ LeaseMeta : metav1.ObjectMeta {
181+ Name : cfg .LockName ,
182+ Namespace : cfg .Namespace ,
183+ },
184+ Client : clientset .CoordinationV1 (),
185+ LockConfig : resourcelock.ResourceLockConfig {
186+ Identity : id ,
187+ },
188+ },
180189 // IMPORTANT: you MUST ensure that any code you have that
181190 // is protected by the lease must terminate **before**
182191 // you call cancel. Otherwise, you could have a background
@@ -187,25 +196,35 @@ func runWithLeaderElection(ctx context.Context, log logrus.FieldLogger, lock *re
187196 LeaseDuration : 60 * time .Second ,
188197 RenewDeadline : 15 * time .Second ,
189198 RetryPeriod : 5 * time .Second ,
199+ WatchDog : watchDog ,
190200 Callbacks : leaderelection.LeaderCallbacks {
191201 OnStartedLeading : func (ctx context.Context ) {
192- log .Infof ("started leader: %s" , lock . Identity () )
202+ log .Infof ("started leader: %s" , id )
193203 runFunc (ctx )
194204 },
195205 OnStoppedLeading : func () {
196- log .Infof ("leader lost: %s" , lock . Identity () )
206+ log .Infof ("leader lost: %s" , id )
197207 os .Exit (0 )
198208 },
199209 OnNewLeader : func (identity string ) {
200210 // We're notified when new leader elected.
201- if identity == lock . Identity () {
211+ if identity == id {
202212 // I just got the lock.
203213 return
204214 }
205215 log .Infof ("new leader elected: %s" , identity )
206216 },
207217 },
208218 })
219+ return nil
220+ }
221+
// installPprofHandlers registers the net/http/pprof endpoints on mux.
//
// run serves a mux created with http.NewServeMux rather than
// http.DefaultServeMux, so the handlers that net/http/pprof installs on the
// default mux at init time are not reachable through it; they are wired up
// explicitly here instead.
//
// The trailing slash on "/debug/pprof/" makes it a subtree pattern, so
// pprof.Index also receives requests for paths below it that have no more
// specific registration (per the net/http/pprof docs, Index serves the
// profile named by the request, e.g. "/debug/pprof/heap").
222+ func installPprofHandlers (mux * http.ServeMux ) {
223+ mux .HandleFunc ("/debug/pprof/" , pprof .Index )
224+ mux .HandleFunc ("/debug/pprof/cmdline" , pprof .Cmdline )
225+ mux .HandleFunc ("/debug/pprof/profile" , pprof .Profile )
226+ mux .HandleFunc ("/debug/pprof/symbol" , pprof .Symbol )
227+ mux .HandleFunc ("/debug/pprof/trace" , pprof .Trace )
209228}
210229
211230func kubeConfigFromEnv () (* rest.Config , error ) {
0 commit comments