@@ -79,6 +79,7 @@ import (
7979 "sigs.k8s.io/kueue/pkg/scheduler/preemption/fairsharing"
8080 "sigs.k8s.io/kueue/pkg/util/cert"
8181 "sigs.k8s.io/kueue/pkg/util/kubeversion"
82+ "sigs.k8s.io/kueue/pkg/util/roletracker"
8283 "sigs.k8s.io/kueue/pkg/util/useragent"
8384 "sigs.k8s.io/kueue/pkg/util/waitforpodsready"
8485 "sigs.k8s.io/kueue/pkg/version"
@@ -233,6 +234,15 @@ func main() {
233234 os .Exit (1 )
234235 }
235236
237+ var roleTracker * roletracker.RoleTracker
238+ if cfg .LeaderElection != nil && ptr .Deref (cfg .LeaderElection .LeaderElect , false ) {
239+ roleTracker = roletracker .NewRoleTracker (mgr .Elected ())
240+ go roleTracker .Start (ctx , setupLog )
241+ setupLog .Info ("RoleTracker: leader election enabled" )
242+ } else {
243+ setupLog .Info ("RoleTracker: running in standalone mode" )
244+ }
245+
236246 certsReady := make (chan struct {})
237247
238248 if cfg .InternalCertManagement != nil && * cfg .InternalCertManagement .Enable {
@@ -243,8 +253,8 @@ func main() {
243253 } else {
244254 close (certsReady )
245255 }
246- cacheOptions := []schdcache.Option {schdcache .WithPodsReadyTracking (blockForPodsReady (& cfg ))}
247- queueOptions := []qcache.Option {qcache .WithPodsReadyRequeuingTimestamp (podsReadyRequeuingTimestamp (& cfg ))}
256+ cacheOptions := []schdcache.Option {schdcache .WithPodsReadyTracking (blockForPodsReady (& cfg )), schdcache . WithRoleTracker ( roleTracker ) }
257+ queueOptions := []qcache.Option {qcache .WithPodsReadyRequeuingTimestamp (podsReadyRequeuingTimestamp (& cfg )), qcache . WithRoleTracker ( roleTracker ) }
248258 if cfg .Resources != nil && len (cfg .Resources .ExcludeResourcePrefixes ) > 0 {
249259 cacheOptions = append (cacheOptions , schdcache .WithExcludedResourcePrefixes (cfg .Resources .ExcludeResourcePrefixes ))
250260 queueOptions = append (queueOptions , qcache .WithExcludedResourcePrefixes (cfg .Resources .ExcludeResourcePrefixes ))
@@ -287,12 +297,12 @@ func main() {
287297 os .Exit (1 )
288298 }
289299
290- if err := setupControllers (ctx , mgr , cCache , queues , & cfg , serverVersionFetcher ); err != nil {
300+ if err := setupControllers (ctx , mgr , cCache , queues , & cfg , serverVersionFetcher , roleTracker ); err != nil {
291301 setupLog .Error (err , "Unable to setup controllers" )
292302 os .Exit (1 )
293303 }
294304
295- if failedWebhook , err := webhooks .Setup (mgr ); err != nil {
305+ if failedWebhook , err := webhooks .Setup (mgr , webhooks . WithRoleTracker ( roleTracker ) ); err != nil {
296306 setupLog .Error (err , "Unable to create webhook" , "webhook" , failedWebhook )
297307 os .Exit (1 )
298308 }
@@ -309,7 +319,7 @@ func main() {
309319 }()
310320 }
311321
312- if err := setupScheduler (mgr , cCache , queues , & cfg ); err != nil {
322+ if err := setupScheduler (mgr , cCache , queues , & cfg , roleTracker ); err != nil {
313323 setupLog .Error (err , "Could not setup scheduler" )
314324 os .Exit (1 )
315325 }
@@ -352,8 +362,8 @@ func setupIndexes(ctx context.Context, mgr ctrl.Manager, cfg *configapi.Configur
352362 return jobframework .SetupIndexes (ctx , mgr .GetFieldIndexer (), opts ... )
353363}
354364
355- func setupControllers (ctx context.Context , mgr ctrl.Manager , cCache * schdcache.Cache , queues * qcache.Manager , cfg * configapi.Configuration , serverVersionFetcher * kubeversion.ServerVersionFetcher ) error {
356- if failedCtrl , err := core .SetupControllers (mgr , queues , cCache , cfg ); err != nil {
365+ func setupControllers (ctx context.Context , mgr ctrl.Manager , cCache * schdcache.Cache , queues * qcache.Manager , cfg * configapi.Configuration , serverVersionFetcher * kubeversion.ServerVersionFetcher , roleTracker * roletracker. RoleTracker ) error {
366+ if failedCtrl , err := core .SetupControllers (mgr , queues , cCache , cfg , core . WithSetupRoleTracker ( roleTracker ) ); err != nil {
357367 return fmt .Errorf ("unable to create controller %s: %w" , failedCtrl , err )
358368 }
359369 if features .Enabled (features .FailureRecoveryPolicy ) {
@@ -413,7 +423,7 @@ func setupControllers(ctx context.Context, mgr ctrl.Manager, cCache *schdcache.C
413423 }
414424
415425 if features .Enabled (features .TopologyAwareScheduling ) {
416- if failedCtrl , err := tas .SetupControllers (mgr , queues , cCache , cfg ); err != nil {
426+ if failedCtrl , err := tas .SetupControllers (mgr , queues , cCache , cfg , tas . WithRoleTracker ( roleTracker ) ); err != nil {
417427 return fmt .Errorf ("could not setup TAS controller %s: %w" , failedCtrl , err )
418428 }
419429 }
@@ -429,6 +439,7 @@ func setupControllers(ctx context.Context, mgr ctrl.Manager, cCache *schdcache.C
429439 jobframework .WithCache (cCache ),
430440 jobframework .WithQueues (queues ),
431441 jobframework .WithObjectRetentionPolicies (cfg .ObjectRetentionPolicies ),
442+ jobframework .WithRoleTracker (roleTracker ),
432443 }
433444 nsSelector , err := metav1 .LabelSelectorAsSelector (cfg .ManagedJobsNamespaceSelector )
434445 if err != nil {
@@ -472,7 +483,7 @@ func setupProbeEndpoints(mgr ctrl.Manager, certsReady <-chan struct{}) error {
472483 return nil
473484}
474485
475- func setupScheduler (mgr ctrl.Manager , cCache * schdcache.Cache , queues * qcache.Manager , cfg * configapi.Configuration ) error {
486+ func setupScheduler (mgr ctrl.Manager , cCache * schdcache.Cache , queues * qcache.Manager , cfg * configapi.Configuration , roleTracker * roletracker. RoleTracker ) error {
476487 sched := scheduler .New (
477488 queues ,
478489 cCache ,
@@ -481,6 +492,7 @@ func setupScheduler(mgr ctrl.Manager, cCache *schdcache.Cache, queues *qcache.Ma
481492 scheduler .WithPodsReadyRequeuingTimestamp (podsReadyRequeuingTimestamp (cfg )),
482493 scheduler .WithFairSharing (cfg .FairSharing ),
483494 scheduler .WithAdmissionFairSharing (cfg .AdmissionFairSharing ),
495+ scheduler .WithRoleTracker (roleTracker ),
484496 )
485497 if err := mgr .Add (sched ); err != nil {
486498 return fmt .Errorf ("unable to add scheduler to manager: %w" , err )
0 commit comments