Commit 9f4a222

Wire up context, add stop channels, etc
1 parent 72fdebb

10 files changed: +74 -35 lines changed

vertical-pod-autoscaler/pkg/admission-controller/main.go

Lines changed: 5 additions & 2 deletions
@@ -17,6 +17,7 @@ limitations under the License.
 package main
 
 import (
+	"context"
 	"flag"
 	"fmt"
 	"net/http"
@@ -99,12 +100,14 @@ func main() {
 
 	config := common.CreateKubeConfigOrDie(commonFlags.KubeConfig, float32(commonFlags.KubeApiQps), int(commonFlags.KubeApiBurst))
 
+	ctx := context.Background()
+
 	vpaClient := vpa_clientset.NewForConfigOrDie(config)
 	vpaLister := vpa_api_util.NewVpasLister(vpaClient, make(chan struct{}), commonFlags.VpaObjectNamespace)
 	kubeClient := kube_client.NewForConfigOrDie(config)
 	factory := informers.NewSharedInformerFactory(kubeClient, defaultResyncPeriod)
-	targetSelectorFetcher := target.NewVpaTargetSelectorFetcher(config, kubeClient, factory)
-	controllerFetcher := controllerfetcher.NewControllerFetcher(config, kubeClient, factory, scaleCacheEntryFreshnessTime, scaleCacheEntryLifetime, scaleCacheEntryJitterFactor)
+	targetSelectorFetcher := target.NewVpaTargetSelectorFetcher(ctx, config, kubeClient, factory)
+	controllerFetcher := controllerfetcher.NewControllerFetcher(ctx, config, kubeClient, factory, scaleCacheEntryFreshnessTime, scaleCacheEntryLifetime, scaleCacheEntryJitterFactor)
 	podPreprocessor := pod.NewDefaultPreProcessor()
 	vpaPreprocessor := vpa.NewDefaultPreProcessor()
 	var limitRangeCalculator limitrange.LimitRangeCalculator
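
The ctx introduced here is context.Background(), which is never canceled; the point of the change is to thread a context-aware signature through the fetchers. Purely as an illustration of where a cancelable root context usually comes from (not something this commit does), os/signal.NotifyContext can tie it to process signals:

package main

import (
	"context"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	// Illustrative sketch, not part of this commit: a root context canceled on
	// SIGINT/SIGTERM, so context-aware loops started from it stop on shutdown.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	<-ctx.Done() // blocks until a signal arrives
}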

vertical-pod-autoscaler/pkg/recommender/app/app.go

Lines changed: 6 additions & 10 deletions
@@ -101,10 +101,10 @@ func (app *RecommenderApp) Run(ctx context.Context, leaderElection componentbase
 	metrics_recommender.Register()
 	metrics_quality.Register()
 	metrics_resources.Register()
-	server.Initialize(&app.config.CommonFlags.EnableProfiling, healthCheck, &app.config.Address)
+	server.InitializeWithContext(ctx, &app.config.CommonFlags.EnableProfiling, healthCheck, &app.config.Address)
 
 	if !leaderElection.LeaderElect {
-		return app.run(ctx, healthCheck)
+		return app.run(ctx, stopCh, healthCheck)
 	} else {
 		id, err := os.Hostname()
 		if err != nil {
@@ -138,7 +138,7 @@ func (app *RecommenderApp) Run(ctx context.Context, leaderElection componentbase
 			ReleaseOnCancel: true,
 			Callbacks: leaderelection.LeaderCallbacks{
 				OnStartedLeading: func(_ context.Context) {
-					if err := app.run(ctx, healthCheck); err != nil {
+					if err := app.run(ctx, stopCh, healthCheck); err != nil {
 						klog.Fatalf("Error running recommender: %v", err)
 					}
 				},
@@ -152,16 +152,12 @@ func (app *RecommenderApp) Run(ctx context.Context, leaderElection componentbase
 	return nil
 }
 
-func (app *RecommenderApp) run(ctx context.Context, healthCheck *metrics.HealthCheck) error {
-	// Create a stop channel that will be used to signal shutdown
-	stopCh := make(chan struct{})
-	defer close(stopCh)
-
+func (app *RecommenderApp) run(ctx context.Context, stopCh chan struct{}, healthCheck *metrics.HealthCheck) error {
 	config := common.CreateKubeConfigOrDie(app.config.CommonFlags.KubeConfig, float32(app.config.CommonFlags.KubeApiQps), int(app.config.CommonFlags.KubeApiBurst))
 	kubeClient := kube_client.NewForConfigOrDie(config)
 	clusterState := model.NewClusterState(aggregateContainerStateGCInterval)
 	factory := informers.NewSharedInformerFactoryWithOptions(kubeClient, defaultResyncPeriod, informers.WithNamespace(app.config.CommonFlags.VpaObjectNamespace))
-	controllerFetcher := controllerfetcher.NewControllerFetcher(config, kubeClient, factory, scaleCacheEntryFreshnessTime, scaleCacheEntryLifetime, scaleCacheEntryJitterFactor)
+	controllerFetcher := controllerfetcher.NewControllerFetcher(ctx, config, kubeClient, factory, scaleCacheEntryFreshnessTime, scaleCacheEntryLifetime, scaleCacheEntryJitterFactor)
 	podLister, oomObserver := input.NewPodListerAndOOMObserver(ctx, kubeClient, app.config.CommonFlags.VpaObjectNamespace, stopCh)
 
 	factory.Start(stopCh)
@@ -223,7 +219,7 @@ func (app *RecommenderApp) run(ctx context.Context, healthCheck *metrics.HealthC
 		VpaLister:           vpa_api_util.NewVpasLister(vpa_clientset.NewForConfigOrDie(config), stopCh, app.config.CommonFlags.VpaObjectNamespace),
 		VpaCheckpointLister: vpa_api_util.NewVpaCheckpointLister(vpa_clientset.NewForConfigOrDie(config), stopCh, app.config.CommonFlags.VpaObjectNamespace),
 		ClusterState:        clusterState,
-		SelectorFetcher:     target.NewVpaTargetSelectorFetcher(config, kubeClient, factory),
+		SelectorFetcher:     target.NewVpaTargetSelectorFetcher(ctx, config, kubeClient, factory),
 		MemorySaveMode:      app.config.MemorySaver,
 		ControllerFetcher:   controllerFetcher,
 		RecommenderName:     app.config.RecommenderName,
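
run() now receives the stop channel from its caller instead of creating and closing one per invocation; the hunk does not show where Run builds it. A hedged sketch of one way a stop channel can be tied to a context (the contextStopCh helper below is illustrative, not code from this commit):

package main

import (
	"context"
	"time"
)

// contextStopCh is an illustrative helper (not from this commit): it returns a
// stop channel that closes when ctx is canceled, bridging context-aware code
// and the stop-channel APIs (informer factories, listers) used inside run().
func contextStopCh(ctx context.Context) chan struct{} {
	stopCh := make(chan struct{})
	go func() {
		<-ctx.Done()
		close(stopCh)
	}()
	return stopCh
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	stopCh := contextStopCh(ctx)
	<-stopCh // closes roughly when ctx expires
}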

vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go

Lines changed: 6 additions & 1 deletion
@@ -138,7 +138,12 @@ func WatchEvictionEventsWithRetries(ctx context.Context, kubeClient kube_client.
 				// Wait between attempts, retrying too often breaks API server.
 				waitTime := wait.Jitter(evictionWatchRetryWait, evictionWatchJitterFactor)
 				klog.V(1).InfoS("An attempt to watch eviction events finished", "waitTime", waitTime)
-				time.Sleep(waitTime)
+				// Use a timer that can be interrupted by context cancellation
+				select {
+				case <-ctx.Done():
+					return
+				case <-time.After(waitTime):
+				}
 			}
 		}
 	}()
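
The replacement for time.Sleep is the standard interruptible-wait idiom: block on whichever of ctx.Done() and a timer fires first. The same pattern in a self-contained sketch (retryLoop and its arguments are illustrative names, not from this file):

package main

import (
	"context"
	"fmt"
	"time"
)

// retryLoop repeats doWork, waiting waitTime between attempts, but returns
// promptly once ctx is canceled instead of sleeping through the whole delay.
func retryLoop(ctx context.Context, waitTime time.Duration, doWork func()) {
	for {
		doWork()
		select {
		case <-ctx.Done():
			return // shutdown requested: stop retrying
		case <-time.After(waitTime):
			// delay elapsed: try again
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 350*time.Millisecond)
	defer cancel()
	retryLoop(ctx, 100*time.Millisecond, func() { fmt.Println("attempt") })
}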

vertical-pod-autoscaler/pkg/target/controller_fetcher/controller_fetcher.go

Lines changed: 3 additions & 3 deletions
@@ -112,7 +112,7 @@ func (f *controllerFetcher) Start(ctx context.Context, loopPeriod time.Duration)
 }
 
 // NewControllerFetcher returns a new instance of controllerFetcher
-func NewControllerFetcher(config *rest.Config, kubeClient kube_client.Interface, factory informers.SharedInformerFactory, betweenRefreshes, lifeTime time.Duration, jitterFactor float64) *controllerFetcher {
+func NewControllerFetcher(ctx context.Context, config *rest.Config, kubeClient kube_client.Interface, factory informers.SharedInformerFactory, betweenRefreshes, lifeTime time.Duration, jitterFactor float64) *controllerFetcher {
 	discoveryClient, err := discovery.NewDiscoveryClientForConfig(config)
 	if err != nil {
 		klog.ErrorS(err, "Could not create discoveryClient")
@@ -122,9 +122,9 @@ func NewControllerFetcher(config *rest.Config, kubeClient kube_client.Interface,
 	restClient := kubeClient.CoreV1().RESTClient()
 	cachedDiscoveryClient := cacheddiscovery.NewMemCacheClient(discoveryClient)
 	mapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscoveryClient)
-	go wait.Until(func() {
+	go wait.UntilWithContext(ctx, func(ctx context.Context) {
 		mapper.Reset()
-	}, discoveryResetPeriod, make(chan struct{}))
+	}, discoveryResetPeriod)
 
 	informersMap := map[wellKnownController]cache.SharedIndexInformer{
 		daemonSet: factory.Apps().V1().DaemonSets().Informer(),
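
wait.UntilWithContext (k8s.io/apimachinery/pkg/util/wait) replaces the stop-channel form of wait.Until, so the periodic mapper.Reset() loop stops when the passed context is canceled instead of running against a channel that was never closed. A minimal usage sketch, assuming the apimachinery module this repo already depends on:

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	// Runs the function immediately and then every 200ms until ctx is done;
	// with the old wait.Until form this loop needed a separate stop channel.
	wait.UntilWithContext(ctx, func(ctx context.Context) {
		fmt.Println("resetting REST mapper cache (stand-in for mapper.Reset())")
	}, 200*time.Millisecond)
}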

vertical-pod-autoscaler/pkg/target/fetcher.go

Lines changed: 3 additions & 3 deletions
@@ -67,7 +67,7 @@ const (
 )
 
 // NewVpaTargetSelectorFetcher returns new instance of VpaTargetSelectorFetcher
-func NewVpaTargetSelectorFetcher(config *rest.Config, kubeClient kube_client.Interface, factory informers.SharedInformerFactory) VpaTargetSelectorFetcher {
+func NewVpaTargetSelectorFetcher(ctx context.Context, config *rest.Config, kubeClient kube_client.Interface, factory informers.SharedInformerFactory) VpaTargetSelectorFetcher {
 	discoveryClient, err := discovery.NewDiscoveryClientForConfig(config)
 	if err != nil {
 		klog.ErrorS(err, "Could not create discoveryClient")
@@ -77,9 +77,9 @@ func NewVpaTargetSelectorFetcher(config *rest.Config, kubeClient kube_client.Int
 	restClient := kubeClient.CoreV1().RESTClient()
 	cachedDiscoveryClient := cacheddiscovery.NewMemCacheClient(discoveryClient)
 	mapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscoveryClient)
-	go wait.Until(func() {
+	go wait.UntilWithContext(ctx, func(ctx context.Context) {
 		mapper.Reset()
-	}, discoveryResetPeriod, make(chan struct{}))
+	}, discoveryResetPeriod)
 
 	informersMap := map[wellKnownController]cache.SharedIndexInformer{
 		daemonSet: factory.Apps().V1().DaemonSets().Informer(),

vertical-pod-autoscaler/pkg/updater/main.go

Lines changed: 3 additions & 2 deletions
@@ -175,12 +175,13 @@ func defaultLeaderElectionConfiguration() componentbaseconfig.LeaderElectionConf
 func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) {
 	stopCh := make(chan struct{})
 	defer close(stopCh)
+	ctx := context.Background()
 	config := common.CreateKubeConfigOrDie(commonFlag.KubeConfig, float32(commonFlag.KubeApiQps), int(commonFlag.KubeApiBurst))
 	kubeClient := kube_client.NewForConfigOrDie(config)
 	vpaClient := vpa_clientset.NewForConfigOrDie(config)
 	factory := informers.NewSharedInformerFactory(kubeClient, defaultResyncPeriod)
-	targetSelectorFetcher := target.NewVpaTargetSelectorFetcher(config, kubeClient, factory)
-	controllerFetcher := controllerfetcher.NewControllerFetcher(config, kubeClient, factory, scaleCacheEntryFreshnessTime, scaleCacheEntryLifetime, scaleCacheEntryJitterFactor)
+	targetSelectorFetcher := target.NewVpaTargetSelectorFetcher(ctx, config, kubeClient, factory)
+	controllerFetcher := controllerfetcher.NewControllerFetcher(ctx, config, kubeClient, factory, scaleCacheEntryFreshnessTime, scaleCacheEntryLifetime, scaleCacheEntryJitterFactor)
 	var limitRangeCalculator limitrange.LimitRangeCalculator
 	limitRangeCalculator, err := limitrange.NewLimitsRangeCalculator(factory)
 	if err != nil {

vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go

Lines changed: 9 additions & 9 deletions
@@ -120,15 +120,15 @@ var (
 
 // Register initializes all VPA quality metrics
 func Register() {
-	prometheus.MustRegister(usageRecommendationRelativeDiff)
-	prometheus.MustRegister(usageMissingRecommendationCounter)
-	prometheus.MustRegister(cpuRecommendationOverUsageDiff)
-	prometheus.MustRegister(memoryRecommendationOverUsageDiff)
-	prometheus.MustRegister(cpuRecommendationLowerOrEqualUsageDiff)
-	prometheus.MustRegister(memoryRecommendationLowerOrEqualUsageDiff)
-	prometheus.MustRegister(cpuRecommendations)
-	prometheus.MustRegister(memoryRecommendations)
-	prometheus.MustRegister(relativeRecommendationChange)
+	_ = prometheus.Register(usageRecommendationRelativeDiff)
+	_ = prometheus.Register(usageMissingRecommendationCounter)
+	_ = prometheus.Register(cpuRecommendationOverUsageDiff)
+	_ = prometheus.Register(memoryRecommendationOverUsageDiff)
+	_ = prometheus.Register(cpuRecommendationLowerOrEqualUsageDiff)
+	_ = prometheus.Register(memoryRecommendationLowerOrEqualUsageDiff)
+	_ = prometheus.Register(cpuRecommendations)
+	_ = prometheus.Register(memoryRecommendations)
+	_ = prometheus.Register(relativeRecommendationChange)
 }
 
 // observeUsageRecommendationRelativeDiff records relative diff between usage and
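
Switching from prometheus.MustRegister to prometheus.Register turns duplicate registration from a panic into a returned error (prometheus.AlreadyRegisteredError), which these Register() helpers now discard so they can safely be called more than once per process. A small standalone sketch of the difference, using a throwaway counter:

package main

import (
	"errors"
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	c := prometheus.NewCounter(prometheus.CounterOpts{Name: "demo_total", Help: "demo"})

	// First registration succeeds.
	fmt.Println(prometheus.Register(c)) // <nil>

	// A second registration returns AlreadyRegisteredError instead of panicking;
	// that is the error `_ = prometheus.Register(...)` discards above.
	err := prometheus.Register(c)
	var are prometheus.AlreadyRegisteredError
	fmt.Println(errors.As(err, &are)) // true

	// prometheus.MustRegister(c) at this point would panic with the same error.
}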

vertical-pod-autoscaler/pkg/utils/metrics/recommender/recommender.go

Lines changed: 13 additions & 1 deletion
@@ -115,7 +115,19 @@ type ObjectCounter struct {
 
 // Register initializes all metrics for VPA Recommender
 func Register() {
-	prometheus.MustRegister(vpaObjectCount, recommendationLatency, functionLatency, aggregateContainerStatesCount, metricServerResponses, prometheusClientRequestsCount, prometheusClientRequestsDuration)
+	collectors := []prometheus.Collector{
+		vpaObjectCount,
+		recommendationLatency,
+		functionLatency,
+		aggregateContainerStatesCount,
+		metricServerResponses,
+		prometheusClientRequestsCount,
+		prometheusClientRequestsDuration,
+	}
+	for _, c := range collectors {
+		// Ignore AlreadyRegisteredError
+		_ = prometheus.Register(c)
+	}
 }
 
 // NewExecutionTimer provides a timer for Recommender's RunOnce execution

vertical-pod-autoscaler/pkg/utils/metrics/resources/resources.go

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ var (
 
 // Register initializes all metrics for VPA resources
 func Register() {
-	prometheus.MustRegister(getResourcesCount)
+	_ = prometheus.Register(getResourcesCount)
 }
 
 // RecordGetResourcesCount records how many times VPA requested the resources (

vertical-pod-autoscaler/pkg/utils/server/server.go

Lines changed: 25 additions & 3 deletions
@@ -18,6 +18,7 @@ limitations under the License.
 package server
 
 import (
+	"context"
 	"net/http"
 	"net/http/pprof"
 
@@ -29,6 +30,12 @@ import (
 
 // Initialize sets up Prometheus to expose metrics & (optionally) health-check and profiling on the given address
 func Initialize(enableProfiling *bool, healthCheck *metrics.HealthCheck, address *string) {
+	InitializeWithContext(context.Background(), enableProfiling, healthCheck, address)
+}
+
+// InitializeWithContext sets up Prometheus to expose metrics & (optionally) health-check and profiling on the given address.
+// The server will shut down gracefully when the context is canceled.
+func InitializeWithContext(ctx context.Context, enableProfiling *bool, healthCheck *metrics.HealthCheck, address *string) {
 	go func() {
 		mux := http.NewServeMux()
 
@@ -45,8 +52,23 @@ func Initialize(enableProfiling *bool, healthCheck *metrics.HealthCheck, address
 			mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
 		}
 
-		err := http.ListenAndServe(*address, mux)
-		klog.ErrorS(err, "Failed to start metrics")
-		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
+		server := &http.Server{
+			Addr:    *address,
+			Handler: mux,
+		}
+
+		// Start server shutdown when context is canceled
+		go func() {
+			<-ctx.Done()
+			if err := server.Shutdown(context.Background()); err != nil {
+				klog.ErrorS(err, "Failed to shutdown metrics server")
+			}
+		}()
+
+		err := server.ListenAndServe()
+		if err != nil && err != http.ErrServerClosed {
+			klog.ErrorS(err, "Failed to start metrics")
+			klog.FlushAndExit(klog.ExitFlushTimeout, 1)
+		}
 	}()
 }
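
The metrics endpoint now runs on an explicit http.Server so it can be shut down when the context is canceled, and http.ErrServerClosed is treated as a clean exit rather than a startup failure. The same pattern in a self-contained sketch (the address and timeout are placeholders):

package main

import (
	"context"
	"errors"
	"log"
	"net/http"
	"time"
)

func main() {
	// Placeholder context and address for the sketch; the real code uses the
	// caller's ctx and the configured metrics address.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	server := &http.Server{Addr: "127.0.0.1:8942", Handler: http.NewServeMux()}

	// Shut the server down once the context is canceled.
	go func() {
		<-ctx.Done()
		if err := server.Shutdown(context.Background()); err != nil {
			log.Printf("shutdown: %v", err)
		}
	}()

	// ListenAndServe returns http.ErrServerClosed after Shutdown; anything else
	// is a real startup or serve failure.
	if err := server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
		log.Fatalf("serve: %v", err)
	}
}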
