From 545d88a836d432c3cfae17ca781ca573b46a25de Mon Sep 17 00:00:00 2001 From: Amanuel Engeda <74629455+engedaam@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:07:02 -0700 Subject: [PATCH] feat: Detect Drift on NodeClaims on changes to NodeClass (#1147) --- .../nodeclaim/disruption/controller.go | 28 +++++++++++++------ pkg/operator/operator.go | 9 ++++++ pkg/utils/nodeclaim/nodeclaim.go | 22 ++++++++++++++- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/pkg/controllers/nodeclaim/disruption/controller.go b/pkg/controllers/nodeclaim/disruption/controller.go index b666307c55..6a746fe719 100644 --- a/pkg/controllers/nodeclaim/disruption/controller.go +++ b/pkg/controllers/nodeclaim/disruption/controller.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/clock" controllerruntime "sigs.k8s.io/controller-runtime" @@ -48,7 +49,8 @@ type nodeClaimReconciler interface { // Controller is a disruption controller that adds StatusConditions to nodeclaims when they meet certain disruption conditions // e.g. 
When the NodeClaim has surpassed its owning provisioner's expirationTTL, then it is marked as "Expired" in the StatusConditions type Controller struct { - kubeClient client.Client + kubeClient client.Client + cloudProvider cloudprovider.CloudProvider drift *Drift expiration *Expiration @@ -58,10 +60,11 @@ type Controller struct { // NewController constructs a nodeclaim disruption controller func NewController(clk clock.Clock, kubeClient client.Client, cluster *state.Cluster, cloudProvider cloudprovider.CloudProvider) operatorcontroller.Controller { return operatorcontroller.Typed[*v1beta1.NodeClaim](kubeClient, &Controller{ - kubeClient: kubeClient, - drift: &Drift{cloudProvider: cloudProvider}, - expiration: &Expiration{kubeClient: kubeClient, clock: clk}, - emptiness: &Emptiness{kubeClient: kubeClient, cluster: cluster, clock: clk}, + kubeClient: kubeClient, + cloudProvider: cloudProvider, + drift: &Drift{cloudProvider: cloudProvider}, + expiration: &Expiration{kubeClient: kubeClient, clock: clk}, + emptiness: &Emptiness{kubeClient: kubeClient, cluster: cluster, clock: clk}, }) } @@ -114,7 +117,7 @@ func (c *Controller) Name() string { } func (c *Controller) Builder(_ context.Context, m manager.Manager) operatorcontroller.Builder { - return operatorcontroller.Adapt(controllerruntime. + builder := controllerruntime. NewControllerManagedBy(m). For(&v1beta1.NodeClaim{}). WithOptions(controller.Options{MaxConcurrentReconciles: 10}). 
@@ -125,6 +128,15 @@ func (c *Controller) Builder(_ context.Context, m manager.Manager) operatorcontr Watches( &v1.Pod{}, nodeclaimutil.PodEventHandler(c.kubeClient), - ), - ) + ) + for _, ncGVK := range c.cloudProvider.GetSupportedNodeClasses() { + nodeclass := &unstructured.Unstructured{} + nodeclass.SetGroupVersionKind(ncGVK) + builder = builder.Watches( + nodeclass, + nodeclaimutil.NodeClassEventHandler(c.kubeClient), + ) + } + + return operatorcontroller.Adapt(builder) } diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 64a1320c68..6971b70a1a 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -190,6 +190,15 @@ func NewOperator() (context.Context, *Operator) { lo.Must0(mgr.GetFieldIndexer().IndexField(ctx, &v1beta1.NodeClaim{}, "status.providerID", func(o client.Object) []string { return []string{o.(*v1beta1.NodeClaim).Status.ProviderID} }), "failed to setup nodeclaim provider id indexer") + lo.Must0(mgr.GetFieldIndexer().IndexField(ctx, &v1beta1.NodeClaim{}, "spec.nodeClassRef.apiVersion", func(o client.Object) []string { + return []string{o.(*v1beta1.NodeClaim).Spec.NodeClassRef.APIVersion} + }), "failed to setup nodeclaim nodeclassref apiversion indexer") + lo.Must0(mgr.GetFieldIndexer().IndexField(ctx, &v1beta1.NodeClaim{}, "spec.nodeClassRef.kind", func(o client.Object) []string { + return []string{o.(*v1beta1.NodeClaim).Spec.NodeClassRef.Kind} + }), "failed to setup nodeclaim nodeclassref kind indexer") + lo.Must0(mgr.GetFieldIndexer().IndexField(ctx, &v1beta1.NodeClaim{}, "spec.nodeClassRef.name", func(o client.Object) []string { + return []string{o.(*v1beta1.NodeClaim).Spec.NodeClassRef.Name} + }), "failed to setup nodeclaim nodeclassref name indexer") lo.Must0(mgr.AddReadyzCheck("manager", func(req *http.Request) error { return lo.Ternary(mgr.GetCache().WaitForCacheSync(req.Context()), nil, fmt.Errorf("failed to sync caches")) diff --git a/pkg/utils/nodeclaim/nodeclaim.go b/pkg/utils/nodeclaim/nodeclaim.go 
index 42c7a7a279..906c04ff61 100644 --- a/pkg/utils/nodeclaim/nodeclaim.go +++ b/pkg/utils/nodeclaim/nodeclaim.go @@ -73,7 +73,7 @@ func NodeEventHandler(c client.Client) handler.EventHandler { }) } -// NodePoolEventHandler is a watcher on v1beta1.NodeClaim that maps Provisioner to NodeClaims based +// NodePoolEventHandler is a watcher on v1beta1.NodeClaim that maps NodePool to NodeClaims based // on the v1beta1.NodePoolLabelKey and enqueues reconcile.Requests for the NodeClaim func NodePoolEventHandler(c client.Client) handler.EventHandler { return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, o client.Object) (requests []reconcile.Request) { @@ -89,6 +89,26 @@ func NodePoolEventHandler(c client.Client) handler.EventHandler { }) } +// NodeClassEventHandler returns an event handler that maps a watched NodeClass object to the NodeClaims whose +// spec.nodeClassRef apiVersion, kind and name match it, enqueuing one reconcile.Request per NodeClaim; if the List call fails, nothing is enqueued +func NodeClassEventHandler(c client.Client) handler.EventHandler { + return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, o client.Object) (requests []reconcile.Request) { + nodeClaimList := &v1beta1.NodeClaimList{} + if err := c.List(ctx, nodeClaimList, client.MatchingFields{ + "spec.nodeClassRef.apiVersion": o.GetObjectKind().GroupVersionKind().GroupVersion().String(), + "spec.nodeClassRef.kind": o.GetObjectKind().GroupVersionKind().Kind, + "spec.nodeClassRef.name": o.GetName(), + }); err != nil { + return requests + } + return lo.Map(nodeClaimList.Items, func(n v1beta1.NodeClaim, _ int) reconcile.Request { + return reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(&n), + } + }) + }) +} + // NodeNotFoundError is an error returned when no v1.Nodes are found matching the passed providerID type NodeNotFoundError struct { ProviderID string