Skip to content

Commit 6642110

Browse files
authored
cherrypick(v0.32): feat: Add Versioned for NodePool Hash to Prevent Drift on NodePool CRD Upgrade (#1131)
1 parent 8861740 commit 6642110

File tree

8 files changed

+207
-7
lines changed

8 files changed

+207
-7
lines changed

pkg/apis/v1beta1/labels.go

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ const (
4444
ProviderCompatabilityAnnotationKey = CompatabilityGroup + "/provider"
4545
ManagedByAnnotationKey = Group + "/managed-by"
4646
NodePoolHashAnnotationKey = Group + "/nodepool-hash"
47+
NodePoolHashVersionAnnotationKey = Group + "/nodepool-hash-version"
4748
)
4849

4950
// Karpenter specific finalizers

pkg/apis/v1beta1/nodepool.go

+6
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,12 @@ type NodePool struct {
144144
IsProvisioner bool `json:"-"`
145145
}
146146

147+
// We need to bump the NodePoolHashVersion when we make an update to the NodePool CRD under these conditions:
148+
// 1. A field changes its default value for an existing field that is already hashed
149+
// 2. A field is added to the hash calculation with an already-set value
150+
// 3. A field is removed from the hash calculations
151+
const NodePoolHashVersion = "v1"
152+
147153
func (in *NodePool) Hash() string {
148154
return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec.Template, hashstructure.FormatV2, &hashstructure.HashOptions{
149155
SlicesAsSets: true,

pkg/controllers/nodeclaim/disruption/drift.go

+14-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ func (d *Drift) isDrifted(ctx context.Context, nodePool *v1beta1.NodePool, nodeC
109109
return driftedReason, nil
110110
}
111111

112-
// Eligible fields for static drift are described in the docs
112+
// Eligible fields for drift are described in the docs
113113
// https://karpenter.sh/docs/concepts/deprovisioning/#drift
114114
func areStaticFieldsDrifted(nodePool *v1beta1.NodePool, nodeClaim *v1beta1.NodeClaim) cloudprovider.DriftReason {
115115
var ownerHashKey string
@@ -123,6 +123,19 @@ func areStaticFieldsDrifted(nodePool *v1beta1.NodePool, nodeClaim *v1beta1.NodeC
123123
if !foundHashNodePool || !foundHashNodeClaim {
124124
return ""
125125
}
126+
127+
if !nodePool.IsProvisioner {
128+
nodeClaimVersionHash, foundVersionHashNodeClaim := nodeClaim.Annotations[v1beta1.NodePoolHashVersionAnnotationKey]
129+
nodePoolVersionHash, foundVersionHashNodePool := nodePool.Annotations[v1beta1.NodePoolHashVersionAnnotationKey]
130+
if !foundVersionHashNodePool || !foundVersionHashNodeClaim {
131+
return ""
132+
}
133+
// validate that the version of the crd is the same
134+
if nodePoolVersionHash != nodeClaimVersionHash {
135+
return ""
136+
}
137+
}
138+
126139
if nodePoolHash != nodeClaimHash {
127140
if nodeClaim.IsMachine {
128141
return ProvisionerDrifted

pkg/controllers/nodeclaim/disruption/nodeclaim_drift_test.go

+23-3
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,12 @@ var _ = Describe("NodeClaim/Drift", func() {
120120
It("should detect static drift before cloud provider drift", func() {
121121
cp.Drifted = "drifted"
122122
nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{
123-
v1beta1.NodePoolHashAnnotationKey: "123456789",
123+
v1beta1.NodePoolHashAnnotationKey: "test-123456789",
124+
v1beta1.NodePoolHashVersionAnnotationKey: v1beta1.NodePoolHashVersion,
125+
})
126+
nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{
127+
v1beta1.NodePoolHashAnnotationKey: "test-123",
128+
v1beta1.NodePoolHashVersionAnnotationKey: v1beta1.NodePoolHashVersion,
124129
})
125130
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
126131
ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))
@@ -425,8 +430,9 @@ var _ = Describe("NodeClaim/Drift", func() {
425430
Template: v1beta1.NodeClaimTemplate{
426431
ObjectMeta: v1beta1.ObjectMeta{
427432
Annotations: map[string]string{
428-
"keyAnnotation": "valueAnnotation",
429-
"keyAnnotation2": "valueAnnotation2",
433+
"keyAnnotation": "valueAnnotation",
434+
"keyAnnotation2": "valueAnnotation2",
435+
v1beta1.NodePoolHashVersionAnnotationKey: v1beta1.NodePoolHashVersion,
430436
},
431437
Labels: map[string]string{
432438
"keyLabel": "valueLabel",
@@ -493,5 +499,19 @@ var _ = Describe("NodeClaim/Drift", func() {
493499
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
494500
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted)).To(BeNil())
495501
})
502+
It("should not return drifted if the NodeClaim's karpenter.sh/nodepool-hash-version annotation does not match the NodePool's", func() {
503+
nodePool.ObjectMeta.Annotations = map[string]string{
504+
v1beta1.NodePoolHashAnnotationKey: "test-hash-1",
505+
v1beta1.NodePoolHashVersionAnnotationKey: "test-version-1",
506+
}
507+
nodeClaim.ObjectMeta.Annotations = map[string]string{
508+
v1beta1.NodePoolHashAnnotationKey: "test-hash-2",
509+
v1beta1.NodePoolHashVersionAnnotationKey: "test-version-2",
510+
}
511+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
512+
ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))
513+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
514+
Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted)).To(BeNil())
515+
})
496516
})
497517
})

pkg/controllers/nodepool/hash/controller.go

+49
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919

2020
"github.com/samber/lo"
21+
"go.uber.org/multierr"
2122
"k8s.io/apimachinery/pkg/api/equality"
2223
controllerruntime "sigs.k8s.io/controller-runtime"
2324
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -47,6 +48,12 @@ func NewController(kubeClient client.Client) *Controller {
4748
// Reconcile the resource
4849
func (c *Controller) Reconcile(ctx context.Context, np *v1beta1.NodePool) (reconcile.Result, error) {
4950
stored := np.DeepCopy()
51+
52+
if np.Annotations[v1beta1.NodePoolHashVersionAnnotationKey] != v1beta1.NodePoolHashVersion {
53+
if err := c.updateNodeClaimHash(ctx, np); err != nil {
54+
return reconcile.Result{}, err
55+
}
56+
}
5057
np.Annotations = lo.Assign(np.Annotations, nodepoolutil.HashAnnotation(np))
5158

5259
if !equality.Semantic.DeepEqual(stored, np) {
@@ -107,3 +114,45 @@ func (c *NodePoolController) Builder(_ context.Context, m manager.Manager) corec
107114
WithOptions(controller.Options{MaxConcurrentReconciles: 10}),
108115
)
109116
}
117+
118+
// Updating `nodepool-hash-version` annotation inside the controller means a breaking change has made to the hash function calculating
119+
// `nodepool-hash` on both the NodePool and NodeClaim, automatically making the `nodepool-hash` on the NodeClaim different from
120+
// NodePool. Since we can not rely on the hash on the NodeClaims, we will need to re-calculate the hash and update the annotation.
121+
// Look at designs/drift-hash-version.md for more information.
122+
func (c *Controller) updateNodeClaimHash(ctx context.Context, np *v1beta1.NodePool) error {
123+
if np.IsProvisioner {
124+
return nil
125+
}
126+
ncList := &v1beta1.NodeClaimList{}
127+
if err := c.kubeClient.List(ctx, ncList, client.MatchingLabels(map[string]string{v1beta1.NodePoolLabelKey: np.Name})); err != nil {
128+
return err
129+
}
130+
131+
errs := make([]error, len(ncList.Items))
132+
for i := range ncList.Items {
133+
nc := ncList.Items[i]
134+
stored := nc.DeepCopy()
135+
136+
if nc.Annotations[v1beta1.NodePoolHashVersionAnnotationKey] != v1beta1.NodePoolHashVersion {
137+
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
138+
v1beta1.NodePoolHashVersionAnnotationKey: v1beta1.NodePoolHashVersion,
139+
})
140+
141+
// Any NodeClaim that is already drifted will remain drifted if the karpenter.sh/nodepool-hash-version doesn't match
142+
// Since the hashing mechanism has changed we will not be able to determine if the drifted status of the node has changed
143+
if nc.StatusConditions().GetCondition(v1beta1.Drifted) == nil {
144+
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
145+
v1beta1.NodePoolHashAnnotationKey: np.Hash(),
146+
})
147+
}
148+
149+
if !equality.Semantic.DeepEqual(stored, nc) {
150+
if err := c.kubeClient.Patch(ctx, &nc, client.MergeFrom(stored)); err != nil {
151+
errs[i] = client.IgnoreNotFound(err)
152+
}
153+
}
154+
}
155+
}
156+
157+
return multierr.Combine(errs...)
158+
}

pkg/controllers/nodepool/hash/nodepool_test.go

+105
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ import (
2828
"github.com/aws/karpenter-core/pkg/apis/v1beta1"
2929
"github.com/aws/karpenter-core/pkg/test"
3030
. "github.com/aws/karpenter-core/pkg/test/expectations"
31+
32+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3133
)
3234

3335
var _ = Describe("NodePool Static Drift Hash", func() {
@@ -105,4 +107,107 @@ var _ = Describe("NodePool Static Drift Hash", func() {
105107

106108
Expect(nodePool.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashAnnotationKey, expectedHash))
107109
})
110+
It("should update nodepool hash version when the nodepool hash version is out of sync with the controller hash version", func() {
111+
nodePool.Annotations = map[string]string{
112+
v1beta1.NodePoolHashAnnotationKey: "abceduefed",
113+
v1beta1.NodePoolHashVersionAnnotationKey: "test",
114+
}
115+
ExpectApplied(ctx, env.Client, nodePool)
116+
117+
ExpectReconcileSucceeded(ctx, nodePoolController, client.ObjectKeyFromObject(nodePool))
118+
nodePool = ExpectExists(ctx, env.Client, nodePool)
119+
120+
expectedHash := nodePool.Hash()
121+
Expect(nodePool.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashAnnotationKey, expectedHash))
122+
Expect(nodePool.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashVersionAnnotationKey, v1beta1.NodePoolHashVersion))
123+
})
124+
It("should update nodepool hash versions on all nodeclaims when the hash versions don't match the controller hash version", func() {
125+
nodePool.Annotations = map[string]string{
126+
v1beta1.NodePoolHashAnnotationKey: "abceduefed",
127+
v1beta1.NodePoolHashVersionAnnotationKey: "test",
128+
}
129+
nodeClaimOne := test.NodeClaim(v1beta1.NodeClaim{
130+
ObjectMeta: metav1.ObjectMeta{
131+
Labels: map[string]string{v1beta1.NodePoolLabelKey: nodePool.Name},
132+
Annotations: map[string]string{
133+
v1beta1.NodePoolHashAnnotationKey: "123456",
134+
v1beta1.NodePoolHashVersionAnnotationKey: "test",
135+
},
136+
},
137+
})
138+
nodeClaimTwo := test.NodeClaim(v1beta1.NodeClaim{
139+
ObjectMeta: metav1.ObjectMeta{
140+
Labels: map[string]string{v1beta1.NodePoolLabelKey: nodePool.Name},
141+
Annotations: map[string]string{
142+
v1beta1.NodePoolHashAnnotationKey: "123456",
143+
v1beta1.NodePoolHashVersionAnnotationKey: "test",
144+
},
145+
},
146+
})
147+
148+
ExpectApplied(ctx, env.Client, nodePool, nodeClaimOne, nodeClaimTwo)
149+
150+
ExpectReconcileSucceeded(ctx, nodePoolController, client.ObjectKeyFromObject(nodePool))
151+
nodePool = ExpectExists(ctx, env.Client, nodePool)
152+
nodeClaimOne = ExpectExists(ctx, env.Client, nodeClaimOne)
153+
nodeClaimTwo = ExpectExists(ctx, env.Client, nodeClaimTwo)
154+
155+
expectedHash := nodePool.Hash()
156+
Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashAnnotationKey, expectedHash))
157+
Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashVersionAnnotationKey, v1beta1.NodePoolHashVersion))
158+
Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashAnnotationKey, expectedHash))
159+
Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashVersionAnnotationKey, v1beta1.NodePoolHashVersion))
160+
})
161+
It("should not update nodepool hash on all nodeclaims when the hash versions match the controller hash version", func() {
162+
nodePool.Annotations = map[string]string{
163+
v1beta1.NodePoolHashAnnotationKey: "abceduefed",
164+
v1beta1.NodePoolHashVersionAnnotationKey: "test-version",
165+
}
166+
nodeClaim := test.NodeClaim(v1beta1.NodeClaim{
167+
ObjectMeta: metav1.ObjectMeta{
168+
Labels: map[string]string{v1beta1.NodePoolLabelKey: nodePool.Name},
169+
Annotations: map[string]string{
170+
v1beta1.NodePoolHashAnnotationKey: "1234564654",
171+
v1beta1.NodePoolHashVersionAnnotationKey: v1beta1.NodePoolHashVersion,
172+
},
173+
},
174+
})
175+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
176+
177+
ExpectReconcileSucceeded(ctx, nodePoolController, client.ObjectKeyFromObject(nodePool))
178+
nodePool = ExpectExists(ctx, env.Client, nodePool)
179+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
180+
181+
expectedHash := nodePool.Hash()
182+
183+
// Expect NodeClaims to have been updated to the original hash
184+
Expect(nodePool.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashAnnotationKey, expectedHash))
185+
Expect(nodePool.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashVersionAnnotationKey, v1beta1.NodePoolHashVersion))
186+
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashAnnotationKey, "1234564654"))
187+
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashVersionAnnotationKey, v1beta1.NodePoolHashVersion))
188+
})
189+
It("should not update nodepool hash on the nodeclaim if it's drifted", func() {
190+
nodePool.Annotations = map[string]string{
191+
v1beta1.NodePoolHashAnnotationKey: "abceduefed",
192+
v1beta1.NodePoolHashVersionAnnotationKey: "test",
193+
}
194+
nodeClaim := test.NodeClaim(v1beta1.NodeClaim{
195+
ObjectMeta: metav1.ObjectMeta{
196+
Labels: map[string]string{v1beta1.NodePoolLabelKey: nodePool.Name},
197+
Annotations: map[string]string{
198+
v1beta1.NodePoolHashAnnotationKey: "123456",
199+
v1beta1.NodePoolHashVersionAnnotationKey: "test",
200+
},
201+
},
202+
})
203+
nodeClaim.StatusConditions().MarkTrue(v1beta1.Drifted)
204+
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
205+
206+
ExpectReconcileSucceeded(ctx, nodePoolController, client.ObjectKeyFromObject(nodePool))
207+
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
208+
209+
// Expect NodeClaims hash to not have been updated
210+
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashAnnotationKey, "123456"))
211+
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.NodePoolHashVersionAnnotationKey, v1beta1.NodePoolHashVersion))
212+
})
108213
})

pkg/controllers/provisioning/scheduling/nodeclaimtemplate.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,11 @@ func (i *NodeClaimTemplate) ToNodeClaim(nodePool *v1beta1.NodePool) *v1beta1.Nod
7070
nc := &v1beta1.NodeClaim{
7171
ObjectMeta: metav1.ObjectMeta{
7272
GenerateName: fmt.Sprintf("%s-", i.OwnerKey.Name),
73-
Annotations: lo.Assign(i.Annotations, map[string]string{v1beta1.NodePoolHashAnnotationKey: nodePool.Hash()}),
74-
Labels: i.Labels,
73+
Annotations: lo.Assign(i.Annotations, map[string]string{
74+
v1beta1.NodePoolHashAnnotationKey: nodePool.Hash(),
75+
v1beta1.NodePoolHashVersionAnnotationKey: v1beta1.NodePoolHashVersion,
76+
}),
77+
Labels: i.Labels,
7578
OwnerReferences: []metav1.OwnerReference{
7679
{
7780
APIVersion: v1beta1.SchemeGroupVersion.String(),

pkg/utils/nodepool/nodepool.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -159,5 +159,8 @@ func HashAnnotation(nodePool *v1beta1.NodePool) map[string]string {
159159
provisioner := provisionerutil.New(nodePool)
160160
return map[string]string{v1alpha5.ProvisionerHashAnnotationKey: provisioner.Hash()}
161161
}
162-
return map[string]string{v1beta1.NodePoolHashAnnotationKey: nodePool.Hash()}
162+
return map[string]string{
163+
v1beta1.NodePoolHashAnnotationKey: nodePool.Hash(),
164+
v1beta1.NodePoolHashVersionAnnotationKey: v1beta1.NodePoolHashVersion,
165+
}
163166
}

0 commit comments

Comments
 (0)