Skip to content

Commit 1b57177

Browse files
authored
Use strict match with MapToInstanceTypes (#7923)
1 parent 3136d9e commit 1b57177

File tree

4 files changed

+106
-6
lines changed

4 files changed

+106
-6
lines changed

pkg/controllers/nodeclass/validation.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,12 +331,20 @@ func (v *Validation) mockOptions(ctx context.Context, nodeClaim *karpv1.NodeClai
331331
}
332332
return v.amiResolver.Resolve(nodeClass, nodeClaim, []*cloudprovider.InstanceType{
333333
{
334-
Name: "m5.large",
335-
Requirements: scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64)),
334+
Name: "m5.large",
335+
Requirements: scheduling.NewRequirements(
336+
scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64),
337+
scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpExists),
338+
scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpExists),
339+
),
336340
},
337341
{
338-
Name: "m6g.large",
339-
Requirements: scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureArm64)),
342+
Name: "m6g.large",
343+
Requirements: scheduling.NewRequirements(
344+
scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureArm64),
345+
scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpExists),
346+
scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpExists),
347+
),
340348
},
341349
}, karpv1.CapacityTypeOnDemand, amiOptions)
342350
}

pkg/controllers/providers/instancetype/capacity/suite_test.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ var nodeClass *v1.EC2NodeClass
6262
var nodeClaim *karpv1.NodeClaim
6363
var node *corev1.Node
6464

65+
var standardAMI v1.AMI
66+
var nvidiaAMI v1.AMI
67+
6568
func TestAWS(t *testing.T) {
6669
ctx = TestContextWithLogger(t)
6770
RegisterFailHandler(Fail)
@@ -101,6 +104,37 @@ var _ = BeforeEach(func() {
101104
})
102105
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
103106
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
107+
standardAMI = v1.AMI{
108+
Name: "standard-ami",
109+
ID: "ami-standard-test",
110+
Requirements: []corev1.NodeSelectorRequirement{
111+
{
112+
Key: corev1.LabelArchStable,
113+
Operator: corev1.NodeSelectorOpIn,
114+
Values: []string{karpv1.ArchitectureAmd64},
115+
},
116+
{
117+
Key: v1.LabelInstanceGPUCount,
118+
Operator: corev1.NodeSelectorOpDoesNotExist,
119+
},
120+
},
121+
}
122+
nvidiaAMI = v1.AMI{
123+
Name: "nvidia-ami",
124+
ID: "ami-nvidia-test",
125+
Requirements: []corev1.NodeSelectorRequirement{
126+
{
127+
Key: corev1.LabelArchStable,
128+
Operator: corev1.NodeSelectorOpIn,
129+
Values: []string{karpv1.ArchitectureAmd64},
130+
},
131+
{
132+
Key: v1.LabelInstanceGPUCount,
133+
Operator: corev1.NodeSelectorOpExists,
134+
},
135+
},
136+
}
137+
nodeClass.Status.AMIs = []v1.AMI{standardAMI, nvidiaAMI}
104138
})
105139

106140
var _ = AfterEach(func() {
@@ -118,6 +152,7 @@ var _ = Describe("CapacityCache", func() {
118152
corev1.LabelInstanceTypeStable: "t3.medium",
119153
karpv1.NodeRegisteredLabelKey: "true",
120154
"karpenter.k8s.aws/ec2nodeclass": nodeClass.Name,
155+
corev1.LabelArchStable: karpv1.ArchitectureAmd64,
121156
},
122157
},
123158
Capacity: corev1.ResourceList{
@@ -174,4 +209,62 @@ var _ = Describe("CapacityCache", func() {
174209
mem.Sub(resource.MustParse(fmt.Sprintf("%dMi", int64(math.Ceil(float64(mem.Value())*options.FromContext(ctx).VMMemoryOverheadPercent/1024/1024)))))
175210
Expect(i.Capacity.Memory().Value()).To(Equal(mem.Value()), "Expected capacity to match VMMemoryOverheadPercent calculation")
176211
})
212+
213+
It("should properly update discovered capacity when matching AMI is not the first in the list", func() {
214+
// Update nodeClass AMIs for this test
215+
nodeClass.Status.AMIs = []v1.AMI{standardAMI}
216+
ExpectApplied(ctx, env.Client, nodeClass)
217+
218+
// Get available instance types from the test environment
219+
availableInstanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodeClass)
220+
Expect(err).To(BeNil())
221+
Expect(availableInstanceTypes).ToNot(BeEmpty(), "No instance types available in test environment")
222+
223+
// Choose the first instance type for testing
224+
testInstanceType := availableInstanceTypes[0]
225+
instanceTypeName := testInstanceType.Name
226+
227+
// Create a test node with the discovered instance type
228+
memoryCapacity := resource.MustParse("4Gi")
229+
230+
testNodeClassNvidiaFirst := test.EC2NodeClass()
231+
testNodeClassNvidiaFirst.Status.AMIs = []v1.AMI{nvidiaAMI, standardAMI}
232+
ExpectApplied(ctx, env.Client, testNodeClassNvidiaFirst)
233+
234+
testNodeClaim := coretest.NodeClaim()
235+
testNodeClaim.Spec.NodeClassRef = &karpv1.NodeClassReference{
236+
Group: object.GVK(testNodeClassNvidiaFirst).Group,
237+
Kind: object.GVK(testNodeClassNvidiaFirst).Kind,
238+
Name: testNodeClassNvidiaFirst.Name,
239+
}
240+
testNodeClaim.Status.Capacity = corev1.ResourceList{
241+
corev1.ResourceMemory: memoryCapacity,
242+
}
243+
testNodeClaim.Labels[corev1.LabelInstanceTypeStable] = instanceTypeName
244+
testNodeClaim.Labels[corev1.LabelArchStable] = karpv1.ArchitectureAmd64
245+
testNodeClaim.Status.ImageID = "ami-standard-test"
246+
ExpectApplied(ctx, env.Client, testNodeClaim)
247+
248+
testNode := coretest.NodeClaimLinkedNode(testNodeClaim)
249+
ExpectApplied(ctx, env.Client, testNode)
250+
251+
ExpectObjectReconciled(ctx, env.Client, controller, testNode)
252+
253+
// Verify that the cache was updated by getting the instance types and checking the memory capacity
254+
instanceTypesAfterUpdateReversed, err := awsEnv.InstanceTypesProvider.List(ctx, testNodeClassNvidiaFirst)
255+
Expect(err).To(BeNil())
256+
257+
// Find our instance type and verify its memory capacity was updated
258+
found := false
259+
for _, it := range instanceTypesAfterUpdateReversed {
260+
if it.Name == instanceTypeName {
261+
found = true
262+
// Memory capacity should now match what we set on the node
263+
memValue := it.Capacity.Memory().Value()
264+
Expect(memValue).To(Equal(memoryCapacity.Value()))
265+
break
266+
}
267+
}
268+
Expect(found).To(BeTrue())
269+
})
177270
})

pkg/providers/amifamily/ami.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,6 @@ func MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType, amis []v1.A
194194
for _, ami := range amis {
195195
if err := instanceType.Requirements.Compatible(
196196
scheduling.NewNodeSelectorRequirements(ami.Requirements...),
197-
scheduling.AllowUndefinedWellKnownLabels,
198197
); err == nil {
199198
amiIDs[ami.ID] = append(amiIDs[ami.ID], instanceType)
200199
break

pkg/providers/instancetype/instancetype.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ func (p *DefaultProvider) UpdateInstanceTypeCapacityFromNode(ctx context.Context
285285
instanceTypeName := node.Labels[corev1.LabelInstanceTypeStable]
286286
amiMap := amifamily.MapToInstanceTypes([]*cloudprovider.InstanceType{{
287287
Name: instanceTypeName,
288-
Requirements: scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...),
288+
Requirements: scheduling.NewLabelRequirements(node.Labels),
289289
}}, nodeClass.Status.AMIs)
290290
// Ensure NodeClaim AMI is current
291291
if !lo.ContainsBy(amiMap[nodeClaim.Status.ImageID], func(i *cloudprovider.InstanceType) bool {

0 commit comments

Comments
 (0)