Skip to content

Commit aaf6b9a

Browse files
authored
fix: don't drift when launched into open ODCR (#7867)
1 parent 62f49f3 commit aaf6b9a

File tree

8 files changed

+58
-11
lines changed

8 files changed

+58
-11
lines changed

pkg/cloudprovider/cloudprovider.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,12 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *
385385

386386
if instanceType != nil {
387387
for key, req := range instanceType.Requirements {
388-
if req.Len() == 1 {
388+
// We only want to add a label based on the instance type requirements if there is a single value for that
389+
// requirement. For example, we can't add a label for zone based on this if the requirement is compatible with
390+
// three. Capacity reservation IDs are a special case since we don't have a way to represent that the label may or
391+
// may not exist. Since this requirement will be present regardless of the capacity type, we can't insert it here.
392+
// Otherwise, you may end up with spot and on-demand NodeClaims with a reservation ID label.
393+
if req.Len() == 1 && req.Key != cloudprovider.ReservationIDLabel {
389394
labels[key] = req.Values()[0]
390395
}
391396
}

pkg/controllers/nodeclaim/capacityreservation/suite_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ import (
4242
. "sigs.k8s.io/karpenter/pkg/test/expectations"
4343
"sigs.k8s.io/karpenter/pkg/test/v1alpha1"
4444
. "sigs.k8s.io/karpenter/pkg/utils/testing"
45+
46+
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
4547
)
4648

4749
var ctx context.Context
@@ -59,6 +61,7 @@ func TestAWS(t *testing.T) {
5961
var _ = BeforeSuite(func() {
6062
env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeProviderIDFieldIndexer(ctx)))
6163
ctx = options.ToContext(ctx, test.Options())
64+
ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}}))
6265
ctx, stop = context.WithCancel(ctx)
6366
awsEnv = test.NewEnvironment(ctx, env)
6467

pkg/controllers/nodeclaim/garbagecollection/suite_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ import (
4646
. "github.com/onsi/gomega"
4747
. "sigs.k8s.io/karpenter/pkg/test/expectations"
4848
. "sigs.k8s.io/karpenter/pkg/utils/testing"
49+
50+
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
4951
)
5052

5153
var ctx context.Context
@@ -63,6 +65,7 @@ func TestAPIs(t *testing.T) {
6365
var _ = BeforeSuite(func() {
6466
ctx = options.ToContext(ctx, test.Options())
6567
env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...))
68+
ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}}))
6669
awsEnv = test.NewEnvironment(ctx, env)
6770
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}),
6871
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider)

pkg/controllers/nodeclaim/tagging/suite_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ var _ = Describe("TaggingController", func() {
219219
v1.EKSClusterNameTagKey: options.FromContext(ctx).ClusterName,
220220
}
221221
ec2Instance := lo.Must(awsEnv.EC2API.Instances.Load(*ec2Instance.InstanceId)).(ec2types.Instance)
222-
instanceTags := instance.NewInstance(ec2Instance).Tags
222+
instanceTags := instance.NewInstance(ctx, ec2Instance).Tags
223223

224224
for tag, value := range expectedTags {
225225
if lo.Contains(customTags, tag) {

pkg/providers/instance/instance.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ func (p *DefaultProvider) Get(ctx context.Context, id string) (*Instance, error)
164164
if err != nil {
165165
return nil, fmt.Errorf("failed to describe ec2 instances, %w", err)
166166
}
167-
instances, err := instancesFromOutput(out)
167+
instances, err := instancesFromOutput(ctx, out)
168168
if err != nil {
169169
return nil, fmt.Errorf("getting instances from output, %w", err)
170170
}
@@ -202,7 +202,7 @@ func (p *DefaultProvider) List(ctx context.Context) ([]*Instance, error) {
202202
}
203203
out.Reservations = append(out.Reservations, page.Reservations...)
204204
}
205-
instances, err := instancesFromOutput(out)
205+
instances, err := instancesFromOutput(ctx, out)
206206
return instances, cloudprovider.IgnoreNodeClaimNotFoundError(err)
207207
}
208208

@@ -612,7 +612,7 @@ func filterExoticInstanceTypes(instanceTypes []*cloudprovider.InstanceType) []*c
612612
return instanceTypes
613613
}
614614

615-
func instancesFromOutput(out *ec2.DescribeInstancesOutput) ([]*Instance, error) {
615+
func instancesFromOutput(ctx context.Context, out *ec2.DescribeInstancesOutput) ([]*Instance, error) {
616616
if len(out.Reservations) == 0 {
617617
return nil, cloudprovider.NewNodeClaimNotFoundError(fmt.Errorf("instance not found"))
618618
}
@@ -626,7 +626,7 @@ func instancesFromOutput(out *ec2.DescribeInstancesOutput) ([]*Instance, error)
626626
sort.Slice(instances, func(i, j int) bool {
627627
return aws.ToString(instances[i].InstanceId) < aws.ToString(instances[j].InstanceId)
628628
})
629-
return lo.Map(instances, func(i ec2types.Instance, _ int) *Instance { return NewInstance(i) }), nil
629+
return lo.Map(instances, func(i ec2types.Instance, _ int) *Instance { return NewInstance(ctx, i) }), nil
630630
}
631631

632632
func combineFleetErrors(fleetErrs []ec2types.CreateFleetError) (errs error) {

pkg/providers/instance/suite_test.go

+24
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,30 @@ var _ = Describe("InstanceProvider", func() {
256256
Expect(createFleetInput.LaunchTemplateConfigs).To(HaveLen(1))
257257
Expect(createFleetInput.LaunchTemplateConfigs[0].Overrides).To(HaveLen(1))
258258
})
259+
It("should treat instances which launched into open ODCRs as on-demand when the ReservedCapacity gate is disabled", func() {
260+
id := fake.InstanceID()
261+
awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(&ec2.DescribeInstancesOutput{
262+
Reservations: []ec2types.Reservation{{
263+
Instances: []ec2types.Instance{{
264+
State: &ec2types.InstanceState{Name: ec2types.InstanceStateNameRunning},
265+
PrivateDnsName: lo.ToPtr(fake.PrivateDNSName()),
266+
Placement: &ec2types.Placement{AvailabilityZone: lo.ToPtr(fake.DefaultRegion)},
267+
LaunchTime: lo.ToPtr(time.Now().Add(-time.Minute)),
268+
InstanceId: &id,
269+
InstanceType: "m5.large",
270+
CapacityReservationId: lo.ToPtr("cr-foo"),
271+
}},
272+
}},
273+
})
274+
275+
coreoptions.FromContext(ctx).FeatureGates.ReservedCapacity = false
276+
nodeClaims, err := cloudProvider.List(ctx)
277+
Expect(err).ToNot(HaveOccurred())
278+
Expect(nodeClaims).To(HaveLen(1))
279+
Expect(nodeClaims[0].Status.ProviderID).To(ContainSubstring(id))
280+
Expect(nodeClaims[0].Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
281+
Expect(nodeClaims[0].Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
282+
})
259283
It("should return all NodePool-owned instances from List", func() {
260284
ids := sets.New[string]()
261285
// Provision instances that have the karpenter.sh/nodepool key

pkg/providers/instance/types.go

+12-3
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ limitations under the License.
1515
package instance
1616

1717
import (
18+
"context"
1819
"time"
1920

2021
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
2122
"github.com/samber/lo"
2223

2324
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
25+
"sigs.k8s.io/karpenter/pkg/operator/options"
2426
)
2527

2628
// Instance is an internal data representation of either an ec2.Instance or an ec2.FleetInstance
@@ -40,18 +42,25 @@ type Instance struct {
4042
EFAEnabled bool
4143
}
4244

43-
func NewInstance(out ec2types.Instance) *Instance {
45+
func NewInstance(ctx context.Context, out ec2types.Instance) *Instance {
4446
return &Instance{
4547
LaunchTime: lo.FromPtr(out.LaunchTime),
4648
State: out.State.Name,
4749
ID: lo.FromPtr(out.InstanceId),
4850
ImageID: lo.FromPtr(out.ImageId),
4951
Type: out.InstanceType,
5052
Zone: lo.FromPtr(out.Placement.AvailabilityZone),
53+
// NOTE: Only set the capacity type to reserved and assign a reservation ID if the feature gate is enabled. It's
54+
// possible for these to be set if the instance launched into an open ODCR, but treating it as reserved would induce
55+
// drift.
5156
CapacityType: lo.If(out.SpotInstanceRequestId != nil, karpv1.CapacityTypeSpot).
52-
ElseIf(out.CapacityReservationId != nil, karpv1.CapacityTypeReserved).
57+
ElseIf(out.CapacityReservationId != nil && options.FromContext(ctx).FeatureGates.ReservedCapacity, karpv1.CapacityTypeReserved).
5358
Else(karpv1.CapacityTypeOnDemand),
54-
CapacityReservationID: lo.FromPtr(out.CapacityReservationId),
59+
CapacityReservationID: lo.Ternary(
60+
options.FromContext(ctx).FeatureGates.ReservedCapacity,
61+
lo.FromPtr(out.CapacityReservationId),
62+
"",
63+
),
5564
SecurityGroupIDs: lo.Map(out.SecurityGroups, func(securitygroup ec2types.GroupIdentifier, _ int) string {
5665
return lo.FromPtr(securitygroup.GroupId)
5766
}),

test/suites/integration/tags_test.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
corev1 "k8s.io/api/core/v1"
3030
"sigs.k8s.io/controller-runtime/pkg/client"
3131
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
32+
"sigs.k8s.io/karpenter/pkg/operator/options"
3233
coretest "sigs.k8s.io/karpenter/pkg/test"
3334

3435
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
@@ -110,7 +111,8 @@ var _ = Describe("Tags", func() {
110111
g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationClusterNameTaggedCompatability, "true"))
111112
}, time.Minute).Should(Succeed())
112113

113-
nodeInstance := instance.NewInstance(env.GetInstance(node.Name))
114+
ctx := options.ToContext(env.Context, &options.Options{})
115+
nodeInstance := instance.NewInstance(ctx, env.GetInstance(node.Name))
114116
Expect(nodeInstance.Tags).To(HaveKeyWithValue("Name", node.Name))
115117
Expect(nodeInstance.Tags).To(HaveKeyWithValue("karpenter.sh/nodeclaim", nodeClaim.Name))
116118
Expect(nodeInstance.Tags).To(HaveKeyWithValue("eks:eks-cluster-name", env.ClusterName))
@@ -147,7 +149,8 @@ var _ = Describe("Tags", func() {
147149
g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationClusterNameTaggedCompatability, "true"))
148150
}, time.Minute).Should(Succeed())
149151

150-
nodeInstance := instance.NewInstance(env.GetInstance(node.Name))
152+
ctx := options.ToContext(env.Context, &options.Options{})
153+
nodeInstance := instance.NewInstance(ctx, env.GetInstance(node.Name))
151154
Expect(nodeInstance.Tags).To(HaveKeyWithValue("Name", "custom-name"))
152155
Expect(nodeInstance.Tags).To(HaveKeyWithValue("karpenter.sh/nodeclaim", nodeClaim.Name))
153156
Expect(nodeInstance.Tags).To(HaveKeyWithValue("eks:eks-cluster-name", env.ClusterName))

0 commit comments

Comments
 (0)