Skip to content

Commit eff767c

Browse files
authored
feat: on-demand capacity reservation support (#7726)
1 parent 974a323 commit eff767c

File tree

81 files changed

+3649
-710
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+3649
-710
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ HELM_OPTS ?= --set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn=${K
1717
--set controller.resources.requests.memory=1Gi \
1818
--set controller.resources.limits.cpu=1 \
1919
--set controller.resources.limits.memory=1Gi \
20-
--set settings.featureGates.spotToSpotConsolidation=true \
2120
--set settings.featureGates.nodeRepair=true \
21+
--set settings.featureGates.reservedCapacity=true \
22+
--set settings.featureGates.spotToSpotConsolidation=true \
2223
--create-namespace
2324

2425
# CR for local builds of Karpenter

charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,39 @@ spec:
239239
x-kubernetes-validations:
240240
- message: must have only one blockDeviceMappings with rootVolume
241241
rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1
242+
capacityReservationSelectorTerms:
243+
description: |-
244+
CapacityReservationSelectorTerms is a list of capacity reservation selector terms. Each term is ORed together to
245+
determine the set of eligible capacity reservations.
246+
items:
247+
properties:
248+
id:
249+
description: ID is the capacity reservation id in EC2
250+
pattern: ^cr-[0-9a-z]+$
251+
type: string
252+
ownerID:
253+
description: Owner is the owner id for the capacity reservation.
254+
pattern: ^[0-9]{12}$
255+
type: string
256+
tags:
257+
additionalProperties:
258+
type: string
259+
description: |-
260+
Tags is a map of key/value tags used to select capacity reservations.
261+
Specifying '*' for a value selects all values for a given tag key.
262+
maxProperties: 20
263+
type: object
264+
x-kubernetes-validations:
265+
- message: empty tag keys or values aren't supported
266+
rule: self.all(k, k != '' && self[k] != '')
267+
type: object
268+
maxItems: 30
269+
type: array
270+
x-kubernetes-validations:
271+
- message: expected at least one, got none, ['tags', 'id']
272+
rule: self.all(x, has(x.tags) || has(x.id))
273+
- message: '''id'' is mutually exclusive, cannot be set along with tags in a capacity reservation selector term'
274+
rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.ownerID)))'
242275
context:
243276
description: |-
244277
Context is a Reserved field in EC2 APIs
@@ -469,7 +502,7 @@ spec:
469502
- message: immutable field changed
470503
rule: self == oldSelf
471504
securityGroupSelectorTerms:
472-
description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed.
505+
description: SecurityGroupSelectorTerms is a list of security group selector terms. The terms are ORed.
473506
items:
474507
description: |-
475508
SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes.
@@ -503,12 +536,12 @@ spec:
503536
rule: self.size() != 0
504537
- message: expected at least one, got none, ['tags', 'id', 'name']
505538
rule: self.all(x, has(x.tags) || has(x.id) || has(x.name))
506-
- message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms'
539+
- message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term'
507540
rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))'
508-
- message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms'
541+
- message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term'
509542
rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))'
510543
subnetSelectorTerms:
511-
description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed.
544+
description: SubnetSelectorTerms is a list of subnet selector terms. The terms are ORed.
512545
items:
513546
description: |-
514547
SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes.
@@ -537,7 +570,7 @@ spec:
537570
rule: self.size() != 0
538571
- message: expected at least one, got none, ['tags', 'id']
539572
rule: self.all(x, has(x.tags) || has(x.id))
540-
- message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms'
573+
- message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a subnet selector term'
541574
rule: '!self.all(x, has(x.id) && has(x.tags))'
542575
tags:
543576
additionalProperties:
@@ -640,6 +673,46 @@ spec:
640673
- requirements
641674
type: object
642675
type: array
676+
capacityReservations:
677+
description: |-
678+
CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the
679+
CapacityReservation selectors.
680+
items:
681+
properties:
682+
availabilityZone:
683+
description: The availability zone the capacity reservation is available in.
684+
type: string
685+
endTime:
686+
description: |-
687+
The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter
688+
will no longer be able to launch instances into that reservation.
689+
format: date-time
690+
type: string
691+
id:
692+
description: The id for the capacity reservation.
693+
pattern: ^cr-[0-9a-z]+$
694+
type: string
695+
instanceMatchCriteria:
696+
description: Indicates the type of instance launches the capacity reservation accepts.
697+
enum:
698+
- open
699+
- targeted
700+
type: string
701+
instanceType:
702+
description: The instance type for the capacity reservation.
703+
type: string
704+
ownerID:
705+
description: The ID of the AWS account that owns the capacity reservation.
706+
pattern: ^[0-9]{12}$
707+
type: string
708+
required:
709+
- availabilityZone
710+
- id
711+
- instanceMatchCriteria
712+
- instanceType
713+
- ownerID
714+
type: object
715+
type: array
643716
conditions:
644717
description: Conditions contains signals for health and readiness
645718
items:

charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ spec:
137137
- message: label "kubernetes.io/hostname" is restricted
138138
rule: self != "kubernetes.io/hostname"
139139
- message: label domain "karpenter.k8s.aws" is restricted
140-
rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
140+
rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
141141
minValues:
142142
description: |-
143143
This field is ALPHA and can be dropped or replaced at any time

charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ spec:
210210
- message: label "kubernetes.io/hostname" is restricted
211211
rule: self.all(x, x != "kubernetes.io/hostname")
212212
- message: label domain "karpenter.k8s.aws" is restricted
213-
rule: self.all(x, x in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws"))
213+
rule: self.all(x, x in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws"))
214214
type: object
215215
spec:
216216
description: |-
@@ -283,7 +283,7 @@ spec:
283283
- message: label "kubernetes.io/hostname" is restricted
284284
rule: self != "kubernetes.io/hostname"
285285
- message: label domain "karpenter.k8s.aws" is restricted
286-
rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
286+
rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws")
287287
minValues:
288288
description: |-
289289
This field is ALPHA and can be dropped or replaced at any time

charts/karpenter/templates/deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ spec:
107107
divisor: "0"
108108
resource: limits.memory
109109
- name: FEATURE_GATES
110-
value: "SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }},NodeRepair={{ .Values.settings.featureGates.nodeRepair }}"
110+
value: "ReservedCapacity={{ .Values.settings.featureGates.reservedCapacity }},SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }},NodeRepair={{ .Values.settings.featureGates.nodeRepair }}"
111111
{{- with .Values.settings.batchMaxDuration }}
112112
- name: BATCH_MAX_DURATION
113113
value: "{{ . }}"

charts/karpenter/values.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,12 @@ settings:
184184
# -- Feature Gate configuration values. Feature Gates will follow the same graduation process and requirements as feature gates
185185
# in Kubernetes. More information here https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features
186186
featureGates:
187-
# -- spotToSpotConsolidation is ALPHA and is disabled by default.
188-
# Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation.
189-
spotToSpotConsolidation: false
190187
# -- nodeRepair is ALPHA and is disabled by default.
191188
# Setting this to true will enable node repair.
192189
nodeRepair: false
190+
# -- reservedCapacity is ALPHA and is disabled by default.
191+
# Setting this to true will enable native on-demand capacity reservation support.
192+
reservedCapacity: false
193+
# -- spotToSpotConsolidation is ALPHA and is disabled by default.
194+
# Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation.
195+
spotToSpotConsolidation: false

cmd/controller/main.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ limitations under the License.
1515
package main
1616

1717
import (
18+
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
1819
"github.com/aws/karpenter-provider-aws/pkg/cloudprovider"
1920
"github.com/aws/karpenter-provider-aws/pkg/controllers"
2021
"github.com/aws/karpenter-provider-aws/pkg/operator"
@@ -23,6 +24,7 @@ import (
2324
corecontrollers "sigs.k8s.io/karpenter/pkg/controllers"
2425
"sigs.k8s.io/karpenter/pkg/controllers/state"
2526
coreoperator "sigs.k8s.io/karpenter/pkg/operator"
27+
karpoptions "sigs.k8s.io/karpenter/pkg/operator/options"
2628
)
2729

2830
func main() {
@@ -35,10 +37,15 @@ func main() {
3537
op.GetClient(),
3638
op.AMIProvider,
3739
op.SecurityGroupProvider,
40+
op.CapacityReservationProvider,
3841
)
3942
cloudProvider := metrics.Decorate(awsCloudProvider)
4043
clusterState := state.NewCluster(op.Clock, op.GetClient(), cloudProvider)
4144

45+
if karpoptions.FromContext(ctx).FeatureGates.ReservedCapacity {
46+
v1.CapacityReservationsEnabled = true
47+
}
48+
4249
op.
4350
WithControllers(ctx, corecontrollers.NewControllers(
4451
ctx,
@@ -69,6 +76,7 @@ func main() {
6976
op.LaunchTemplateProvider,
7077
op.VersionProvider,
7178
op.InstanceTypesProvider,
79+
op.CapacityReservationProvider,
7280
)...).
7381
Start(ctx)
7482
}

0 commit comments

Comments
 (0)