Skip to content

feat: Added check to filter supported nodes by ami's arch type #7933

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ require (
github.com/pelletier/go-toml/v2 v2.2.3
github.com/prometheus/client_golang v1.21.1
github.com/samber/lo v1.49.1
github.com/stretchr/testify v1.10.0
go.uber.org/multierr v1.11.0
go.uber.org/zap v1.27.0
golang.org/x/sync v0.12.0
Expand Down Expand Up @@ -95,6 +96,7 @@ require (
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
Expand Down
22 changes: 22 additions & 0 deletions pkg/providers/amifamily/ami.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,28 @@ func MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType, amis []v1.A
return amiIDs
}

// FilterInstanceTypesByAMICompatibility filters instance types to only include those that are compatible with at least one AMI
// This prevents launching instances that don't have compatible AMIs available
func FilterInstanceTypesByAMICompatibility(instanceTypes []*cloudprovider.InstanceType, amis []v1.AMI) []*cloudprovider.InstanceType {
if len(amis) == 0 {
return []*cloudprovider.InstanceType{}
}

compatible := []*cloudprovider.InstanceType{}
for _, instanceType := range instanceTypes {
for _, ami := range amis {
if err := instanceType.Requirements.Compatible(
scheduling.NewNodeSelectorRequirements(ami.Requirements...),
scheduling.AllowUndefinedWellKnownLabels,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we allow our Unknown WellKnown labels to be compatible here? Can we check the other places we are doing instance type compatibility with AMIs?

Also, I wonder if we can just re-leverage MapInstanceTypes and then just pull-out the instance types that we support

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gnana997 Any luck with this update?

); err == nil {
compatible = append(compatible, instanceType)
break
}
}
}
return compatible
}

// Compare two AMI's based on their deprecation status, creation time or name
// If both AMIs are deprecated, compare creation time and return the one with the newer creation time
// If both AMIs are non-deprecated, compare creation time and return the one with the newer creation time
Expand Down
161 changes: 161 additions & 0 deletions pkg/providers/amifamily/ami_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package amifamily_test

import (
"testing"

v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
"github.com/aws/karpenter-provider-aws/pkg/providers/amifamily"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/karpenter/pkg/cloudprovider"
"sigs.k8s.io/karpenter/pkg/scheduling"
)

func TestFilterInstanceTypesByAMICompatibility(t *testing.T) {
testCases := []struct {
name string
instanceTypes []*cloudprovider.InstanceType
amis []v1.AMI
expected int
}{
{
name: "amd64 instance type with amd64 ami",
instanceTypes: []*cloudprovider.InstanceType{
{
Name: "m5.large",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, "amd64"),
),
},
},
amis: []v1.AMI{
{
ID: "ami-123",
Requirements: []corev1.NodeSelectorRequirement{
{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{"amd64"},
},
},
},
},
expected: 1,
},
{
name: "arm64 instance type with amd64 ami",
instanceTypes: []*cloudprovider.InstanceType{
{
Name: "m6g.large",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, "arm64"),
),
},
},
amis: []v1.AMI{
{
ID: "ami-123",
Requirements: []corev1.NodeSelectorRequirement{
{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{"amd64"},
},
},
},
},
expected: 0,
},
{
name: "mixed instance types with both amis",
instanceTypes: []*cloudprovider.InstanceType{
{
Name: "m5.large",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, "amd64"),
),
},
{
Name: "m6g.large",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, "arm64"),
),
},
},
amis: []v1.AMI{
{
ID: "ami-123",
Requirements: []corev1.NodeSelectorRequirement{
{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{"amd64"},
},
},
},
{
ID: "ami-456",
Requirements: []corev1.NodeSelectorRequirement{
{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{"arm64"},
},
},
},
},
expected: 2,
},
{
name: "empty instance types",
instanceTypes: []*cloudprovider.InstanceType{},
amis: []v1.AMI{
{
ID: "ami-123",
Requirements: []corev1.NodeSelectorRequirement{
{
Key: corev1.LabelArchStable,
Operator: corev1.NodeSelectorOpIn,
Values: []string{"amd64"},
},
},
},
},
expected: 0,
},
{
name: "empty amis",
instanceTypes: []*cloudprovider.InstanceType{
{
Name: "m5.large",
Requirements: scheduling.NewRequirements(
scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, "amd64"),
),
},
},
amis: []v1.AMI{},
expected: 0,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
compatible := amifamily.FilterInstanceTypesByAMICompatibility(tc.instanceTypes, tc.amis)
assert.Len(t, compatible, tc.expected)
})
}
}
20 changes: 16 additions & 4 deletions pkg/providers/instancetype/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
"sync/atomic"

"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/karpenter/pkg/scheduling"

awscache "github.com/aws/karpenter-provider-aws/pkg/cache"
Expand All @@ -32,14 +34,12 @@ import (
"github.com/mitchellh/hashstructure/v2"
"github.com/patrickmn/go-cache"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/log"
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/samber/lo"
"k8s.io/apimachinery/pkg/util/sets"

v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
sdk "github.com/aws/karpenter-provider-aws/pkg/aws"
Expand Down Expand Up @@ -157,12 +157,24 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass)
// date capacity information. Rather than incurring a cache miss each time an instance is launched into a reserved
// offering (or terminated), offerings are injected to the cached instance types on each call. Note that on-demand and
// spot offerings are still cached - only reserved offerings are generated each time.
return p.offeringProvider.InjectOfferings(
instanceTypesWithOfferings := p.offeringProvider.InjectOfferings(
ctx,
instanceTypes,
nodeClass,
p.allZones,
), nil
)

// Filter instance types that don't have compatible AMIs to prevent architecture mismatches
filteredInstanceTypes := amifamily.FilterInstanceTypesByAMICompatibility(instanceTypesWithOfferings, nodeClass.Status.AMIs)
if len(filteredInstanceTypes) == 0 {
log.FromContext(ctx).WithValues(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is a log -- this is an error IMO

"nodeclass", nodeClass.Name,
"ami-count", len(nodeClass.Status.AMIs),
"instance-type-count", len(instanceTypesWithOfferings),
).V(4).Info("no instance types are compatible with available AMIs, check NodePool and NodeClass architecture requirements")
}

return filteredInstanceTypes, nil
}

func (p *DefaultProvider) resolveInstanceTypes(
Expand Down