Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: clear instance type cache after ICE #7517

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions pkg/providers/instancetype/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ type DefaultProvider struct {
instanceTypesSeqNum uint64
// instanceTypesOfferingsSeqNum is a monotonically increasing change counter used to avoid the expensive hashing operation on instance types
instanceTypesOfferingsSeqNum uint64

// mutex required here to allow synchronization with other operations (not possible with atomics only)
muLastUnavailableOfferingsSeqNum sync.Mutex
// lastUnavailableOfferingsSeqNum is the most recently seen seq num of the unavailable offerings cache, used to track changes
lastUnavailableOfferingsSeqNum uint64
}

func NewDefaultProvider(instanceTypesCache *cache.Cache, discoveredCapacityCache *cache.Cache, ec2api sdk.EC2API, subnetProvider subnet.Provider, instanceTypesResolver Resolver) *DefaultProvider {
Expand Down Expand Up @@ -116,13 +121,17 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass)
// Compute hash key against node class AMIs (used to force cache rebuild when AMIs change)
amiHash, _ := hashstructure.Hash(nodeClass.Status.AMIs, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})

key := fmt.Sprintf("%d-%d-%016x-%016x-%016x",
// Store first observed value of seqNum before instance type resolution to track modification
unavailableOfferingsSeqNum := p.instanceTypesResolver.GetUnavailableOfferingsSeqNum()

key := fmt.Sprintf("%d-%d-%016x-%016x-%s",
p.instanceTypesSeqNum,
p.instanceTypesOfferingsSeqNum,
amiHash,
subnetZonesHash,
p.instanceTypesResolver.CacheKey(nodeClass),
)

if item, ok := p.instanceTypesCache.Get(key); ok {
// Ensure what's returned from this function is a shallow-copy of the slice (not a deep-copy of the data itself)
// so that modifications to the ordering of the data don't affect the original
Expand Down Expand Up @@ -183,7 +192,22 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass)
}
return it
})
p.instanceTypesCache.SetDefault(key, result)

p.muLastUnavailableOfferingsSeqNum.Lock()

// Flush the cache if the current unavailable offerings seq num has changed since the last flush
if seqNum := p.instanceTypesResolver.GetUnavailableOfferingsSeqNum(); p.lastUnavailableOfferingsSeqNum < seqNum {
p.instanceTypesCache.Flush()
p.lastUnavailableOfferingsSeqNum = seqNum
}

// Only cache the result if the seq num has not changed since the key was formed
if p.lastUnavailableOfferingsSeqNum == unavailableOfferingsSeqNum {
p.instanceTypesCache.SetDefault(key, result)
}

p.muLastUnavailableOfferingsSeqNum.Unlock()

return result, nil
}

Expand Down Expand Up @@ -220,7 +244,7 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error {
}

if p.cm.HasChanged("instance-types", instanceTypes) {
// Only update instanceTypesSeqNun with the instance types have been changed
// Only update instanceTypesSeqNum when the instance types have been changed
// This is to not create new keys with duplicate instance types option
atomic.AddUint64(&p.instanceTypesSeqNum, 1)
log.FromContext(ctx).WithValues(
Expand Down
6 changes: 6 additions & 0 deletions pkg/providers/instancetype/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ type Resolver interface {
CacheKey(nodeClass *v1.EC2NodeClass) string
// Resolve generates an InstanceType based on raw InstanceTypeInfo and NodeClass setting data
Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zoneData []ZoneData, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType
// GetUnavailableOfferingsSeqNum returns the current seq num of the unavailable offerings cache
GetUnavailableOfferingsSeqNum() uint64
Comment on lines +66 to +67
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The instancetype provider could alternatively directly access the unavailable offerings cache for this, allowing this interface to remain unmodified but requiring a new field to be added to the instancetype provider constructor.

}

type DefaultResolver struct {
Expand Down Expand Up @@ -108,6 +110,10 @@ func (d *DefaultResolver) Resolve(ctx context.Context, info ec2types.InstanceTyp
kc.SystemReserved, kc.EvictionHard, kc.EvictionSoft, nodeClass.AMIFamily(), d.createOfferings(ctx, info, zoneData))
}

// GetUnavailableOfferingsSeqNum returns the SeqNum value currently held by the
// resolver's unavailableOfferings.
//
// NOTE(review): this is a plain field read; if SeqNum is updated concurrently
// elsewhere, confirm whether an atomic load is needed here.
func (d *DefaultResolver) GetUnavailableOfferingsSeqNum() uint64 {
	seqNum := d.unavailableOfferings.SeqNum
	return seqNum
}

// createOfferings creates a set of mutually exclusive offerings for a given instance type. This provider maintains an
// invariant that each offering is mutually exclusive. Specifically, there is an offering for each permutation of zone
// and capacity type. ZoneID is also injected into the offering requirements, when available, but there is a 1-1
Expand Down