Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cluster-autoscaler/cloudprovider/oci/common/oci_ref.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package common
import (
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/instancepools/consts"
"k8s.io/klog/v2"

"strings"
)

Expand Down Expand Up @@ -45,8 +47,10 @@ func NodeToOciRef(n *apiv1.Node) (OciRef, error) {
func getNodeShape(node *apiv1.Node) string {
// First check for the deprecated label
if shape, ok := node.Labels[apiv1.LabelInstanceType]; ok {
klog.V(5).Infof("Extracting node shape %s from label %s", shape, apiv1.LabelInstanceType)
return shape
} else if shape, ok := node.Labels[apiv1.LabelInstanceTypeStable]; ok {
klog.V(5).Infof("Extracting node shape %s from label %s", shape, apiv1.LabelInstanceTypeStable)
return shape
}
return ""
Expand All @@ -56,8 +60,10 @@ func getNodeShape(node *apiv1.Node) string {
func getNodeAZ(node *apiv1.Node) string {
// First check for the deprecated label
if az, ok := node.Labels[apiv1.LabelZoneFailureDomain]; ok {
klog.V(5).Infof("Extracting availability domain %s from label %s", az, apiv1.LabelZoneFailureDomain)
return az
} else if az, ok := node.Labels[apiv1.LabelTopologyZone]; ok {
klog.V(5).Infof("Extracting availability domain %s from label %s", az, apiv1.LabelTopologyZone)
return az
}
return ""
Expand All @@ -67,6 +73,7 @@ func getNodeAZ(node *apiv1.Node) string {
func getNodeInternalAddress(node *apiv1.Node) string {
for _, address := range node.Status.Addresses {
if address.Type == apiv1.NodeInternalIP {
klog.V(5).Infof("Extracting node internal IP %s from node %s", address.Address, node.Name)
return address.Address
}
}
Expand All @@ -77,6 +84,7 @@ func getNodeInternalAddress(node *apiv1.Node) string {
func getNodeExternalAddress(node *apiv1.Node) string {
for _, address := range node.Status.Addresses {
if address.Type == apiv1.NodeExternalIP {
klog.V(5).Infof("Extracting node external IP %s from node %s", address.Address, node.Name)
return address.Address
}
}
Expand All @@ -96,17 +104,20 @@ func getNodeInstancePoolID(node *apiv1.Node) string {
poolIDSuffixLabel, _ := node.Labels[consts.InstancePoolIDLabelSuffix]

if poolIDPrefixLabel != "" && poolIDSuffixLabel != "" {
klog.V(5).Infof("Extracting instance-pool %s from labels %s + %s", poolIDPrefixLabel+"."+poolIDSuffixLabel, consts.InstancePoolIDLabelPrefix, consts.InstancePoolIDLabelSuffix)
return poolIDPrefixLabel + "." + poolIDSuffixLabel
}

poolIDAnnotation, _ := node.Annotations[consts.OciInstancePoolIDAnnotation]
klog.V(5).Infof("Extracting instance-pool %s from annotation %s", poolIDAnnotation, consts.OciInstanceIDAnnotation)
return poolIDAnnotation
}

// getNodeInstanceID returns the instance ID taken from .spec.providerID, or else from labels or an annotation, or an empty string if it is not found.
func getNodeInstanceID(node *apiv1.Node) string {
providerID := strings.TrimPrefix(node.Spec.ProviderID, "oci://")
if len(providerID) != 0 {
klog.V(5).Infof("Extracting instance-id %s from .spec.providerID", providerID)
return providerID
}

Expand All @@ -119,9 +130,11 @@ func getNodeInstanceID(node *apiv1.Node) string {
instanceSuffixLabel, _ := node.Labels[consts.InstanceIDLabelSuffix]

if instancePrefixLabel != "" && instanceSuffixLabel != "" {
klog.V(5).Infof("Extracting instance-id %s from labels %s + %s", instancePrefixLabel+"."+instanceSuffixLabel, consts.InstanceIDLabelPrefix, consts.InstanceIDLabelSuffix)
return instancePrefixLabel + "." + instanceSuffixLabel
}

instanceIDAnnotation, _ := node.Annotations[consts.OciInstanceIDAnnotation]
klog.V(5).Infof("Extracting instance-id %s from annotation %s", instanceIDAnnotation, consts.OciInstanceIDAnnotation)
return instanceIDAnnotation
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func (ocp *OciCloudProvider) NodeGroupForNode(n *apiv1.Node) (cloudprovider.Node

ociRef, err := ocicommon.NodeToOciRef(n)
if err != nil {
klog.V(4).Infof("NodeGroupForNode: ref conversion for node %s failed: %v", n.Name, err)
return nil, err
}

Expand All @@ -61,32 +62,33 @@ func (ocp *OciCloudProvider) NodeGroupForNode(n *apiv1.Node) (cloudprovider.Node
// this instance may not be a part of an instance pool, or it may be part of an instance pool that the autoscaler does not manage
if errors.Cause(err) == errInstanceInstancePoolNotFound {
// should not be processed by cluster autoscaler
klog.V(4).Infof("NodeGroupForNode: node %s is not a member of any of the specified instance-pool(s)", n.Name)
return nil, nil
}

klog.V(4).Infof("NodeGroupForNode: %s belongs to instance-pool %s", n.Name, ng.Id())
return ng, err
}

// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ocp *OciCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
instance, err := ocicommon.NodeToOciRef(node)
if err != nil {
klog.V(4).Infof("HasInstance: ref conversion for node %s failed: %v", node.Name, err)
return false, err
}
instancePool, err := ocp.poolManager.GetInstancePoolForInstance(instance)
if err != nil {
klog.V(4).Infof("HasInstance: instance-pool check for node %s failed: %v", node.Name, err)
return false, err
}
instances, err := ocp.poolManager.GetInstancePoolNodes(*instancePool)
if err != nil {
return false, err
if instancePool == nil || instancePool.Id() == "" {
klog.V(4).Infof("HasInstance: node %s is not a member of any of the specified instance-pool(s)", node.Name)
return false, nil
}
for _, i := range instances {
if i.Id == instance.InstanceID {
return true, nil
}
}
return false, nil

klog.V(4).Infof("HasInstance: node %s belongs to instance-pool %s", node.Name, instancePool.Id())
return true, nil
}

// Pricing returns pricing model for this cloud provider or error if not available.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
InstancePoolId: common.String(id),
})
if err != nil {
klog.Errorf("get instance pool %s failed: %v", id, err)
klog.Errorf("get instance-pool %s failed: %v", id, err)
return err
}
klog.V(6).Infof("GetInstancePool() response %v", getInstancePoolResp.InstancePool)
Expand All @@ -105,6 +105,7 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
Page: page,
})
if err != nil {
klog.V(4).Infof("ListInstancePoolInstances for %s failed: %v", id, err)
return err
}

Expand All @@ -118,7 +119,7 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
// Compare the instance pool's size with the latest number of InstanceSummaries. If the pool has fewer instances
// than requested, look for unrecoverable errors such as quota or capacity issues in the scaling pool.
if len(*c.instanceSummaryCache[id]) < *c.poolCache[id].Size {
klog.V(4).Infof("Instance pool %s has only %d instances created while requested count is %d. ",
klog.V(4).Infof("instance-pool %s has only %d instances created while requested count is %d. ",
*getInstancePoolResp.InstancePool.DisplayName, len(*c.instanceSummaryCache[id]), *c.poolCache[id].Size)

if getInstancePoolResp.LifecycleState != core.InstancePoolLifecycleStateRunning {
Expand Down Expand Up @@ -166,6 +167,7 @@ func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, i
klog.Warning("instanceID is not set - skipping removal.")
return false
}
klog.V(4).Infof("detaching instance %s from instance-pool: %v", instanceID, instancePool.Id())

var err error
if strings.Contains(instanceID, consts.InstanceIDUnfulfilled) {
Expand Down Expand Up @@ -213,15 +215,15 @@ func (c *instancePoolCache) findInstanceByDetails(ociInstance ocicommon.OciRef)

if c.unownedInstances[ociInstance] {
// We already know this instance is not part of a configured pool. Return early and avoid additional API calls.
klog.V(4).Infof("Node " + ociInstance.Name + " is known to not be a member of any of the specified instance pool(s)")
klog.V(4).Info("Node " + ociInstance.Name + " is known to not be a member of any of the specified instance-pool(s)")
return nil, errInstanceInstancePoolNotFound
}

// Look for the instance in each of the specified pool(s)
for _, nextInstancePool := range c.poolCache {
// Skip searching this instance pool if we happen to know (prior labels) the pool ID and this is not it
if (ociInstance.InstancePoolID != "") && (ociInstance.InstancePoolID != *nextInstancePool.Id) {
klog.V(5).Infof("skipping over instance pool %s since it is not the one we are looking for", *nextInstancePool.Id)
klog.V(5).Infof("skipping over instance-pool %s since it is not the one we are looking for (%s)", *nextInstancePool.Id, ociInstance.InstancePoolID)
continue
}

Expand All @@ -236,6 +238,7 @@ func (c *instancePoolCache) findInstanceByDetails(ociInstance ocicommon.OciRef)

listInstancePoolInstances, err := c.computeManagementClient.ListInstancePoolInstances(context.Background(), listInstancePoolInstancesReq)
if err != nil {
klog.V(4).Infof("ListInstancePoolInstances for %s failed: %v", *nextInstancePool.Id, err)
return nil, err
}

Expand All @@ -254,7 +257,7 @@ func (c *instancePoolCache) findInstanceByDetails(ociInstance ocicommon.OciRef)
}
// Skip this instance if we happen to know (prior labels) the instance ID and this is not it
if (ociInstance.InstanceID != "") && (ociInstance.InstanceID != *poolMember.Id) {
klog.V(5).Infof("skipping over instance %s since it is not the one we are looking for", *poolMember.Id)
klog.V(5).Infof("skipping over instance %s since it is not the one we are looking for (%s)", *poolMember.Id, ociInstance.InstanceID)
continue
}

Expand Down Expand Up @@ -285,7 +288,7 @@ func (c *instancePoolCache) findInstanceByDetails(ociInstance ocicommon.OciRef)
if *poolMember.Id == ociInstance.InstanceID ||
(getVnicResp.Vnic.PrivateIp != nil && *getVnicResp.Vnic.PrivateIp == ociInstance.PrivateIPAddress) ||
(getVnicResp.Vnic.PublicIp != nil && *getVnicResp.Vnic.PublicIp == ociInstance.PublicIPAddress) {
klog.V(4).Info(*poolMember.DisplayName, " is a member of "+*nextInstancePool.Id)
klog.V(4).Infof("findInstanceByDetails: %s belongs to instance-pool %s", *poolMember.DisplayName, *nextInstancePool.Id)
// Return complete instance details.
if ociInstance.Name == "" {
ociInstance.Name = *poolMember.DisplayName
Expand All @@ -307,7 +310,7 @@ func (c *instancePoolCache) findInstanceByDetails(ociInstance ocicommon.OciRef)
}

c.unownedInstances[ociInstance] = true
klog.V(4).Infof(ociInstance.Name + " is not a member of any of the specified instance pool(s)")
klog.V(4).Info("findInstanceByDetails node " + ociInstance.Name + " is not a member of any of the specified instance-pool(s)")
return nil, errInstanceInstancePoolNotFound
}

Expand All @@ -321,7 +324,7 @@ func (c *instancePoolCache) getInstancePool(id string) (*core.InstancePool, erro
func (c *instancePoolCache) getInstancePoolWithoutLock(id string) (*core.InstancePool, error) {
instancePool := c.poolCache[id]
if instancePool == nil {
return nil, errors.New("instance pool was not found in the cache")
return nil, errors.New("instance-pool was not found in the cache")
}

return instancePool, nil
Expand All @@ -345,7 +348,7 @@ func (c *instancePoolCache) getInstanceSummaries(poolID string) (*[]core.Instanc
func (c *instancePoolCache) getInstanceSummariesWithoutLock(poolID string) (*[]core.InstanceSummary, error) {
instanceSummaries := c.instanceSummaryCache[poolID]
if instanceSummaries == nil {
return nil, errors.New("instance summaries for instance pool id " + poolID + " were not found in cache")
return nil, errors.New("instance summaries for instance-pool id " + poolID + " were not found in cache")
}

return instanceSummaries, nil
Expand All @@ -363,11 +366,13 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
if instancePoolID == "" {
return errors.New("instance-pool is required")
}
klog.V(4).Infof("adjusting size of instance-pool %s to: %d", instancePoolID, size)

getInstancePoolResp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
InstancePoolId: common.String(instancePoolID),
})
if err != nil {
klog.V(4).Infof("GetInstancePool for %s failed: %v", common.String(instancePoolID), err)
return err
}

Expand All @@ -384,6 +389,7 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
UpdateInstancePoolDetails: updateDetails,
})
if err != nil {
klog.V(4).Infof("UpdateInstancePool for %s failed: %v", common.String(instancePoolID), err)
return err
}

Expand Down Expand Up @@ -425,15 +431,15 @@ func (c *instancePoolCache) waitForState(ctx context.Context, instancePoolID str
InstancePoolId: common.String(instancePoolID),
})
if err != nil {
klog.Errorf("getInstancePool failed. Retrying: %+v", err)
klog.Errorf("getInstancePool failed for %s. Retrying: %+v", instancePoolID, err)
return false, err
} else if getInstancePoolResp.LifecycleState != desiredState {
deadline, _ := ctx.Deadline()
klog.V(4).Infof("waiting for instance-pool %s to enter state: %s (current state: %s) (remaining time %v)",
instancePoolID, desiredState, getInstancePoolResp.LifecycleState, deadline.Sub(time.Now()).Round(time.Second))
return false, nil
}
klog.V(3).Infof("instance pool %s is in desired state: %s", instancePoolID, desiredState)
klog.V(3).Infof("instance-pool %s is in desired state: %s", instancePoolID, desiredState)

return true, nil
}, ctx.Done()) // context timeout
Expand Down Expand Up @@ -522,7 +528,7 @@ func (c *instancePoolCache) monitorScalingProgress(ctx context.Context, target i
Page: page,
})
if err != nil {
klog.Errorf("list instance pool instances for pool %s failed: %v", instancePoolID, err)
klog.Errorf("list instance-pool instances for %s failed: %v", instancePoolID, err)
errCh <- err
return
}
Expand Down Expand Up @@ -572,6 +578,7 @@ func (c *instancePoolCache) getSize(id string) (int, error) {
return -1, errors.New("target size not found")
}

klog.V(4).Infof("instance-pool %s size is %d", id, *pool.Size)
return *pool.Size, nil
}

Expand Down
Loading
Loading