diff --git a/.github/workflows/gsk-release.yaml b/.github/workflows/gsk-release.yaml new file mode 100644 index 000000000000..14094261b7df --- /dev/null +++ b/.github/workflows/gsk-release.yaml @@ -0,0 +1,39 @@ +name: Build and push Docker image for gridscale's Cluster Autoscaler + +on: + push: + tags: + - "*" + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: 1.22.4 + + - name: Change directory to cluster-autoscaler and build binary + working-directory: ./cluster-autoscaler + run: | + make build-arch-amd64 + + - name: Login to Docker registry + uses: docker/login-action@v1 + with: + registry: registry.kubecuddle.io + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push Docker image + uses: docker/build-push-action@v2 + with: + context: ./cluster-autoscaler + file: ./cluster-autoscaler/Dockerfile.amd64 + push: true + platforms: linux/amd64 + tags: registry.kubecuddle.io/k8s/cluster-autoscaler:${{ github.ref_name }} diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index 6eef6433d797..e551de73cf0d 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -1,4 +1,6 @@ +**NOTE**: This is a forked version of Cluster Autoscaler made for gridscale managed Kubernetes clusters. Some of the information in this document may not apply to the original version of Cluster Autoscaler. Please use the official product documentation provided by gridscale. + # Frequently Asked Questions # Older versions diff --git a/cluster-autoscaler/README.md b/cluster-autoscaler/README.md index 195e092f48d1..08755468af55 100644 --- a/cluster-autoscaler/README.md +++ b/cluster-autoscaler/README.md @@ -3,6 +3,8 @@ # Introduction +**NOTE**: This is a forked version of Cluster Autoscaler made for gridscale managed Kubernetes clusters. + Cluster Autoscaler is a tool that automatically adjusts the size of the Kubernetes cluster when one of the following conditions is true: * there are pods that failed to run in the cluster due to insufficient resources. diff --git a/cluster-autoscaler/cloudprovider/builder/builder_all.go b/cluster-autoscaler/cloudprovider/builder/builder_all.go index 91252196a99f..d523c7b9ec8a 100644 --- a/cluster-autoscaler/cloudprovider/builder/builder_all.go +++ b/cluster-autoscaler/cloudprovider/builder/builder_all.go @@ -36,6 +36,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/externalgrpc" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gridscale" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/hetzner" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ionoscloud" @@ -87,8 +88,8 @@ var AvailableCloudProviders = []string{ cloudprovider.VolcengineProviderName, } -// DefaultCloudProvider is GCE. -const DefaultCloudProvider = cloudprovider.GceProviderName +// DefaultCloudProvider is gridscale.
+const DefaultCloudProvider = cloudprovider.GridscaleProviderName func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, @@ -153,6 +154,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, return rancher.BuildRancher(opts, do, rl) case cloudprovider.VolcengineProviderName: return volcengine.BuildVolcengine(opts, do, rl) + case cloudprovider.GridscaleProviderName: + return gridscale.BuildGridscale(opts, do, rl) } return nil } diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index 33719ac63f31..7ceb67a205a5 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -90,6 +90,8 @@ const ( CivoProviderName = "civo" // RancherProviderName gets the provider name of rancher RancherProviderName = "rancher" + // GridscaleProviderName gets the provider name of gridscale + GridscaleProviderName = "gridscale" ) // GpuConfig contains the label, type and the resource name for a GPU. diff --git a/cluster-autoscaler/cloudprovider/gridscale/README.md b/cluster-autoscaler/cloudprovider/gridscale/README.md new file mode 100644 index 000000000000..1b2b1b3fe363 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/gridscale/README.md @@ -0,0 +1,38 @@ + +gridscale managed Kubernetes cluster-autoscaler is a tool that automatically adjusts the size of the Kubernetes cluster when the load changes. When the load is high, the cluster-autoscaler increases the size of the cluster, and when the load is low, it decreases the size of the cluster. + +**Note**: The cluster-autoscaler currently supports gridscale managed Kubernetes clusters with version ~> 1.25. + +**Note 2**: Currently, gridscale managed k8s only supports scaling down the last nodes. Due to that limitation of the gridscale API, a forked version of the cluster-autoscaler is used. + +## cluster-autoscaler deployment +### Prerequisites +1. A gridscale managed Kubernetes cluster. +2. Create a gridscale API token via the gridscale panel. +3. kubectl is installed on your local machine. +4. kubectl is configured to access your gridscale managed Kubernetes cluster. + +### Deploy cluster-autoscaler +1. Download the cluster-autoscaler manifest file from [here](https://github.com/gridscale/autoscaler/blob/gsk-autoscaler-1.25.1/cluster-autoscaler/cloudprovider/gridscale/cluster-autoscaler-autodiscover.yaml) and save it as `cluster-autoscaler-autodiscover.yaml`. +2. If you use namespace `gsk-autoscaler` in your `cluster-autoscaler-autodiscover.yaml`, create a new namespace called `gsk-autoscaler` by running the following command: +```bash +$ kubectl create namespace gsk-autoscaler +``` +3. Insert your base64 encoded gridscale API user and token in the manifest file. +4. Insert your gridscale Kubernetes cluster UUID in environment variable `CLUSTER_UUID` in the manifest file. +5. Change environment variable `CLUSTER_MAX_NODE_COUNT` in the manifest file to the maximum number of nodes you want to scale up to. Optionally, you can also change the minimum number of nodes by changing environment variable `CLUSTER_MIN_NODE_COUNT` (Default: 1) in the manifest file. +6. To configure parameters of the cluster-autoscaler, you can add flags to the command in the manifest file. All available flags and their default values can be found [here](https://github.com/gridscale/autoscaler/blob/gsk-autoscaler-1.25.1/cluster-autoscaler/FAQ.md#what-are-the-parameters-to-ca). +7.
Deploy the cluster-autoscaler by running the following command: +```bash +$ kubectl apply -f cluster-autoscaler-autodiscover.yaml +``` +8. You can check the autoscaling activity by reading the configmap `cluster-autoscaler-status` in namespace `gsk-autoscaler`, e.g.: +```bash +$ kubectl get configmap cluster-autoscaler-status -n gsk-autoscaler -o yaml +``` + +**Note**: the cluster-autoscaler will be deployed in a namespace called `gsk-autoscaler`. + +## FAQ +### After upgrading my gridscale managed Kubernetes cluster, the cluster-autoscaler is not working anymore. What should I do? +Please make sure that the minor version of the cluster-autoscaler matches the minor version of your gridscale managed Kubernetes cluster. If not, please redeploy the cluster-autoscaler with the correct version. \ No newline at end of file diff --git a/cluster-autoscaler/cloudprovider/gridscale/cluster-autoscaler-autodiscover.yaml b/cluster-autoscaler/cloudprovider/gridscale/cluster-autoscaler-autodiscover.yaml new file mode 100644 index 000000000000..d0d109bd7d77 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/gridscale/cluster-autoscaler-autodiscover.yaml @@ -0,0 +1,199 @@ +apiVersion: v1 +kind: Secret +metadata: + name: cloud-config + namespace: gsk-autoscaler +data: + # insert your base64 encoded gridscale access id and key here, ensure there's no trailing newline: + # such as: echo -n "token" | base64 + gridscale-uuid: "" + gridscale-token: "" +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler + name: cluster-autoscaler + namespace: gsk-autoscaler +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "namespaces" + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["extensions"] + resources: ["replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["statefulsets", "replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: + ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"] + verbs: ["watch", "list", "get"] + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "patch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create"] + - apiGroups: ["coordination.k8s.io"] + resourceNames: ["cluster-autoscaler"] + resources: ["leases"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cluster-autoscaler + namespace: gsk-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create", "list", "watch"] + -
apiGroups: [""] + resources: ["configmaps"] + resourceNames: + ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: gsk-autoscaler + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: gsk-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: gsk-autoscaler + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: gsk-autoscaler + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8085" + spec: + priorityClassName: system-cluster-critical + securityContext: + runAsNonRoot: true + runAsUser: 65534 + fsGroup: 65534 + seccompProfile: + type: RuntimeDefault + serviceAccountName: cluster-autoscaler + containers: + - image: registry.kubecuddle.io/k8s/cluster-autoscaler:gsk-v1.30.2 + name: cluster-autoscaler + resources: + limits: + cpu: 100m + memory: 600Mi + requests: + cpu: 100m + memory: 600Mi + command: + - ./cluster-autoscaler + - --v=4 + - --stderrthreshold=info + - --namespace=gsk-autoscaler + - --daemonset-eviction-for-occupied-nodes=false + env: + - name: API_UUID + valueFrom: + secretKeyRef: + name: cloud-config + key: gridscale-uuid + - name: API_TOKEN + valueFrom: + secretKeyRef: + name: cloud-config + key: gridscale-token + - name: CLUSTER_UUID + value: "00000000-0000-0000-0000-000000000000" + - name: CLUSTER_MIN_NODE_COUNT + value: "1" + - name: CLUSTER_MAX_NODE_COUNT + value: "10" + imagePullPolicy: "Always" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true diff --git a/cluster-autoscaler/cloudprovider/gridscale/gridscale_cloud_provider.go b/cluster-autoscaler/cloudprovider/gridscale/gridscale_cloud_provider.go new file mode 100644 index 000000000000..e3b4b6903d07 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/gridscale/gridscale_cloud_provider.go @@ -0,0 +1,200 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gridscale + +import ( + "fmt" + "io" + "os" + "strings" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/klog/v2" +) + +const gridscaleProviderIDPrefix = "gridscale.io://" + +var _ cloudprovider.CloudProvider = (*gridscaleCloudProvider)(nil) + +// gridscaleCloudProvider implements CloudProvider interface. +type gridscaleCloudProvider struct { + manager *Manager + resourceLimiter *cloudprovider.ResourceLimiter +} + +func newgridscaleCloudProvider(manager *Manager, rl *cloudprovider.ResourceLimiter) *gridscaleCloudProvider { + return &gridscaleCloudProvider{ + manager: manager, + resourceLimiter: rl, + } +} + +// Name returns name of the cloud provider. +func (d *gridscaleCloudProvider) Name() string { + return cloudprovider.GridscaleProviderName +} + +// NodeGroups returns all node groups configured for this cloud provider. +func (d *gridscaleCloudProvider) NodeGroups() []cloudprovider.NodeGroup { + nodeGroups := make([]cloudprovider.NodeGroup, len(d.manager.nodeGroups)) + for i, ng := range d.manager.nodeGroups { + nodeGroups[i] = ng + } + return nodeGroups +} + +// NodeGroupForNode returns the node group for the given node, nil if the node +// should not be processed by cluster autoscaler, or non-nil error if such +// occurred. Must be implemented. +func (d *gridscaleCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { + providerID := node.Spec.ProviderID + nodeID := toNodeID(providerID) + + klog.V(5).Infof("checking nodegroup for node ID: %q", nodeID) + + // NOTE(arslan): the number of node groups per cluster is usually very + // small. So even though this looks like quadratic runtime, it's OK to + // proceed with this. + for _, group := range d.manager.nodeGroups { + klog.V(5).Infof("iterating over node group %q", group.Id()) + nodes, err := group.Nodes() + if err != nil { + return nil, err + } + + for _, node := range nodes { + klog.V(6).Infof("checking node has: %q want: %q. %v", node.Id, providerID, node.Id == providerID) + // CA uses node.Spec.ProviderID when looking for (un)registered nodes, + // so we need to use it here too. + if node.Id != providerID { + klog.V(5).Infof("CONTINUE checking nodegroup for node ID: %q", node.Id) + continue + } + + return group, nil + } + } + + // there is no "ErrNotExist" error, so we have to return a nil error + return nil, nil +} + +// HasInstance returns whether a given node has a corresponding instance in this cloud provider +func (d *gridscaleCloudProvider) HasInstance(node *apiv1.Node) (bool, error) { + return true, cloudprovider.ErrNotImplemented +} + +// Pricing returns pricing model for this cloud provider or error if not +// available. Implementation optional. +func (d *gridscaleCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetAvailableMachineTypes get all machine types that can be requested from +// the cloud provider. Implementation optional. +func (d *gridscaleCloudProvider) GetAvailableMachineTypes() ([]string, error) { + return []string{}, nil +} + +// NewNodeGroup builds a theoretical node group based on the node definition +// provided. The node group is not automatically created on the cloud provider +// side. The node group is not returned by NodeGroups() until it is created. +// Implementation optional. 
+func (d *gridscaleCloudProvider) NewNodeGroup( + machineType string, + labels map[string]string, + systemLabels map[string]string, + taints []apiv1.Taint, + extraResources map[string]resource.Quantity, +) (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetResourceLimiter returns struct containing limits (max, min) for +// resources (cores, memory etc.). +func (d *gridscaleCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) { + return d.resourceLimiter, nil +} + +// GPULabel returns the label added to nodes with GPU resource. +func (d *gridscaleCloudProvider) GPULabel() string { + return "" +} + +// GetAvailableGPUTypes returns all available GPU types the cloud provider supports. +func (d *gridscaleCloudProvider) GetAvailableGPUTypes() map[string]struct{} { + return nil +} + +// Cleanup cleans up open resources before the cloud provider is destroyed, +// i.e. go routines etc. +func (d *gridscaleCloudProvider) Cleanup() error { + return nil +} + +// Refresh is called before every main loop and can be used to dynamically +// update cloud provider state. In particular the list of node groups returned +// by NodeGroups() can change as a result of CloudProvider.Refresh(). +func (d *gridscaleCloudProvider) Refresh() error { + klog.V(4).Info("Refreshing node group cache") + return d.manager.Refresh() +} + +func (d *gridscaleCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig { + return nil +} + +// BuildGridscale builds the gridscale cloud provider. +func BuildGridscale( + opts config.AutoscalingOptions, + do cloudprovider.NodeGroupDiscoveryOptions, + rl *cloudprovider.ResourceLimiter, +) cloudprovider.CloudProvider { + var configFile io.ReadCloser + if opts.CloudConfig != "" { + var err error + configFile, err = os.Open(opts.CloudConfig) + if err != nil { + klog.Fatalf("Couldn't open cloud provider configuration %s: %#v", opts.CloudConfig, err) + } + defer configFile.Close() + } + + manager, err := newManager() + if err != nil { + klog.Fatalf("Failed to create gridscale manager: %v", err) + } + + // the cloud provider automatically uses all node pools in gridscale. + // This means we don't use the cloudprovider.NodeGroupDiscoveryOptions + // flags (which can be set via '--node-group-auto-discovery' or '--nodes') + return newgridscaleCloudProvider(manager, rl) +} + +// toProviderID returns a provider ID from the given node ID. +func toProviderID(nodeID string) string { + return fmt.Sprintf("%s%s", gridscaleProviderIDPrefix, nodeID) +} + +// toNodeID returns a node ID from the given provider ID. +func toNodeID(providerID string) string { + return strings.TrimPrefix(providerID, gridscaleProviderIDPrefix) +} diff --git a/cluster-autoscaler/cloudprovider/gridscale/gridscale_manager.go b/cluster-autoscaler/cloudprovider/gridscale/gridscale_manager.go new file mode 100644 index 000000000000..6fd4b9df9fb3 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/gridscale/gridscale_manager.go @@ -0,0 +1,164 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gridscale + +import ( + "context" + "errors" + "fmt" + "os" + "strconv" + + "github.com/gridscale/gsclient-go/v3" +) + +const ( + defaultGridscaleAPIURL = "https://api.gridscale.io" + defaultDelayIntervalMilliSecs = 5000 + defaultMaxNumberOfRetries = 5 + defaultMinNodeCount = 1 + + gridscaleK8sActiveStatus = "active" +) + +type nodeGroupClient interface { + GetPaaSService(ctx context.Context, id string) (gsclient.PaaSService, error) + + UpdatePaaSService(ctx context.Context, id string, body gsclient.PaaSServiceUpdateRequest) error + + GetServerList(ctx context.Context) ([]gsclient.Server, error) +} + +// Manager handles gridscale communication and data caching of +// node groups (node pools in DOKS) +type Manager struct { + client nodeGroupClient + clusterUUID string + nodeGroups []*NodeGroup + maxNodeCount int + minNodeCount int +} + +func newManager() (*Manager, error) { + gridscaleUUID := os.Getenv("API_UUID") + if gridscaleUUID == "" { + return nil, errors.New("env var API_UUID is not provided") + } + gridscaleToken := os.Getenv("API_TOKEN") + if gridscaleToken == "" { + return nil, errors.New("env var API_TOKEN is not provided") + } + gskClusterUUID := os.Getenv("CLUSTER_UUID") + if gskClusterUUID == "" { + return nil, errors.New("env var CLUSTER_UUID is not provided") + } + minNodeCount := defaultMinNodeCount + minNodeCountStr := os.Getenv("CLUSTER_MIN_NODE_COUNT") + if minNodeCountStr != "" { + var err error + // convert minNodeCount to int + minNodeCount, err = strconv.Atoi(minNodeCountStr) + if err != nil { + return nil, fmt.Errorf("env var CLUSTER_MIN_NODE_COUNT is not a valid integer: %v", err) + } + } + // min node count must be at least 1 + if minNodeCount < 1 { + return nil, errors.New("env var CLUSTER_MIN_NODE_COUNT must be at least 1") + } + maxNodeCountStr := os.Getenv("CLUSTER_MAX_NODE_COUNT") + if maxNodeCountStr == "" { + return nil, errors.New("env var CLUSTER_MAX_NODE_COUNT is not provided") + } + // convert maxNodeCount to int + maxNodeCount, err := strconv.Atoi(maxNodeCountStr) + if err != nil { + return nil, fmt.Errorf("env var CLUSTER_MAX_NODE_COUNT is not a valid integer: %v", err) + } + // max node count must be larger than min node count + if maxNodeCount < minNodeCount { + return nil, errors.New("env var CLUSTER_MAX_NODE_COUNT must be larger than CLUSTER_MIN_NODE_COUNT") + } + apiURL := defaultGridscaleAPIURL + envVarApiURL := os.Getenv("API_API_URL") + if envVarApiURL != "" { + apiURL = envVarApiURL + } + gsConfig := gsclient.NewConfiguration(apiURL, gridscaleUUID, gridscaleToken, false, true, defaultDelayIntervalMilliSecs, defaultMaxNumberOfRetries) + client := gsclient.NewClient(gsConfig) + // check if gsk cluster exists + _, err = client.GetPaaSService(context.Background(), gskClusterUUID) + if err != nil { + return nil, fmt.Errorf("failed to get gsk cluster: %v", err) + } + m := &Manager{ + client: client, + clusterUUID: gskClusterUUID, + nodeGroups: make([]*NodeGroup, 0), + maxNodeCount: maxNodeCount, + minNodeCount: minNodeCount, + } + + return m, nil +} + +// Refresh refreshes the cache holding the nodegroups. This is called by the CA +// based on the `--scan-interval`. By default it's 10 seconds. 
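+// For gridscale, Refresh reads the managed cluster via GetPaaSService, returns an error if the cluster is not in the "active" state, and rebuilds the node group list from the cluster's "pools" parameter.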
+func (m *Manager) Refresh() error { + ctx := context.Background() + + k8sCluster, err := m.client.GetPaaSService(ctx, m.clusterUUID) + if err != nil { + return err + } + // if k8s cluster's status is not active, return error + if k8sCluster.Properties.Status != gridscaleK8sActiveStatus { + return fmt.Errorf("k8s cluster status is not active: %s", k8sCluster.Properties.Status) + } + nodePools, ok := k8sCluster.Properties.Parameters["pools"].([]interface{}) + if !ok { + return errors.New("'pools' is not found in cluster parameters") + } + nodeGroupList := make([]*NodeGroup, 0) + for _, pool := range nodePools { + nodePoolProperties, ok := pool.(map[string]interface{}) + if !ok { + return errors.New("node pool properties is not a map") + } + nodePoolName, ok := nodePoolProperties["name"].(string) + if !ok { + return errors.New("'name' is not found in node pool properties") + } + nodePoolCount, ok := nodePoolProperties["count"].(float64) + if !ok { + return errors.New("'count' is not found in node pool properties") + } + nodeGroup := &NodeGroup{ + id: fmt.Sprintf("%s-%s", m.clusterUUID, nodePoolName), + name: nodePoolName, + clusterUUID: m.clusterUUID, + client: m.client, + nodeCount: int(nodePoolCount), + minSize: m.minNodeCount, + maxSize: m.maxNodeCount, + } + nodeGroupList = append(nodeGroupList, nodeGroup) + } + + m.nodeGroups = nodeGroupList + return nil +} diff --git a/cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group.go b/cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group.go new file mode 100644 index 000000000000..7fae55336cea --- /dev/null +++ b/cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group.go @@ -0,0 +1,403 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gridscale + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/gridscale/gsclient-go/v3" + apiv1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +var ( + // ErrNodePoolNotExist is return if no node pool exists for a given cluster ID + ErrNodePoolNotExist = errors.New("node pool does not exist") +) + +// NodeGroup implements cloudprovider.NodeGroup interface. NodeGroup contains +// configuration info and functions to control a set of nodes that have the +// same capacity and set of labels. +type NodeGroup struct { + id string + name string + clusterUUID string + client nodeGroupClient + nodeCount int + minSize int + maxSize int +} + +// MaxSize returns maximum size of the node group. +func (n *NodeGroup) MaxSize() int { + return n.maxSize +} + +// MinSize returns minimum size of the node group. +func (n *NodeGroup) MinSize() int { + return n.minSize +} + +// TargetSize returns the current target size of the node group. 
It is possible +// that the number of nodes in Kubernetes is different at the moment but should +// be equal to Size() once everything stabilizes (new nodes finish startup and +// registration or removed nodes are deleted completely). Implementation +// required. +func (n *NodeGroup) TargetSize() (int, error) { + return n.nodeCount, nil +} + +// IncreaseSize increases the size of the node group. To delete a node you need +// to explicitly name it and use DeleteNode. This function should wait until +// node group size is updated. Implementation required. +func (n *NodeGroup) IncreaseSize(delta int) error { + if delta <= 0 { + return fmt.Errorf("delta must be positive, have: %d", delta) + } + + targetSize := n.nodeCount + delta + + if targetSize > n.MaxSize() { + return fmt.Errorf("size increase is too large. current: %d desired: %d max: %d", + n.nodeCount, targetSize, n.MaxSize()) + } + + ctx := context.Background() + k8sCluster, err := n.client.GetPaaSService(ctx, n.clusterUUID) + if err != nil { + return err + } + paramenters := k8sCluster.Properties.Parameters + // Update the node count of the node group + nodePools, ok := paramenters["pools"].([]interface{}) + if !ok { + return errors.New("'pools' is not found in cluster parameters") + } + // find the node pool that we want to update + for i, pool := range nodePools { + nodePoolProperties, ok := pool.(map[string]interface{}) + if !ok { + return errors.New("node pool properties is not a map") + } + nodePoolName, ok := nodePoolProperties["name"].(string) + if !ok { + return errors.New("'name' is not found in node pool properties") + } + if nodePoolName == n.name { + nodePoolProperties["count"] = targetSize + nodePools[i] = nodePoolProperties + break + } + } + paramenters["pools"] = nodePools + updateRequestBody := gsclient.PaaSServiceUpdateRequest{ + Parameters: paramenters, + } + err = n.client.UpdatePaaSService(ctx, n.clusterUUID, updateRequestBody) + if err != nil { + return err + } + // update internal cache + n.nodeCount = targetSize + return nil +} + +// AtomicIncreaseSize is not implemented. +func (n *NodeGroup) AtomicIncreaseSize(delta int) error { + return cloudprovider.ErrNotImplemented +} + +// DeleteNodes deletes nodes from this node group (and also increasing the size +// of the node group with that). Error is returned either on failure or if the +// given node doesn't belong to this node group. This function should wait +// until node group size is updated. Implementation required. 
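+// For gridscale, individual nodes cannot be targeted: this method only lowers the pool's node count by len(nodes) via UpdatePaaSService, and the gridscale API then removes the pool's trailing nodes, so StartDeletionForGridscaleProvider makes sure the passed nodes are exactly those trailing nodes.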
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error { + klog.V(4).Infof("Deleting nodes: %v from node group %s", nodes, n.name) + + targetSize := n.nodeCount - len(nodes) + ctx := context.Background() + k8sCluster, err := n.client.GetPaaSService(ctx, n.clusterUUID) + if err != nil { + return err + } + paramenters := k8sCluster.Properties.Parameters + // Update the node count of the node group + nodePools, ok := paramenters["pools"].([]interface{}) + if !ok { + return errors.New("'pools' is not found in cluster parameters") + } + // find the node pool that we want to update + for i, pool := range nodePools { + nodePoolProperties, ok := pool.(map[string]interface{}) + if !ok { + return errors.New("node pool properties is not a map") + } + nodePoolName, ok := nodePoolProperties["name"].(string) + if !ok { + return errors.New("'name' is not found in node pool properties") + } + if nodePoolName == n.name { + nodePoolProperties["count"] = targetSize + nodePools[i] = nodePoolProperties + break + } + } + paramenters["pools"] = nodePools + updateRequestBody := gsclient.PaaSServiceUpdateRequest{ + Parameters: paramenters, + } + err = n.client.UpdatePaaSService(ctx, n.clusterUUID, updateRequestBody) + if err != nil { + return err + } + // update internal cache + n.nodeCount = targetSize + return nil +} + +// DecreaseTargetSize decreases the target size of the node group. This function +// doesn't permit to delete any existing node and can be used only to reduce the +// request for new nodes that have not been yet fulfilled. Delta should be negative. +// It is assumed that cloud provider will not delete the existing nodes when there +// is an option to just decrease the target. Implementation required. +func (n *NodeGroup) DecreaseTargetSize(delta int) error { + if delta >= 0 { + return fmt.Errorf("delta must be negative, have: %d", delta) + } + + targetSize := n.nodeCount + delta + if targetSize < n.MinSize() { + return fmt.Errorf("size decrease is too small. current: %d desired: %d min: %d", + n.nodeCount, targetSize, n.MinSize()) + } + + ctx := context.Background() + k8sCluster, err := n.client.GetPaaSService(ctx, n.clusterUUID) + if err != nil { + return err + } + paramenters := k8sCluster.Properties.Parameters + // Update the node count of the node group + nodePools, ok := paramenters["pools"].([]interface{}) + if !ok { + return errors.New("'pools' is not found in cluster parameters") + } + // find the node pool that we want to update + for i, pool := range nodePools { + nodePoolProperties, ok := pool.(map[string]interface{}) + if !ok { + return errors.New("node pool properties is not a map") + } + nodePoolName, ok := nodePoolProperties["name"].(string) + if !ok { + return errors.New("'name' is not found in node pool properties") + } + if nodePoolName == n.name { + nodePoolProperties["count"] = targetSize + nodePools[i] = nodePoolProperties + break + } + } + paramenters["pools"] = nodePools + updateRequestBody := gsclient.PaaSServiceUpdateRequest{ + Parameters: paramenters, + } + err = n.client.UpdatePaaSService(ctx, n.clusterUUID, updateRequestBody) + if err != nil { + return err + } + // update internal cache + n.nodeCount = targetSize + return nil +} + +// Id returns an unique identifier of the node group. +func (n *NodeGroup) Id() string { + return n.id +} + +// Debug returns a string containing all information regarding this node group. 
+func (n *NodeGroup) Debug() string { + return fmt.Sprintf("cluster ID: %s (min:%d max:%d)", n.Id(), n.MinSize(), n.MaxSize()) +} + +// Nodes returns a list of all nodes that belong to this node group. It is +// required that Instance objects returned by this method have Id field set. +// Other fields are optional. +// TODO: identify which nodes belong to this node group +func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) { + //TODO(arslan): after increasing a node pool, the number of nodes is not + //anymore equal to the cache here. We should return a placeholder node for + //that. As an example PR check this out: + //https://github.com/kubernetes/autoscaler/pull/2235 + + serverList, err := n.client.GetServerList(context.Background()) + if err != nil { + return nil, err + } + + var gskNodeList []gsclient.Server + for _, server := range serverList { + if n.doesNodeMatch(server) { + gskNodeList = append(gskNodeList, server) + } + } + nodeList := toInstances(gskNodeList) + + klog.V(4).Infof("Node list: %v ", nodeList) + return nodeList, nil +} + +func (n *NodeGroup) doesNodeMatch(server gsclient.Server) bool { + // skip master node + if strings.Contains(server.Properties.Name, "master") { + return false + } + + // write server labels into a map for easier lookup + labelMap := make(map[string]struct{}, len(server.Properties.Labels)) + + containsPoolLabel := false + + for _, label := range server.Properties.Labels { + labelMap[label] = struct{}{} + + if strings.HasPrefix(label, "#gsk-pool#") { + containsPoolLabel = true + } + } + + // Build expected labels which relate the server to this node group + expectedClusterLabel := fmt.Sprintf("#gsk#%s", n.clusterUUID) + expectedPoolLabel := fmt.Sprintf("#gsk-pool#%s", n.name) + + // skip nodes not belonging to this cluster + if _, found := labelMap[expectedClusterLabel]; !found { + return false + } + + // Keep nodes which have the pool label (added in GSK 1.31) + if _, found := labelMap[expectedPoolLabel]; found { + return true + } + + // Here the server is part of this cluster but not of this pool. + // For backwards compatibility it could be that this cluster does not (yet) have the newly introduced + // "#gsk-pool#" label. We want to handle that. + // If the server has ANY pool label, we assume it correctly does not belong to this node group. + // If the server does not have ANY pool label, it may have not yet been labeled after the upgrade. + // In the last case, we match based on the server name. + if !containsPoolLabel { + // keep nodes whose name indicates they belong to this node group + return strings.Contains(server.Properties.Name, n.name) + } + + return false +} + +// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty +// (as if just started) node. This will be used in scale-up simulations to +// predict what would a new node look like if a node group was expanded. The +// returned NodeInfo is expected to have a fully populated Node object, with +// all of the labels, capacity and allocatable information as well as all pods +// that are started on the node by default, using manifest (most likely only +// kube-proxy). Implementation optional. +func (n *NodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Exist checks if the node group really exists on the cloud provider side. +// Allows to tell the theoretical node group from the real one. Implementation +// required. 
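+// For gridscale, node groups are rebuilt from the cluster's pool list on every Refresh, so any group returned by NodeGroups() is assumed to exist.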
+func (n *NodeGroup) Exist() bool { + return true +} + +// Create creates the node group on the cloud provider side. Implementation +// optional. +func (n *NodeGroup) Create() (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Delete deletes the node group on the cloud provider side. This will be +// executed only for autoprovisioned node groups, once their size drops to 0. +// Implementation optional. +func (n *NodeGroup) Delete() error { + return cloudprovider.ErrNotImplemented +} + +// Autoprovisioned returns true if the node group is autoprovisioned. An +// autoprovisioned group was created by CA and can be deleted when scaled to 0. +func (n *NodeGroup) Autoprovisioned() bool { + return false +} + +// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular +// NodeGroup. Returning a nil will result in using default options. +func (n *NodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// toInstances converts a slice of gsclient.Server to +// cloudprovider.Instance +func toInstances(nodes []gsclient.Server) []cloudprovider.Instance { + instances := make([]cloudprovider.Instance, 0, len(nodes)) + for _, nd := range nodes { + instances = append(instances, toInstance(nd)) + } + return instances +} + +// toInstance converts the given gsclient.Server to a +// cloudprovider.Instance +func toInstance(node gsclient.Server) cloudprovider.Instance { + return cloudprovider.Instance{ + Id: toProviderID(node.Properties.ObjectUUID), + Status: toInstanceStatus(node.Properties.Status), + } +} + +// toInstanceStatus converts the given node status to a +// cloudprovider.InstanceStatus +func toInstanceStatus(nodeStatus string) *cloudprovider.InstanceStatus { + if nodeStatus == "" { + return nil + } + + st := &cloudprovider.InstanceStatus{} + switch nodeStatus { + case "provisioning": + st.State = cloudprovider.InstanceCreating + case "active": + st.State = cloudprovider.InstanceRunning + default: + st.ErrorInfo = &cloudprovider.InstanceErrorInfo{ + ErrorClass: cloudprovider.OtherErrorClass, + ErrorCode: "no-code-gridscale", + ErrorMessage: fmt.Sprintf("unknown node status: %s", nodeStatus), + } + } + + return st +} diff --git a/cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group_test.go b/cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group_test.go new file mode 100644 index 000000000000..0bfcbcf77085 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/gridscale/gridscale_node_group_test.go @@ -0,0 +1,266 @@ +package gridscale + +import ( + "context" + "errors" + "fmt" + "github.com/gridscale/gsclient-go/v3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "testing" +) + +type nodeGroupMockClient struct { + GetPaaSServiceFunc func(ctx context.Context, id string) (gsclient.PaaSService, error) + UpdatePaaSServiceFunc func(ctx context.Context, id string, body gsclient.PaaSServiceUpdateRequest) error + GetServerListFunc func(ctx context.Context) ([]gsclient.Server, error) +} + +var _ = nodeGroupClient(&nodeGroupMockClient{}) // Interface guard + +func (n nodeGroupMockClient) GetPaaSService(ctx context.Context, id string) (gsclient.PaaSService, error) { + if n.GetPaaSServiceFunc != nil { + return n.GetPaaSServiceFunc(ctx, id) + } + panic("GetPaaSServiceFunc is not set") +} + +func (n nodeGroupMockClient) UpdatePaaSService(ctx context.Context, id string, body 
gsclient.PaaSServiceUpdateRequest) error { + if n.UpdatePaaSServiceFunc != nil { + return n.UpdatePaaSServiceFunc(ctx, id, body) + } + panic("UpdatePaaSServiceFunc is not set") +} + +func (n nodeGroupMockClient) GetServerList(ctx context.Context) ([]gsclient.Server, error) { + if n.GetServerListFunc != nil { + return n.GetServerListFunc(ctx) + } + panic("GetServerListFunc is not set") +} + +func TestNodeGroup_Nodes(t *testing.T) { + var client *nodeGroupMockClient + var group *NodeGroup + + // clusterID is the ID if this node groups cluster. + const clusterID = "42" + // nodePoolName is the name of this node group (= node pool). + const nodePoolName = "pool-dev" + + setup := func() { + client = &nodeGroupMockClient{} + group = &NodeGroup{ + client: client, + clusterUUID: clusterID, + name: nodePoolName, + // TODO: Add other relevant fields + } + } + + t.Run("returns no nodes if api returns an empty server list", func(t *testing.T) { + setup() + + client.GetServerListFunc = func(ctx context.Context) ([]gsclient.Server, error) { + return make([]gsclient.Server, 0), nil + } + + nodes, err := group.Nodes() + require.NoError(t, err) + require.Len(t, nodes, 0) + }) + + t.Run("does not return the master server", func(t *testing.T) { + setup() + + client.GetServerListFunc = func(ctx context.Context) ([]gsclient.Server, error) { + return []gsclient.Server{ + { + Properties: gsclient.ServerProperties{ + Name: "test-cluster-master-0", + Labels: []string{ + "#gsk#12345", + }, + }, + }, + }, nil + } + + nodes, err := group.Nodes() + require.NoError(t, err) + require.Len(t, nodes, 0) + }) + + t.Run("returns error if client returns an error", func(t *testing.T) { + setup() + + client.GetServerListFunc = func(ctx context.Context) ([]gsclient.Server, error) { + return nil, errors.New("some unexpected error") + } + + nodes, err := group.Nodes() + assert.Error(t, err) + assert.Empty(t, nodes) + }) + + t.Run("returns no nodes if no server belongs to this cluster", func(t *testing.T) { + setup() + + client.GetServerListFunc = func(ctx context.Context) ([]gsclient.Server, error) { + return []gsclient.Server{ + // Server which belongs to another cluster + { + Properties: gsclient.ServerProperties{ + Name: "some-other-cluster-node-pool-dev-0", + Labels: []string{ + "#gsk#12345", + "#gsk-pool#pool-dev", + }, + }, + }, + // Server which does not even belong to any cluster + { + Properties: gsclient.ServerProperties{ + Name: "my-server", + Labels: []string{ + "All your server are belong to us", + }, + }, + }, + }, nil + } + + nodes, err := group.Nodes() + require.NoError(t, err) + assert.Empty(t, nodes) + }) + + t.Run("returns nodes which belongs to this cluster and node pool", func(t *testing.T) { + setup() + + client.GetServerListFunc = func(ctx context.Context) ([]gsclient.Server, error) { + return []gsclient.Server{ + // Server which belongs to another cluster but with the same pool name + { + Properties: gsclient.ServerProperties{ + Name: "some-other-cluster-node-pool-dev-0", + Labels: []string{ + "#gsk#12345", + "#gsk-pool#pool-dev", + }, + }, + }, + // Server which belongs to this cluster and the dev pool + { + Properties: gsclient.ServerProperties{ + Name: "my-cluster-node-pool-dev-0", + Labels: []string{ + fmt.Sprintf("#gsk#%s", clusterID), + "#gsk-pool#pool-dev", + }, + }, + }, + // Server which belongs to this cluster and the dev pool + { + Properties: gsclient.ServerProperties{ + Name: "my-cluster-node-pool-dev-1", + Labels: []string{ + fmt.Sprintf("#gsk#%s", clusterID), + "#gsk-pool#pool-dev", + }, + }, 
+ }, + // Server which belongs to this cluster but to another node pool, should not be included! + { + Properties: gsclient.ServerProperties{ + Name: "my-cluster-node-pool-prod-1", + Labels: []string{ + fmt.Sprintf("#gsk#%s", clusterID), + "#gsk-pool#pool-prod", + }, + }, + }, + }, nil + } + + nodes, err := group.Nodes() + require.NoError(t, err) + assert.Len(t, nodes, 2) + }) + + t.Run("returns nodes which belongs to this cluster and node pool even on weired node pool names", func(t *testing.T) { + setup() + + // This test assumes we have 2 pools named "pool0" and "pool01", which could be used + // by users. + group.name = "pool0" + + client.GetServerListFunc = func(ctx context.Context) ([]gsclient.Server, error) { + return []gsclient.Server{ + // Server which belongs to pool0 + { + Properties: gsclient.ServerProperties{ + Name: "my-cluster-node-pool0-0", + Labels: []string{ + fmt.Sprintf("#gsk#%s", clusterID), + "#gsk-pool#pool0", + }, + }, + }, + // Server which belongs to pool1 + { + Properties: gsclient.ServerProperties{ + Name: "my-cluster-node-pool01-0", + Labels: []string{ + fmt.Sprintf("#gsk#%s", clusterID), + "#gsk-pool#pool01", + }, + }, + }, + }, nil + } + + nodes, err := group.Nodes() + require.NoError(t, err) + assert.Len(t, nodes, 1) + }) + + t.Run("returns nodes based on server name if no pool name is set", func(t *testing.T) { + setup() + + // This test assumes we have 2 pools named "pool0" and "pool01", which could be used + // by users. + group.name = "pool0" + + client.GetServerListFunc = func(ctx context.Context) ([]gsclient.Server, error) { + return []gsclient.Server{ + // Server which belongs to pool0 + { + Properties: gsclient.ServerProperties{ + Name: "my-cluster-node-pool0-0", + Labels: []string{ + fmt.Sprintf("#gsk#%s", clusterID), + }, + }, + }, + // Server which belongs to pool1 + { + Properties: gsclient.ServerProperties{ + Name: "my-cluster-node-pool01-0", + Labels: []string{ + fmt.Sprintf("#gsk#%s", clusterID), + }, + }, + }, + }, nil + } + + nodes, err := group.Nodes() + require.NoError(t, err) + + // Why 2 nodes? Because the implementation will fallback to matching servers based on their name. + // This was the logic used before the #gsk-pool# label was introduced. For compatibility reasons + // we keep this behavior, if a server does not have that label yet. + assert.Len(t, nodes, 2) + }) +} diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator.go b/cluster-autoscaler/core/scaledown/actuation/actuator.go index eccbe4c0884b..89386f74a19f 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator.go @@ -17,6 +17,8 @@ limitations under the License. package actuation import ( + "fmt" + "sort" "strings" "time" @@ -70,6 +72,13 @@ type actuatorNodeGroupConfigGetter interface { GetIgnoreDaemonSetsUtilization(nodeGroup cloudprovider.NodeGroup) (bool, error) } +type NodeGroupWithNodes struct { + Group cloudprovider.NodeGroup + All []*apiv1.Node + Empty []*apiv1.Node + Drain []*apiv1.Node +} + // NewActuator returns a new instance of Actuator. 
func NewActuator(ctx *context.AutoscalingContext, scaleStateNotifier nodegroupchange.NodeGroupChangeObserver, ndt *deletiontracker.NodeDeletionTracker, deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, configGetter actuatorNodeGroupConfigGetter) *Actuator { ndb := NewNodeDeletionBatcher(ctx, scaleStateNotifier, ndt, ctx.NodeDeletionBatcherInterval) @@ -148,6 +157,250 @@ func (a *Actuator) StartDeletion(empty, drain []*apiv1.Node) (status.ScaleDownRe return status.ScaleDownNodeDeleteStarted, scaledDownNodes, nil } +// groupNodesByNodeGroup groups empty and drain nodes by their node group. +// If sortByNodeName is true, the nodes in each group will be sorted alphabetically by node name. +func (a *Actuator) groupNodesByNodeGroup(empty, drain, all []*apiv1.Node, sortByNodeName bool) (map[string]NodeGroupWithNodes, errors.AutoscalerError) { + grouped := map[string]NodeGroupWithNodes{} + for _, node := range empty { + nodeGroup, err := a.ctx.CloudProvider.NodeGroupForNode(node) + if err != nil { + return nil, errors.NewAutoscalerError(errors.CloudProviderError, "failed to find node group for %s: %v", node.Name, err) + } + if _, ok := grouped[nodeGroup.Id()]; !ok { + grouped[nodeGroup.Id()] = NodeGroupWithNodes{ + Group: nodeGroup, + All: []*apiv1.Node{}, + Empty: []*apiv1.Node{}, + Drain: []*apiv1.Node{}, + } + } + currentNodeGroupWithNodes := grouped[nodeGroup.Id()] + currentNodeGroupWithNodes.Empty = append(currentNodeGroupWithNodes.Empty, node) + grouped[nodeGroup.Id()] = currentNodeGroupWithNodes + } + + for _, node := range drain { + nodeGroup, err := a.ctx.CloudProvider.NodeGroupForNode(node) + if err != nil { + return nil, errors.NewAutoscalerError(errors.CloudProviderError, "failed to find node group for %s: %v", node.Name, err) + } + if _, ok := grouped[nodeGroup.Id()]; !ok { + grouped[nodeGroup.Id()] = NodeGroupWithNodes{ + Group: nodeGroup, + All: []*apiv1.Node{}, + Empty: []*apiv1.Node{}, + Drain: []*apiv1.Node{}, + } + } + currentNodeGroupWithNodes := grouped[nodeGroup.Id()] + currentNodeGroupWithNodes.Drain = append(currentNodeGroupWithNodes.Drain, node) + grouped[nodeGroup.Id()] = currentNodeGroupWithNodes + } + + for _, node := range all { + nodeGroup, err := a.ctx.CloudProvider.NodeGroupForNode(node) + if err != nil { + return nil, errors.NewAutoscalerError(errors.CloudProviderError, "failed to find node group for %s: %v", node.Name, err) + } + if _, ok := grouped[nodeGroup.Id()]; !ok { + grouped[nodeGroup.Id()] = NodeGroupWithNodes{ + Group: nodeGroup, + All: []*apiv1.Node{}, + Empty: []*apiv1.Node{}, + Drain: []*apiv1.Node{}, + } + } + currentNodeGroupWithNodes := grouped[nodeGroup.Id()] + currentNodeGroupWithNodes.All = append(currentNodeGroupWithNodes.All, node) + grouped[nodeGroup.Id()] = currentNodeGroupWithNodes + } + // if sortByNodeName is true, sort the nodes alphabetically by node name in each group + if sortByNodeName { + for _, nodeGroupWithNodes := range grouped { + sort.Slice(nodeGroupWithNodes.Empty, func(i, j int) bool { + iNameLower := strings.ToLower(nodeGroupWithNodes.Empty[i].Name) + jNameLower := strings.ToLower(nodeGroupWithNodes.Empty[j].Name) + return iNameLower < jNameLower + }) + sort.Slice(nodeGroupWithNodes.Drain, func(i, j int) bool { + iNameLower := strings.ToLower(nodeGroupWithNodes.Drain[i].Name) + jNameLower := strings.ToLower(nodeGroupWithNodes.Drain[j].Name) + return iNameLower < jNameLower + }) + sort.Slice(nodeGroupWithNodes.All, func(i, j int) bool { + iNameLower := strings.ToLower(nodeGroupWithNodes.All[i].Name) + 
jNameLower := strings.ToLower(nodeGroupWithNodes.All[j].Name) + return iNameLower < jNameLower + }) + } + } + return grouped, nil +} + +// StartDeletionForGridscaleProvider triggers a new deletion process for the gridscale provider. +// *NOTE* The gridscale provider does not support deleting specific nodes; it only supports +// scaling up/down by changing the number of nodes in the cluster. For scale down, the last n nodes are +// deleted automatically by the provider. Therefore, we need to follow these steps: +// 1. Count the number of nodes to be deleted (including to-be-deleted empty and to-be-deleted non-empty nodes). +// 2. Replace the to-be-deleted nodes with the last n nodes in the cluster. +// 3. Taint & drain the to-be-deleted nodes. +// 4. Delete the last n nodes in the cluster. +func (a *Actuator) StartDeletionForGridscaleProvider(empty, drain, all []*apiv1.Node) (status.ScaleDownResult, []*status.ScaleDownNode, errors.AutoscalerError) { + a.nodeDeletionScheduler.ResetAndReportMetrics() + deletionStartTime := time.Now() + defer func() { metrics.UpdateDuration(metrics.ScaleDownNodeDeletion, time.Since(deletionStartTime)) }() + if len(empty)+len(drain) >= len(all) { + // If the number of nodes to be deleted is greater than or equal to the number of nodes in the cluster, + // we cannot delete the nodes. Return an error. + return status.ScaleDownError, nil, errors.NewAutoscalerError( + errors.InternalError, + "cannot delete nodes because the number of nodes to be deleted is greater than or equal to the number of nodes in the cluster. There has to be at least one node left in the cluster.", + ) + } + + // Group the empty/drain nodes by node group. + nodesToDeleteByNodeGroup, err := a.groupNodesByNodeGroup(empty, drain, all, true) + if err != nil { + return status.ScaleDownError, nil, err + } + + var scaledDownNodes []*status.ScaleDownNode + // Scale down nodes for each node group. One node group at a time. + for nodeGroupID, nodeGroupWithNodes := range nodesToDeleteByNodeGroup { + klog.V(4).Infof(" ------ Start scaling down nodes for node group %s", nodeGroupID) + emptyToDeleteByGroup := []*apiv1.Node{} + drainToDeleteByGroup := []*apiv1.Node{} + emptyToDeleteNodeGroupViews, drainToDeleteNodeGroupViews := a.budgetProcessor.CropNodes( + a.nodeDeletionTracker, + nodeGroupWithNodes.Empty, + nodeGroupWithNodes.Drain, + ) + for _, bucket := range emptyToDeleteNodeGroupViews { + emptyToDeleteByGroup = append(emptyToDeleteByGroup, bucket.Nodes...) + } + for _, bucket := range drainToDeleteNodeGroupViews { + drainToDeleteByGroup = append(drainToDeleteByGroup, bucket.Nodes...) + } + if len(emptyToDeleteByGroup) == 0 && len(drainToDeleteByGroup) == 0 { + return status.ScaleDownNoNodeDeleted, nil, nil + } + + klog.V(4).Infof("[**]Original empty nodes in node group %s (count: %d):", nodeGroupID, len(emptyToDeleteByGroup)) + for _, node := range emptyToDeleteByGroup { + klog.V(4).Infof("\t-\t%s\n", node.Name) + } + klog.V(4).Infof("[**]Original drain nodes in node group %s (count: %d):", nodeGroupID, len(drainToDeleteByGroup)) + for _, node := range drainToDeleteByGroup { + klog.V(4).Infof("\t-\t%s\n", node.Name) + } + + // copy all nodes (for safety). + copiedAllByGroup := make([]*apiv1.Node, len(nodeGroupWithNodes.All)) + copy(copiedAllByGroup, nodeGroupWithNodes.All) + // Replace the to-be-deleted nodes with the last n nodes in the group.
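+ // Illustration (hypothetical node names): if All is sorted to [node-0 ... node-4] and two candidates were selected above, the provider will always remove node-3 and node-4, so those two replace the originally selected nodes below.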
+ var nodesToDeleteByGroup []*apiv1.Node + nodesToDeleteCountByGroup := len(emptyToDeleteByGroup) + len(drainToDeleteByGroup) + if nodesToDeleteCountByGroup > 0 { + if nodesToDeleteCountByGroup > len(copiedAllByGroup) { + return status.ScaleDownError, nil, errors.NewAutoscalerError( + errors.InternalError, + fmt.Sprintf("cannot delete nodes because the number of nodes to be deleted is greater than the total node count in the node group %s.", nodeGroupID), + ) + } + nodesToDeleteByGroup = copiedAllByGroup[len(copiedAllByGroup)-nodesToDeleteCountByGroup:] + } + klog.V(4).Info("[**]New empty nodes to delete: ", len(nodesToDeleteByGroup)) + for _, node := range nodesToDeleteByGroup { + klog.V(4).Infof("\t-\t%s\n", node.Name) + } + + // Clean taint from OLD to-be-deleted nodes + oldToBeDeletedNodes := append(emptyToDeleteByGroup, drainToDeleteByGroup...) + for _, node := range oldToBeDeletedNodes { + if _, err := taints.CleanDeletionCandidate(node, a.ctx.ClientSet); err != nil { + klog.Warningf("failed to clean taint DeletionCandidateTaint from node %s: %v", node.Name, err) + } + if _, err := taints.CleanToBeDeleted(node, a.ctx.ClientSet, a.ctx.CordonNodeBeforeTerminate); err != nil { + klog.Warningf("failed to clean taint ToBeDeletedTaint from node %s: %v", node.Name, err) + } + } + + // do some sanity check + if len(nodesToDeleteByGroup) <= 0 { + return status.ScaleDownError, nil, errors.NewAutoscalerError( + errors.InternalError, + "cannot delete nodes because there is no node to be deleted.", + ) + } + for i, node := range nodesToDeleteByGroup { + if node == nil { + return status.ScaleDownError, nil, errors.NewAutoscalerError( + errors.InternalError, + fmt.Sprintf("cannot delete nodes because the node at index %d of to-be-deleted nodes is nil.", i), + ) + } + } + + nodesToDeleteNodeGroupViews := []*budgets.NodeGroupView{ + { + Nodes: nodesToDeleteByGroup, + }, + } + + // Taint all nodes that need drain synchronously, but don't start any drain/deletion yet. Otherwise, pods evicted from one to-be-deleted node + // could get recreated on another. + klog.V(4).Infof("Tainting to-be-deleted nodes for node group %s", nodeGroupID) + _, err := a.taintNodesSync(nodesToDeleteNodeGroupViews) + if err != nil { + return status.ScaleDownError, nil, err + } + // Clean taint from NEW to-be-deleted nodes after scale down. We don't care about the error here. + defer func() { + klog.V(4).Infof("Cleaning taint from to-be-deleted nodes for node group %s", nodeGroupID) + for _, node := range nodesToDeleteByGroup { + taints.CleanToBeDeleted(node, a.ctx.ClientSet, a.ctx.CordonNodeBeforeTerminate) + } + }() + klog.V(4).Infof("Finish tainting to-be-deleted nodes for node group %s", nodeGroupID) + + for _, drainNode := range nodesToDeleteByGroup { + if sdNode, err := a.scaleDownNodeToReport(drainNode, true); err == nil { + klog.V(0).Infof("Scale-down: removing node %s, utilization: %v, pods to reschedule: %s", drainNode.Name, sdNode.UtilInfo, joinPodNames(sdNode.EvictedPods)) + a.ctx.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDown", "Scale-down: removing node %s, utilization: %v, pods to reschedule: %s", drainNode.Name, sdNode.UtilInfo, joinPodNames(sdNode.EvictedPods)) + scaledDownNodes = append(scaledDownNodes, sdNode) + } else { + klog.Errorf("Scale-down: couldn't report scaled down node, err: %v", err) + } + } + + klog.V(4).Infof("Draining to-be-deleted nodes for node group %s", nodeGroupID) + // Drain to-be-deleted nodes synchronously. 
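+ // Draining must complete before DeleteNodes lowers the pool count, because gridscale then removes the trailing nodes right away and their pods need to be evicted first.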
+ finishFuncList, cpErr := a.drainNodesSyncForGridscaleProvider(nodeGroupID, nodesToDeleteByGroup) + if cpErr != nil { + return status.ScaleDownError, nil, errors.NewAutoscalerError(errors.CloudProviderError, "failed to drain nodes: %v", cpErr) + } + klog.V(4).Infof("Finish draining to-be-deleted nodes for node group %s", nodeGroupID) + + klog.V(4).Infof("Start scaling down nodes for node group %s", nodeGroupID) + // Delete the last n nodes in the cluster. + dErr := nodeGroupWithNodes.Group.DeleteNodes(nodesToDeleteByGroup) + if dErr != nil { + for _, finishFunc := range finishFuncList { + finishFunc(status.NodeDeleteErrorFailedToDelete, dErr) + } + return status.ScaleDownError, nil, errors.NewAutoscalerError(errors.CloudProviderError, "failed to delete nodes: %v", dErr) + } + for _, finishFunc := range finishFuncList { + finishFunc(status.NodeDeleteOk, nil) + } + klog.V(4).Infof(" ------ Finish scaling down nodes for node group %s", nodeGroupID) + } + klog.V(4).Infof("Finish scaling down nodes") + return status.ScaleDownNodeDeleteStarted, scaledDownNodes, nil +} + // deleteAsyncEmpty immediately starts deletions asynchronously. // scaledDownNodes return value contains all nodes for which deletion successfully started. func (a *Actuator) deleteAsyncEmpty(NodeGroupViews []*budgets.NodeGroupView, nodeDeleteDelayAfterTaint time.Duration) (reportedSDNodes []*status.ScaleDownNode) { @@ -218,6 +471,51 @@ func (a *Actuator) taintNodesSync(NodeGroupViews []*budgets.NodeGroupView) (time return nodeDeleteDelayAfterTaint, nil } +func (a *Actuator) drainNodesSyncForGridscaleProvider(nodeGroupID string, nodes []*apiv1.Node) ([]func(resultType status.NodeDeleteResultType, err error), errors.AutoscalerError) { + var finishFuncList []func(resultType status.NodeDeleteResultType, err error) + clusterSnapshot, err := a.createSnapshot(nodes) + if err != nil { + klog.Errorf("Scale-down: couldn't create delete snapshot, err: %v", err) + nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "createSnapshot returned error %v", err)} + for _, node := range nodes { + a.nodeDeletionScheduler.AbortNodeDeletion(node, nodeGroupID, true, "failed to create delete snapshot", nodeDeleteResult) + } + return nil, errors.NewAutoscalerError(errors.InternalError, "couldn't create delete snapshot, err: %v", err) + } + for _, node := range nodes { + nodeInfo, err := clusterSnapshot.NodeInfos().Get(node.Name) + if err != nil { + klog.Errorf("Scale-down: can't retrieve node %q from snapshot, err: %v", node.Name, err) + nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "nodeInfos.Get for %q returned error: %v", node.Name, err)} + a.nodeDeletionScheduler.AbortNodeDeletion(node, nodeGroupID, true, "failed to get node info", nodeDeleteResult) + continue + } + a.nodeDeletionTracker.StartDeletionWithDrain(nodeGroupID, node.Name) + evictionResults, err := a.nodeDeletionScheduler.evictor.DrainNode(a.ctx, nodeInfo) + klog.V(4).Infof("Scale-down: drain results for node %s: %v", node.Name, evictionResults) + if err != nil { + a.nodeDeletionTracker.EndDeletion(nodeGroupID, node.Name, status.NodeDeleteResult{ + Err: err, + ResultType: status.NodeDeleteErrorFailedToEvictPods, + PodEvictionResults: evictionResults, + }) + a.ctx.Recorder.Eventf(node, apiv1.EventTypeWarning, "ScaleDownFailed", "failed to drain the node: %v", err) + return nil, 
errors.NewAutoscalerError(errors.ApiCallError, "couldn't drain node %q", node.Name) + } + nodeName := node.Name + // finishFunc is invoked by the caller once DeleteNodes has completed, so the deletion tracker records the final per-node result. + finishFunc := func(resultType status.NodeDeleteResultType, err error) { + result := status.NodeDeleteResult{ + Err: err, + ResultType: resultType, + PodEvictionResults: evictionResults, + } + a.nodeDeletionTracker.EndDeletion(nodeGroupID, nodeName, result) + } + finishFuncList = append(finishFuncList, finishFunc) + } + return finishFuncList, nil +} + // deleteAsyncDrain asynchronously starts deletions with drain for all provided nodes. scaledDownNodes return value contains all nodes for which // deletion successfully started. func (a *Actuator) deleteAsyncDrain(NodeGroupViews []*budgets.NodeGroupView, nodeDeleteDelayAfterTaint time.Duration) (reportedSDNodes []*status.ScaleDownNode) { diff --git a/cluster-autoscaler/core/scaledown/legacy/wrapper.go b/cluster-autoscaler/core/scaledown/legacy/wrapper.go index 36ccad90ed25..2010b045cee9 100644 --- a/cluster-autoscaler/core/scaledown/legacy/wrapper.go +++ b/cluster-autoscaler/core/scaledown/legacy/wrapper.go @@ -98,6 +98,16 @@ func (p *ScaleDownWrapper) StartDeletion(empty, needDrain []*apiv1.Node) (status return p.actuator.StartDeletion(empty, needDrain) } +// StartDeletionForGridscaleProvider triggers the actual scale-down logic for the gridscale provider. +func (p *ScaleDownWrapper) StartDeletionForGridscaleProvider(empty, needDrain, all []*apiv1.Node) (status.ScaleDownResult, []*status.ScaleDownNode, errors.AutoscalerError) { + // Done to preserve legacy behavior, see comment on NodesToDelete. + if p.lastNodesToDeleteErr != nil || p.lastNodesToDeleteResult != status.ScaleDownNodeDeleteStarted { + return p.lastNodesToDeleteResult, []*status.ScaleDownNode{}, p.lastNodesToDeleteErr + } + + return p.actuator.StartDeletionForGridscaleProvider(empty, needDrain, all) +} + // CheckStatus snapshots current deletion status func (p *ScaleDownWrapper) CheckStatus() scaledown.ActuationStatus { return p.actuator.CheckStatus() diff --git a/cluster-autoscaler/core/scaledown/scaledown.go b/cluster-autoscaler/core/scaledown/scaledown.go index b73a4361a24f..52aa0b5b2e5a 100644 --- a/cluster-autoscaler/core/scaledown/scaledown.go +++ b/cluster-autoscaler/core/scaledown/scaledown.go @@ -57,6 +57,9 @@ type Actuator interface { // Actuator to ignore some of them e.g. if max configured level of // parallelism is reached. StartDeletion(empty, needDrain []*apiv1.Node) (status.ScaleDownResult, []*status.ScaleDownNode, errors.AutoscalerError) + // StartDeletionForGridscaleProvider is similar to StartDeletion, but + // it is used for the gridscale provider. + StartDeletionForGridscaleProvider(empty, needDrain, all []*apiv1.Node) (status.ScaleDownResult, []*status.ScaleDownNode, errors.AutoscalerError) // CheckStatus returns an immutable snapshot of ongoing deletions. CheckStatus() ActuationStatus // ClearResultsNotNewerThan removes information about deletions finished diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index b1625d2cb8f6..0e04898204c2 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -255,6 +255,23 @@ func (a *StaticAutoscaler) cleanUpIfRequired() { a.initialized = true } +// cleanUpTaintsForAllNodes removes ToBeDeletedByClusterAutoscaler and DeletionCandidateOfClusterAutoscaler taints added by the CA. +// This function should be called when the CA is shutting down.
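+// It lists the ready nodes and bulk-removes the hard ToBeDeleted taint from them (and, when soft tainting is disabled via MaxBulkSoftTaintCount == 0, the DeletionCandidate taint as well), so nodes are not left unschedulable after the autoscaler exits.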
+func (a *StaticAutoscaler) cleanUpTaintsForAllNodes() { + if readyNodes, err := a.ReadyNodeLister().List(); err != nil { + klog.Errorf("Failed to list ready nodes, not cleaning up taints: %v", err) + } else { + taints.CleanAllToBeDeleted(readyNodes, + a.AutoscalingContext.ClientSet, a.Recorder, a.CordonNodeBeforeTerminate) + if a.AutoscalingContext.AutoscalingOptions.MaxBulkSoftTaintCount == 0 { + // Clean old taints if soft taints handling is disabled + taints.CleanAllDeletionCandidates(readyNodes, + a.AutoscalingContext.ClientSet, a.Recorder) + } + } + a.initialized = true +} + func (a *StaticAutoscaler) initializeClusterSnapshot(nodes []*apiv1.Node, scheduledPods []*apiv1.Pod) caerrors.AutoscalerError { a.ClusterSnapshot.Clear() @@ -646,7 +663,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr scaleDownStart := time.Now() metrics.UpdateLastTime(metrics.ScaleDown, scaleDownStart) empty, needDrain := a.scaleDownPlanner.NodesToDelete(currentTime) - scaleDownResult, scaledDownNodes, typedErr := a.scaleDownActuator.StartDeletion(empty, needDrain) + scaleDownResult, scaledDownNodes, typedErr := a.scaleDownActuator.StartDeletionForGridscaleProvider(empty, needDrain, scaleDownCandidates) scaleDownStatus.Result = scaleDownResult scaleDownStatus.ScaledDownNodes = scaledDownNodes metrics.UpdateDurationFromStart(metrics.ScaleDown, scaleDownStart) @@ -966,6 +983,7 @@ func (a *StaticAutoscaler) ExitCleanUp() { a.CloudProvider.Cleanup() a.clusterStateRegistry.Stop() + a.cleanUpTaintsForAllNodes() } func (a *StaticAutoscaler) obtainNodeLists() ([]*apiv1.Node, []*apiv1.Node, caerrors.AutoscalerError) { diff --git a/cluster-autoscaler/go.mod b/cluster-autoscaler/go.mod index 882bd622b01e..7376bcf92263 100644 --- a/cluster-autoscaler/go.mod +++ b/cluster-autoscaler/go.mod @@ -25,6 +25,7 @@ require ( github.com/google/go-cmp v0.6.0 github.com/google/go-querystring v1.0.0 github.com/google/uuid v1.6.0 + github.com/gridscale/gsclient-go/v3 v3.16.0 github.com/jmattheis/goverter v1.4.0 github.com/jmespath/go-jmespath v0.4.0 github.com/json-iterator/go v1.1.12 diff --git a/cluster-autoscaler/go.sum b/cluster-autoscaler/go.sum index 61a776811b97..f1fea0d1bf29 100644 --- a/cluster-autoscaler/go.sum +++ b/cluster-autoscaler/go.sum @@ -297,6 +297,8 @@ github.com/googleapis/gax-go/v2 v2.12.0/go.mod h1:y+aIqrI5eb1YGMVJfuV3185Ts/D7qK github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gridscale/gsclient-go/v3 v3.16.0 h1:NroV9q8QQn6KIwu4d7qXkvf3Z5YCKtR3rEcYeL9W6+4= +github.com/gridscale/gsclient-go/v3 v3.16.0/go.mod h1:B/xAA91QRonCLb+ScJix4ODj6wHWaDpPwvFHQD2KqsM= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= diff --git a/cluster-autoscaler/utils/taints/taints.go b/cluster-autoscaler/utils/taints/taints.go index 267a4a62872b..d4b1a5712a2b 100644 --- a/cluster-autoscaler/utils/taints/taints.go +++ b/cluster-autoscaler/utils/taints/taints.go @@ -35,6 +35,9 @@ import ( klog "k8s.io/klog/v2" ) +// gridscaleNode0SuffixName is the suffix of the gridscale node 0's name +const gridscaleNode0SuffixName = 
"-node-pool0-0" + const ( // ToBeDeletedTaint is a taint used to make the node unschedulable. ToBeDeletedTaint = "ToBeDeletedByClusterAutoscaler" @@ -186,6 +189,11 @@ func AddTaints(node *apiv1.Node, client kube_client.Interface, taints []apiv1.Ta var err error refresh := false for { + // skip tainting gridscale node 0 + if strings.HasSuffix(node.Name, gridscaleNode0SuffixName) { + klog.V(1).Infof("Skipping tainting of node %v, because it is a gridscale node 0", node.Name) + return nil + } if refresh { // Get the newest version of the node. freshNode, err = client.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})