gridscale · fabiante · Apr 23, 2023 · Apr 23, 2023 · May 7, 2025 · Jan 3, 2024
diff --git a/.github/workflows/gsk-release.yaml b/.github/workflows/gsk-release.yaml
@@ -0,0 +1,39 @@
+name: Build and push Docker image for gridscale's Cluster Autoscaler
+
+on:
+  push:
+    tags:
+      - "*"  # every pushed tag triggers a release build
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Go
+        uses: actions/setup-go@v2
+        with:
+          go-version: "1.22.4"  # quoted so YAML always treats the version as a string
+
+      - name: Change directory to cluster-autoscaler and build binary
+        working-directory: ./cluster-autoscaler
+        run: |
+          make build-arch-amd64
+
+      - name: Login to Docker registry
+        uses: docker/login-action@v1
+        with:
+          registry: registry.kubecuddle.io
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v2
+        with:
+          context: ./cluster-autoscaler
+          file: ./cluster-autoscaler/Dockerfile.amd64
+          push: true
+          platforms: linux/amd64  # must match the amd64 binary and Dockerfile.amd64 used above
+          tags: registry.kubecuddle.io/k8s/cluster-autoscaler:${{ github.ref_name }}
diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md
@@ -1,4 +1,6 @@
 <!--TODO: Remove "previously referred to as master" references from this doc once this terminology is fully removed from k8s-->
+**NOTE**: This is a forked version of Cluster Autoscaler made for gridscale managed Kubernetes clusters. Some of the information in this document may not apply to the original version of Cluster Autoscaler. Please use the official product documentation provided by gridscale.
+
 # Frequently Asked Questions
 
 # Older versions

diff --git a/cluster-autoscaler/README.md b/cluster-autoscaler/README.md
@@ -3,6 +3,8 @@
 
 # Introduction
 
+**NOTE**: This is a forked version of Cluster Autoscaler made for gridscale managed Kubernetes clusters.
+
 Cluster Autoscaler is a tool that automatically adjusts the size of the Kubernetes cluster when one of the following conditions is true:
 * there are pods that failed to run in the cluster due to insufficient
   resources.

diff --git a/cluster-autoscaler/cloudprovider/builder/builder_all.go b/cluster-autoscaler/cloudprovider/builder/builder_all.go
@@ -36,6 +36,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/externalgrpc"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gridscale"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/hetzner"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ionoscloud"
@@ -87,8 +88,8 @@ var AvailableCloudProviders = []string{
 	cloudprovider.VolcengineProviderName,
 }
 
-// DefaultCloudProvider is GCE.
-const DefaultCloudProvider = cloudprovider.GceProviderName
+// DefaultCloudProvider is gridscale.
+const DefaultCloudProvider = cloudprovider.GridscaleProviderName
 
 func buildCloudProvider(opts config.AutoscalingOptions,
 	do cloudprovider.NodeGroupDiscoveryOptions,
@@ -153,6 +154,8 @@ func buildCloudProvider(opts config.AutoscalingOptions,
 		return rancher.BuildRancher(opts, do, rl)
 	case cloudprovider.VolcengineProviderName:
 		return volcengine.BuildVolcengine(opts, do, rl)
+	case cloudprovider.GridscaleProviderName:
+		return gridscale.BuildGridscale(opts, do, rl)
 	}
 	return nil
 }
diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go
@@ -90,6 +90,8 @@ const (
 	CivoProviderName = "civo"
 	// RancherProviderName gets the provider name of rancher
 	RancherProviderName = "rancher"
+	// GridscaleProviderName gets the provider name of gridscale
+	GridscaleProviderName = "gridscale"
 )
 
 // GpuConfig contains the label, type and the resource name for a GPU.

diff --git a/cluster-autoscaler/cloudprovider/gridscale/README.md b/cluster-autoscaler/cloudprovider/gridscale/README.md
@@ -0,0 +1,38 @@
+
+gridscale managed Kubernetes cluster-autoscaler is a tool that automatically adjusts the size of the Kubernetes cluster when the load changes. When the load is high, the cluster-autoscaler increases the size of the cluster, and when the load is low, it decreases the size of the cluster. 
+
+**Note**: The cluster-autoscaler currently supports gridscale managed Kubernetes clusters with version ~> 1.25.
+
+**Note 2**: Currently, gridscale managed k8s only supports scaling down the last nodes. Due to that limitation of the gridscale API, a forked version of the cluster-autoscaler is used. 
+
+## cluster-autoscaler deployment
+### Prerequisites
+1. A gridscale managed Kubernetes cluster.
+2. Create a gridscale API token via the panel.
+3. kubectl is installed on your local machine.
+4. kubectl is configured to access your gridscale managed Kubernetes cluster.
+
+### Deploy cluster-autoscaler
+1. Download the cluster-autoscaler manifest file from [here](https://github.com/gridscale/autoscaler/blob/gsk-autoscaler-1.25.1/cluster-autoscaler/cloudprovider/gridscale/cluster-autoscaler-autodiscover.yaml) and save it as `cluster-autoscaler-autodiscover.yaml`.
+2. If you use namespace `gsk-autoscaler` in your `cluster-autoscaler-autodiscover.yaml`, create a new namespace called `gsk-autoscaler` by running the following command:
+```bash
+$ kubectl create namespace gsk-autoscaler
+```
+3. Insert your base64 encoded gridscale API user and token in the manifest file.
+4. Insert your gridscale Kubernetes cluster UUID in the environment variable `CLUSTER_UUID` in the manifest file.
+5. Change environment variable `CLUSTER_MAX_NODE_COUNT` in the manifest file to the maximum number of nodes you want to scale up to. (Optional) you can also change the minimum number of nodes by changing environment variable `CLUSTER_MIN_NODE_COUNT` (Default: 1) in the manifest file.
+6. To configure parameters of the cluster-autoscaler, you can add flags to the command in the manifest file. All available flags and their default values can be found [here](https://github.com/gridscale/autoscaler/blob/gsk-autoscaler-1.25.1/cluster-autoscaler/FAQ.md#what-are-the-parameters-to-ca).
+7. Deploy the cluster-autoscaler by running the following command:
+```bash
+$ kubectl apply -f cluster-autoscaler-autodiscover.yaml
+```
+8. You can check the autoscaling activity by reading the configmap `cluster-autoscaler-status` in namespace `gsk-autoscaler`, i.e.:
+```bash
+$ kubectl get configmap cluster-autoscaler-status -n gsk-autoscaler -o yaml
+```
+
+**Note**: the cluster-autoscaler will be deployed in a namespace called `gsk-autoscaler`.
+
+## FAQ
+### After upgrading my gridscale managed Kubernetes cluster, the cluster-autoscaler is not working anymore. What should I do?
+Please make sure that the minor version of the cluster-autoscaler matches the minor version of your gridscale managed Kubernetes cluster. If not, please redeploy the cluster-autoscaler with the correct version.
diff --git a/cluster-autoscaler/cloudprovider/gridscale/cluster-autoscaler-autodiscover.yaml b/cluster-autoscaler/cloudprovider/gridscale/cluster-autoscaler-autodiscover.yaml
@@ -0,0 +1,199 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: cloud-config
+  namespace: gsk-autoscaler
+data:
+  # Insert your base64-encoded gridscale API user UUID and API token here; make sure there is no trailing newline,
+  # e.g. generate each value with:  echo -n "token" | base64
+  gridscale-uuid: "<BASE64_API_UUID>"
+  gridscale-token: "<BASE64_API_TOKEN>"
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+  name: cluster-autoscaler
+  namespace: gsk-autoscaler
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: cluster-autoscaler
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+rules:
+  - apiGroups: [""]
+    resources: ["events", "endpoints"]
+    verbs: ["create", "patch"]
+  - apiGroups: [""]
+    resources: ["pods/eviction"]
+    verbs: ["create"]
+  - apiGroups: [""]
+    resources: ["pods/status"]
+    verbs: ["update"]
+  - apiGroups: [""]
+    resources: ["endpoints"]
+    resourceNames: ["cluster-autoscaler"]
+    verbs: ["get", "update"]
+  - apiGroups: [""]
+    resources: ["nodes"]
+    verbs: ["watch", "list", "get", "update"]
+  - apiGroups: [""]
+    resources:
+      - "namespaces"
+      - "pods"
+      - "services"
+      - "replicationcontrollers"
+      - "persistentvolumeclaims"
+      - "persistentvolumes"
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["extensions"]
+    resources: ["replicasets", "daemonsets"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["policy"]
+    resources: ["poddisruptionbudgets"]
+    verbs: ["watch", "list"]
+  - apiGroups: ["apps"]
+    resources: ["statefulsets", "replicasets", "daemonsets"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["storage.k8s.io"]
+    resources:
+      ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["batch", "extensions"]
+    resources: ["jobs"]
+    verbs: ["get", "list", "watch", "patch"]
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["create"]
+  - apiGroups: ["coordination.k8s.io"]
+    resourceNames: ["cluster-autoscaler"]
+    resources: ["leases"]
+    verbs: ["get", "update"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: cluster-autoscaler
+  namespace: gsk-autoscaler
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+rules:
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["create", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    resourceNames:
+      ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"]
+    verbs: ["delete", "get", "update", "watch"]
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: cluster-autoscaler
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-autoscaler
+subjects:
+  - kind: ServiceAccount
+    name: cluster-autoscaler
+    namespace: gsk-autoscaler
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: cluster-autoscaler
+  namespace: gsk-autoscaler
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: cluster-autoscaler
+subjects:
+  - kind: ServiceAccount
+    name: cluster-autoscaler
+    namespace: gsk-autoscaler
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: cluster-autoscaler
+  namespace: gsk-autoscaler
+  labels:
+    app: cluster-autoscaler
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: cluster-autoscaler
+  template:
+    metadata:
+      labels:
+        app: cluster-autoscaler
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8085"
+    spec:
+      priorityClassName: system-cluster-critical
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 65534
+        fsGroup: 65534
+        seccompProfile:
+          type: RuntimeDefault
+      serviceAccountName: cluster-autoscaler
+      containers:
+        - image: registry.kubecuddle.io/k8s/cluster-autoscaler:gsk-v1.30.2  # keep the minor version in line with the cluster's Kubernetes minor version
+          name: cluster-autoscaler
+          resources:
+            limits:
+              cpu: 100m
+              memory: 600Mi
+            requests:
+              cpu: 100m
+              memory: 600Mi
+          command:
+            - ./cluster-autoscaler
+            - --v=4
+            - --stderrthreshold=info
+            - --namespace=gsk-autoscaler  # same namespace as the resources in this manifest
+            - --daemonset-eviction-for-occupied-nodes=false
+          env:
+            - name: API_UUID  # read from the cloud-config Secret
+              valueFrom:
+                secretKeyRef:
+                  name: cloud-config
+                  key: gridscale-uuid
+            - name: API_TOKEN  # read from the cloud-config Secret
+              valueFrom:
+                secretKeyRef:
+                  name: cloud-config
+                  key: gridscale-token
+            - name: CLUSTER_UUID
+              value: "00000000-0000-0000-0000-000000000000"  # placeholder: replace with your gridscale Kubernetes cluster UUID
+            - name: CLUSTER_MIN_NODE_COUNT
+              value: "1"  # minimum number of nodes
+            - name: CLUSTER_MAX_NODE_COUNT
+              value: "10"  # maximum number of nodes to scale up to
+          imagePullPolicy: "Always"
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: true