Skip to content
This repository was archived by the owner on Sep 30, 2020. It is now read-only.

Commit 0504707

Browse files
authored
Merge pull request #629 from mumoshu/re-cluster-autoscaler
Re: cluster-autoscaler support
2 parents 910fbd0 + 166c34b commit 0504707

File tree

16 files changed

+438
-154
lines changed

16 files changed

+438
-154
lines changed

Documentation/kubernetes-on-aws-add-ons.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,42 @@
22

33
kube-aws has built-in support for several Kubernetes add-ons that are known to require additional configuration beforehand.
44

5+
## cluster-autoscaler
6+
7+
[cluster-autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) is an add-on which automatically
8+
scales in/out your k8s cluster by removing/adding worker nodes according to resource utilization per node.
9+
10+
To enable cluster-autoscaler, add the following settings to your `cluster.yaml`:
11+
12+
```yaml
13+
addons:
14+
  clusterAutoscaler:
15+
    enabled: true
16+
worker:
17+
  nodePools:
18+
  - name: scaled
19+
    autoScalingGroup:
20+
      minSize: 1
21+
      maxSize: 10
22+
    autoscaling:
23+
      clusterAutoscaler:
24+
        enabled: true
25+
  - name: notScaled
26+
    autoScalingGroup:
27+
      minSize: 2
28+
      maxSize: 4
29+
```
30+
31+
The above example configuration would:
32+
33+
* By `addons.clusterAutoscaler.enabled`:
34+
  * Give controller nodes the IAM permissions that CA needs in order to call the necessary AWS APIs
35+
  * Create a k8s deployment to run CA on one of the controller nodes, so that CA can utilize the IAM permissions
36+
* By `worker.nodePools[0].autoscaling.clusterAutoscaler.enabled`:
37+
  * If there are unschedulable, pending pods that are requesting more capacity, CA will add more nodes to the `scaled` node pool, up to the max size of `10`
38+
  * If there are no unschedulable, pending pods waiting for more capacity and one or more nodes have low utilization, CA will remove node(s), down to the min size of `1`
39+
* The second node pool `notScaled` is scaled manually by YOU, because autoscaling is not enabled on it (`autoscaling.clusterAutoscaler.enabled` is missing)
40+
541
## kube2iam
642

743
[kube2iam](https://github.com/jtblin/kube2iam) is an add-on which provides IAM credentials for target IAM roles to pods running inside a Kubernetes cluster based on annotations.

core/controlplane/config/config.go

Lines changed: 78 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ func NewDefaultCluster() *Cluster {
5858
AwsNodeLabels: AwsNodeLabels{
5959
Enabled: false,
6060
},
61-
ClusterAutoscalerSupport: ClusterAutoscalerSupport{
61+
ClusterAutoscalerSupport: model.ClusterAutoscalerSupport{
6262
Enabled: false,
6363
},
6464
TLSBootstrap: TLSBootstrap{
@@ -81,7 +81,7 @@ func NewDefaultCluster() *Cluster {
8181
NodeDrainer: NodeDrainer{
8282
Enabled: false,
8383
},
84-
NodeLabels: NodeLabels{},
84+
NodeLabels: model.NodeLabels{},
8585
Plugins: Plugins{
8686
Rbac: Rbac{
8787
Enabled: false,
@@ -104,34 +104,35 @@ func NewDefaultCluster() *Cluster {
104104

105105
return &Cluster{
106106
DeploymentSettings: DeploymentSettings{
107-
ClusterName: "kubernetes",
108-
VPCCIDR: "10.0.0.0/16",
109-
ReleaseChannel: "stable",
110-
K8sVer: k8sVer,
111-
ContainerRuntime: "docker",
112-
Subnets: []model.Subnet{},
113-
EIPAllocationIDs: []string{},
114-
MapPublicIPs: true,
115-
Experimental: experimental,
116-
ManageCertificates: true,
117-
HyperkubeImage: model.Image{Repo: "quay.io/coreos/hyperkube", Tag: k8sVer, RktPullDocker: false},
118-
AWSCliImage: model.Image{Repo: "quay.io/coreos/awscli", Tag: "master", RktPullDocker: false},
119-
CalicoNodeImage: model.Image{Repo: "quay.io/calico/node", Tag: "v1.2.1", RktPullDocker: false},
120-
CalicoCniImage: model.Image{Repo: "quay.io/calico/cni", Tag: "v1.8.3", RktPullDocker: false},
121-
CalicoPolicyControllerImage: model.Image{Repo: "quay.io/calico/kube-policy-controller", Tag: "v0.6.0", RktPullDocker: false},
122-
CalicoCtlImage: model.Image{Repo: "quay.io/calico/ctl", Tag: "v1.2.1", RktPullDocker: false},
123-
ClusterAutoscalerImage: model.Image{Repo: "gcr.io/google_containers/cluster-proportional-autoscaler-amd64", Tag: "1.1.1", RktPullDocker: false},
124-
KubeDnsImage: model.Image{Repo: "gcr.io/google_containers/k8s-dns-kube-dns-amd64", Tag: "1.14.2", RktPullDocker: false},
125-
KubeDnsMasqImage: model.Image{Repo: "gcr.io/google_containers/k8s-dns-dnsmasq-nanny-amd64", Tag: "1.14.2", RktPullDocker: false},
126-
KubeReschedulerImage: model.Image{Repo: "gcr.io/google-containers/rescheduler", Tag: "v0.3.0", RktPullDocker: false},
127-
DnsMasqMetricsImage: model.Image{Repo: "gcr.io/google_containers/k8s-dns-sidecar-amd64", Tag: "1.14.2", RktPullDocker: false},
128-
ExecHealthzImage: model.Image{Repo: "gcr.io/google_containers/exechealthz-amd64", Tag: "1.2", RktPullDocker: false},
129-
HeapsterImage: model.Image{Repo: "gcr.io/google_containers/heapster", Tag: "v1.3.0", RktPullDocker: false},
130-
AddonResizerImage: model.Image{Repo: "gcr.io/google_containers/addon-resizer", Tag: "1.7", RktPullDocker: false},
131-
KubeDashboardImage: model.Image{Repo: "gcr.io/google_containers/kubernetes-dashboard-amd64", Tag: "v1.6.1", RktPullDocker: false},
132-
PauseImage: model.Image{Repo: "gcr.io/google_containers/pause-amd64", Tag: "3.0", RktPullDocker: false},
133-
FlannelImage: model.Image{Repo: "quay.io/coreos/flannel", Tag: "v0.7.1", RktPullDocker: false},
134-
DexImage: model.Image{Repo: "quay.io/coreos/dex", Tag: "v2.4.1", RktPullDocker: false},
107+
ClusterName: "kubernetes",
108+
VPCCIDR: "10.0.0.0/16",
109+
ReleaseChannel: "stable",
110+
K8sVer: k8sVer,
111+
ContainerRuntime: "docker",
112+
Subnets: []model.Subnet{},
113+
EIPAllocationIDs: []string{},
114+
MapPublicIPs: true,
115+
Experimental: experimental,
116+
ManageCertificates: true,
117+
HyperkubeImage: model.Image{Repo: "quay.io/coreos/hyperkube", Tag: k8sVer, RktPullDocker: false},
118+
AWSCliImage: model.Image{Repo: "quay.io/coreos/awscli", Tag: "master", RktPullDocker: false},
119+
CalicoNodeImage: model.Image{Repo: "quay.io/calico/node", Tag: "v1.2.1", RktPullDocker: false},
120+
CalicoCniImage: model.Image{Repo: "quay.io/calico/cni", Tag: "v1.8.3", RktPullDocker: false},
121+
CalicoPolicyControllerImage: model.Image{Repo: "quay.io/calico/kube-policy-controller", Tag: "v0.6.0", RktPullDocker: false},
122+
CalicoCtlImage: model.Image{Repo: "quay.io/calico/ctl", Tag: "v1.2.1", RktPullDocker: false},
123+
ClusterAutoscalerImage: model.Image{Repo: "quay.io/kube-aws/cluster-autoscaler", Tag: "b432362a70f925d94240fe0bb772bd05fb8ad8d6", RktPullDocker: false},
124+
ClusterProportionalAutoscalerImage: model.Image{Repo: "gcr.io/google_containers/cluster-proportional-autoscaler-amd64", Tag: "1.1.1", RktPullDocker: false},
125+
KubeDnsImage: model.Image{Repo: "gcr.io/google_containers/k8s-dns-kube-dns-amd64", Tag: "1.14.2", RktPullDocker: false},
126+
KubeDnsMasqImage: model.Image{Repo: "gcr.io/google_containers/k8s-dns-dnsmasq-nanny-amd64", Tag: "1.14.2", RktPullDocker: false},
127+
KubeReschedulerImage: model.Image{Repo: "gcr.io/google-containers/rescheduler", Tag: "v0.3.0", RktPullDocker: false},
128+
DnsMasqMetricsImage: model.Image{Repo: "gcr.io/google_containers/k8s-dns-sidecar-amd64", Tag: "1.14.2", RktPullDocker: false},
129+
ExecHealthzImage: model.Image{Repo: "gcr.io/google_containers/exechealthz-amd64", Tag: "1.2", RktPullDocker: false},
130+
HeapsterImage: model.Image{Repo: "gcr.io/google_containers/heapster", Tag: "v1.3.0", RktPullDocker: false},
131+
AddonResizerImage: model.Image{Repo: "gcr.io/google_containers/addon-resizer", Tag: "1.7", RktPullDocker: false},
132+
KubeDashboardImage: model.Image{Repo: "gcr.io/google_containers/kubernetes-dashboard-amd64", Tag: "v1.6.1", RktPullDocker: false},
133+
PauseImage: model.Image{Repo: "gcr.io/google_containers/pause-amd64", Tag: "3.0", RktPullDocker: false},
134+
FlannelImage: model.Image{Repo: "quay.io/coreos/flannel", Tag: "v0.7.1", RktPullDocker: false},
135+
DexImage: model.Image{Repo: "quay.io/coreos/dex", Tag: "v2.4.1", RktPullDocker: false},
135136
},
136137
KubeClusterSettings: KubeClusterSettings{
137138
DNSServiceIP: "10.3.0.10",
@@ -494,24 +495,25 @@ type DeploymentSettings struct {
494495
WaitSignal WaitSignal `yaml:"waitSignal"`
495496

496497
// Images repository
497-
HyperkubeImage model.Image `yaml:"hyperkubeImage,omitempty"`
498-
AWSCliImage model.Image `yaml:"awsCliImage,omitempty"`
499-
CalicoNodeImage model.Image `yaml:"calicoNodeImage,omitempty"`
500-
CalicoCniImage model.Image `yaml:"calicoCniImage,omitempty"`
501-
CalicoCtlImage model.Image `yaml:"calicoCtlImage,omitempty"`
502-
CalicoPolicyControllerImage model.Image `yaml:"calicoPolicyControllerImage,omitempty"`
503-
ClusterAutoscalerImage model.Image `yaml:"clusterAutoscalerImage,omitempty"`
504-
KubeDnsImage model.Image `yaml:"kubeDnsImage,omitempty"`
505-
KubeDnsMasqImage model.Image `yaml:"kubeDnsMasqImage,omitempty"`
506-
KubeReschedulerImage model.Image `yaml:"kubeReschedulerImage,omitempty"`
507-
DnsMasqMetricsImage model.Image `yaml:"dnsMasqMetricsImage,omitempty"`
508-
ExecHealthzImage model.Image `yaml:"execHealthzImage,omitempty"`
509-
HeapsterImage model.Image `yaml:"heapsterImage,omitempty"`
510-
AddonResizerImage model.Image `yaml:"addonResizerImage,omitempty"`
511-
KubeDashboardImage model.Image `yaml:"kubeDashboardImage,omitempty"`
512-
PauseImage model.Image `yaml:"pauseImage,omitempty"`
513-
FlannelImage model.Image `yaml:"flannelImage,omitempty"`
514-
DexImage model.Image `yaml:"dexImage,omitempty"`
498+
HyperkubeImage model.Image `yaml:"hyperkubeImage,omitempty"`
499+
AWSCliImage model.Image `yaml:"awsCliImage,omitempty"`
500+
CalicoNodeImage model.Image `yaml:"calicoNodeImage,omitempty"`
501+
CalicoCniImage model.Image `yaml:"calicoCniImage,omitempty"`
502+
CalicoCtlImage model.Image `yaml:"calicoCtlImage,omitempty"`
503+
CalicoPolicyControllerImage model.Image `yaml:"calicoPolicyControllerImage,omitempty"`
504+
ClusterAutoscalerImage model.Image `yaml:"clusterAutoscalerImage,omitempty"`
505+
ClusterProportionalAutoscalerImage model.Image `yaml:"clusterProportionalAutoscalerImage,omitempty"`
506+
KubeDnsImage model.Image `yaml:"kubeDnsImage,omitempty"`
507+
KubeDnsMasqImage model.Image `yaml:"kubeDnsMasqImage,omitempty"`
508+
KubeReschedulerImage model.Image `yaml:"kubeReschedulerImage,omitempty"`
509+
DnsMasqMetricsImage model.Image `yaml:"dnsMasqMetricsImage,omitempty"`
510+
ExecHealthzImage model.Image `yaml:"execHealthzImage,omitempty"`
511+
HeapsterImage model.Image `yaml:"heapsterImage,omitempty"`
512+
AddonResizerImage model.Image `yaml:"addonResizerImage,omitempty"`
513+
KubeDashboardImage model.Image `yaml:"kubeDashboardImage,omitempty"`
514+
PauseImage model.Image `yaml:"pauseImage,omitempty"`
515+
FlannelImage model.Image `yaml:"flannelImage,omitempty"`
516+
DexImage model.Image `yaml:"dexImage,omitempty"`
515517
}
516518

517519
// Part of configuration which is specific to worker nodes
@@ -673,24 +675,26 @@ type Cluster struct {
673675
}
674676

675677
type Experimental struct {
676-
Admission Admission `yaml:"admission"`
677-
AuditLog AuditLog `yaml:"auditLog"`
678-
Authentication Authentication `yaml:"authentication"`
679-
AwsEnvironment AwsEnvironment `yaml:"awsEnvironment"`
680-
AwsNodeLabels AwsNodeLabels `yaml:"awsNodeLabels"`
681-
ClusterAutoscalerSupport ClusterAutoscalerSupport `yaml:"clusterAutoscalerSupport"`
682-
TLSBootstrap TLSBootstrap `yaml:"tlsBootstrap"`
683-
EphemeralImageStorage EphemeralImageStorage `yaml:"ephemeralImageStorage"`
684-
Kube2IamSupport Kube2IamSupport `yaml:"kube2IamSupport,omitempty"`
685-
LoadBalancer LoadBalancer `yaml:"loadBalancer"`
686-
TargetGroup TargetGroup `yaml:"targetGroup"`
687-
NodeDrainer NodeDrainer `yaml:"nodeDrainer"`
688-
NodeLabels NodeLabels `yaml:"nodeLabels"`
689-
Plugins Plugins `yaml:"plugins"`
690-
Dex model.Dex `yaml:"dex"`
691-
DisableSecurityGroupIngress bool `yaml:"disableSecurityGroupIngress"`
692-
NodeMonitorGracePeriod string `yaml:"nodeMonitorGracePeriod"`
693-
Taints model.Taints `yaml:"taints"`
678+
Admission Admission `yaml:"admission"`
679+
AuditLog AuditLog `yaml:"auditLog"`
680+
Authentication Authentication `yaml:"authentication"`
681+
AwsEnvironment AwsEnvironment `yaml:"awsEnvironment"`
682+
AwsNodeLabels AwsNodeLabels `yaml:"awsNodeLabels"`
683+
// When cluster-autoscaler support is enabled, not only controller nodes but this node pool is also given
684+
// a node label and IAM permissions to run cluster-autoscaler
685+
ClusterAutoscalerSupport model.ClusterAutoscalerSupport `yaml:"clusterAutoscalerSupport"`
686+
TLSBootstrap TLSBootstrap `yaml:"tlsBootstrap"`
687+
EphemeralImageStorage EphemeralImageStorage `yaml:"ephemeralImageStorage"`
688+
Kube2IamSupport Kube2IamSupport `yaml:"kube2IamSupport,omitempty"`
689+
LoadBalancer LoadBalancer `yaml:"loadBalancer"`
690+
TargetGroup TargetGroup `yaml:"targetGroup"`
691+
NodeDrainer NodeDrainer `yaml:"nodeDrainer"`
692+
NodeLabels model.NodeLabels `yaml:"nodeLabels"`
693+
Plugins Plugins `yaml:"plugins"`
694+
Dex model.Dex `yaml:"dex"`
695+
DisableSecurityGroupIngress bool `yaml:"disableSecurityGroupIngress"`
696+
NodeMonitorGracePeriod string `yaml:"nodeMonitorGracePeriod"`
697+
Taints model.Taints `yaml:"taints"`
694698
model.UnknownKeys `yaml:",inline"`
695699
}
696700

@@ -727,10 +731,6 @@ type AwsNodeLabels struct {
727731
Enabled bool `yaml:"enabled"`
728732
}
729733

730-
type ClusterAutoscalerSupport struct {
731-
Enabled bool `yaml:"enabled"`
732-
}
733-
734734
type TLSBootstrap struct {
735735
Enabled bool `yaml:"enabled"`
736736
}
@@ -754,27 +754,6 @@ type NodeDrainer struct {
754754
Enabled bool `yaml:"enabled"`
755755
}
756756

757-
type NodeLabels map[string]string
758-
759-
func (l NodeLabels) Enabled() bool {
760-
return len(l) > 0
761-
}
762-
763-
// Returns key=value pairs separated by ',' to be passed to kubelet's `--node-labels` flag
764-
func (l NodeLabels) String() string {
765-
labels := []string{}
766-
keys := []string{}
767-
for k, _ := range l {
768-
keys = append(keys, k)
769-
}
770-
sort.Strings(keys)
771-
for _, k := range keys {
772-
v := l[k]
773-
labels = append(labels, fmt.Sprintf("%s=%s", k, v))
774-
}
775-
return strings.Join(labels, ",")
776-
}
777-
778757
type LoadBalancer struct {
779758
Enabled bool `yaml:"enabled"`
780759
Names []string `yaml:"names"`
@@ -1105,6 +1084,14 @@ func (c Cluster) NestedStackName() string {
11051084
return strings.Title(strings.Replace(c.StackName(), "-", "", -1))
11061085
}
11071086

1087+
func (c Cluster) NodeLabels() model.NodeLabels {
1088+
labels := c.Experimental.NodeLabels
1089+
if c.Addons.ClusterAutoscaler.Enabled {
1090+
labels["kube-aws.coreos.com/cluster-autoscaler-supported"] = "true"
1091+
}
1092+
return labels
1093+
}
1094+
11081095
// Etcdadm returns the content of the etcdadm script to be embedded into cloud-config-etcd
11091096
func (c *Config) Etcdadm() (string, error) {
11101097
return gzipcompressor.CompressData(Etcdadm)

core/controlplane/config/templates/cloud-config-controller

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ coreos:
197197
--container-runtime={{.ContainerRuntime}} \
198198
--rkt-path=/usr/bin/rkt \
199199
--rkt-stage1-image=coreos.com/rkt/stage1-coreos \
200-
--node-labels node-role.kubernetes.io/master{{if .Experimental.NodeLabels.Enabled}},{{.Experimental.NodeLabels.String}} \
200+
--node-labels node-role.kubernetes.io/master{{if .NodeLabels.Enabled}},{{.NodeLabels.String}} \
201201
{{end}} \
202202
--register-with-taints=node.alpha.kubernetes.io/role=master:NoSchedule \
203203
--allow-privileged=true \
@@ -578,7 +578,7 @@ write_files:
578578
kubectl apply -f "${mfdir}/kube-dns-sa.yaml"
579579

580580
# Deployments
581-
for manifest in {kube-dns-de,kube-dns-autoscaler-de,heapster-de{{ if .KubeResourcesAutosave.Enabled }},kube-resources-autosave{{ end }}}.yaml; do
581+
for manifest in {kube-dns-de,kube-dns-autoscaler-de,cluster-autoscaler-de,heapster-de{{ if .KubeResourcesAutosave.Enabled }},kube-resources-autosave{{ end }}}.yaml; do
582582
kubectl apply -f "${mfdir}/$manifest"
583583
done
584584

@@ -1530,7 +1530,7 @@ write_files:
15301530
operator: "Exists"
15311531
containers:
15321532
- name: autoscaler
1533-
image: {{ .ClusterAutoscalerImage.RepoWithTag }}
1533+
image: {{ .ClusterProportionalAutoscalerImage.RepoWithTag }}
15341534
resources:
15351535
requests:
15361536
cpu: "20m"
@@ -1795,6 +1795,64 @@ write_files:
17951795
- --poll-period=300000
17961796
- --estimator=exponential
17971797

1798+
{{if .Addons.ClusterAutoscaler.Enabled}}
1799+
- path: /srv/kubernetes/manifests/cluster-autoscaler-de.yaml
1800+
content: |
1801+
apiVersion: extensions/v1beta1
1802+
kind: Deployment
1803+
metadata:
1804+
name: cluster-autoscaler
1805+
namespace: kube-system
1806+
labels:
1807+
app: cluster-autoscaler
1808+
spec:
1809+
replicas: 1
1810+
selector:
1811+
matchLabels:
1812+
app: cluster-autoscaler
1813+
template:
1814+
metadata:
1815+
labels:
1816+
app: cluster-autoscaler
1817+
spec:
1818+
affinity:
1819+
nodeAffinity:
1820+
requiredDuringSchedulingIgnoredDuringExecution:
1821+
nodeSelectorTerms:
1822+
- matchExpressions:
1823+
- key: "kube-aws.coreos.com/cluster-autoscaler-supported"
1824+
operator: "In"
1825+
values:
1826+
- "true"
1827+
tolerations:
1828+
- key: "node.alpha.kubernetes.io/role"
1829+
operator: "Equal"
1830+
value: "master"
1831+
effect: "NoSchedule"
1832+
containers:
1833+
- image: {{ .ClusterAutoscalerImage.RepoWithTag }}
1834+
name: cluster-autoscaler
1835+
resources:
1836+
limits:
1837+
cpu: 100m
1838+
memory: 300Mi
1839+
requests:
1840+
cpu: 100m
1841+
memory: 300Mi
1842+
command:
1843+
- ./cluster-autoscaler
1844+
- --v=4
1845+
- --stderrthreshold=info
1846+
- --cloud-provider=aws
1847+
- --skip-nodes-with-local-storage=false
1848+
- --expander=least-waste
1849+
- --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled
1850+
env:
1851+
- name: AWS_REGION
1852+
value: {{.Region}}
1853+
imagePullPolicy: "Always"
1854+
{{end}}
1855+
17981856
- path: /srv/kubernetes/manifests/heapster-svc.yaml
17991857
content: |
18001858
kind: Service

core/controlplane/config/templates/cloud-config-worker

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ coreos:
192192
--container-runtime={{.ContainerRuntime}} \
193193
--rkt-path=/usr/bin/rkt \
194194
--rkt-stage1-image=coreos.com/rkt/stage1-coreos \
195-
{{if .Experimental.NodeLabels.Enabled}}--node-labels {{.Experimental.NodeLabels.String}} \
195+
{{if .Experimental.NodeLabels.Enabled}}--node-labels {{.NodeLabels.String}} \
196196
{{end}}--register-node=true \
197197
{{if .Experimental.Taints}}--register-with-taints={{.Experimental.Taints.String}}\
198198
{{end}}--allow-privileged=true \

0 commit comments

Comments
 (0)