Skip to content

Commit 6309620

Browse files
committed
feat(github-arc): optimize Karpenter config and GitHub ARC runner deployment
- Update Karpenter node access to use the EC2_LINUX type for proper system:nodes group membership and kubelet-serving CSR auto-approval
- Reduce Karpenter log level from debug to info for production readiness
- Decrease EC2 node volume size from 100Gi to 50Gi to optimize storage costs
- Refine instance type selection to c5/c5a/c5n/c6a/c6i/c7a in 2xlarge and 4xlarge sizes, plus m5/m5a/m6a/m6i in 2xlarge, for consistent performance
- Change Karpenter consolidation policy from WhenEmptyOrUnderutilized to WhenEmpty and increase consolidateAfter from 15s to 30s
- Pin GitHub ARC controller and runner scale set Helm charts to version 0.13.1
- Switch runner container mode to kubernetes-novolume for simplified pod management
- Remove Docker-in-Docker sidecar and init container, reducing resource overhead and complexity
- Set minRunners to 0 for cost optimization with on-demand scaling
- Reduce runner resources from 3 CPU/6Gi memory (requests and limits) to requests of 1 CPU/2Gi and limits of 2 CPU/3Gi
- Add ACTIONS_RUNNER_REQUIRE_JOB_CONTAINER=false to allow containerless job execution
1 parent 1c660d3 commit 6309620

4 files changed

Lines changed: 28 additions & 66 deletions

File tree

KaaS/Elastic Kubernetes Service(EKS)/Terraform/github-arc/eks.tf

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,11 @@ module "eks" {
2727
}
2828
}
2929
}
30-
# Karpenter access
30+
# Karpenter node access - must be EC2_LINUX so nodes join the system:nodes group
31+
# and kubelet-serving CSRs get auto-approved (required for logs/exec)
3132
karpenter = {
32-
kubernetes_groups = []
33-
principal_arn = aws_iam_role.karpenter_node_role.arn
34-
35-
policy_associations = {
36-
karpenter = {
37-
policy_arn = "arn:${data.aws_partition.current.partition}:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy"
38-
access_scope = {
39-
type = "cluster"
40-
}
41-
}
42-
}
33+
type = "EC2_LINUX"
34+
principal_arn = aws_iam_role.karpenter_node_role.arn
4335
}
4436
}
4537

KaaS/Elastic Kubernetes Service(EKS)/Terraform/github-arc/karpenter.tf

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ tolerations:
4444
value: "true"
4545
effect: "NoSchedule"
4646
47-
logLevel: debug
47+
logLevel: info
4848
EOT
4949
]
5050

@@ -82,7 +82,7 @@ resource "kubectl_manifest" "karpenter_ec2nodeclass_runner" {
8282
{
8383
deviceName = "/dev/xvda"
8484
ebs = {
85-
volumeSize = "100Gi"
85+
volumeSize = "50Gi"
8686
volumeType = "gp3"
8787
deleteOnTermination = true
8888
encrypted = true
@@ -128,10 +128,14 @@ spec:
128128
- key: node.kubernetes.io/instance-type
129129
operator: In
130130
values: [
131-
"c4.large", "c4.xlarge", "c4.2xlarge", "c4.4xlarge",
132-
"c5.large", "c5.xlarge", "c5.2xlarge", "c5.4xlarge",
133-
"c5a.large", "c5a.xlarge", "c5a.2xlarge", "c5a.4xlarge",
134-
"c5n.large", "c5n.xlarge", "c5n.2xlarge", "c5n.4xlarge"
131+
"c5.2xlarge", "c5.4xlarge",
132+
"c5a.2xlarge", "c5a.4xlarge",
133+
"c5n.2xlarge", "c5n.4xlarge",
134+
"c6a.2xlarge", "c6a.4xlarge",
135+
"c6i.2xlarge", "c6i.4xlarge",
136+
"c7a.2xlarge", "c7a.4xlarge",
137+
"m5.2xlarge", "m5a.2xlarge",
138+
"m6a.2xlarge", "m6i.2xlarge"
135139
]
136140
nodeClassRef:
137141
group: karpenter.k8s.aws
@@ -141,8 +145,8 @@ spec:
141145
limits:
142146
cpu: 1000
143147
disruption:
144-
consolidationPolicy: WhenEmptyOrUnderutilized
145-
consolidateAfter: 15s
148+
consolidationPolicy: WhenEmpty
149+
consolidateAfter: 30s
146150
EOT
147151

148152
depends_on = [

KaaS/Elastic Kubernetes Service(EKS)/Terraform/github-arc/runner-scale-set-controller.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ resource "helm_release" "arc" {
22
name = "arc"
33
namespace = "arc-systems"
44
create_namespace = true
5-
5+
version = "0.13.1"
66
repository = "oci://ghcr.io/actions/actions-runner-controller-charts"
77
chart = "gha-runner-scale-set-controller"
88

KaaS/Elastic Kubernetes Service(EKS)/Terraform/github-arc/runner-scale-set.tf

Lines changed: 11 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ resource "kubernetes_secret_v1" "pre_defined" {
2323
resource "helm_release" "arc_runner_set" {
2424
name = "arc-runner-set-eks"
2525
namespace = kubernetes_namespace.arc_runners.metadata[0].name
26-
26+
version = "0.13.1"
2727
repository = "oci://ghcr.io/actions/actions-runner-controller-charts"
2828
chart = "gha-runner-scale-set"
2929
values = [
@@ -34,7 +34,10 @@ githubConfigSecret: "${kubernetes_secret_v1.pre_defined.metadata[0].name}"
3434
3535
maxRunners: 20
3636
37-
minRunners: 1
37+
minRunners: 0
38+
39+
containerMode:
40+
type: "kubernetes-novolume"
3841
3942
listenerTemplate:
4043
spec:
@@ -52,57 +55,20 @@ listenerTemplate:
5255
5356
template:
5457
spec:
55-
initContainers:
56-
- name: init-dind-externals
57-
image: ghcr.io/actions/actions-runner:latest
58-
command: ["cp", "-r", "/home/runner/externals/.", "/home/runner/tmpDir/"]
59-
volumeMounts:
60-
- name: dind-externals
61-
mountPath: /home/runner/tmpDir
6258
containers:
6359
- name: runner
6460
image: ghcr.io/actions/actions-runner:latest
6561
command: ["/home/runner/run.sh"]
6662
env:
67-
- name: DOCKER_HOST
68-
value: unix:///var/run/docker.sock
69-
volumeMounts:
70-
- name: work
71-
mountPath: /home/runner/_work
72-
- name: dind-sock
73-
mountPath: /var/run
63+
- name: ACTIONS_RUNNER_REQUIRE_JOB_CONTAINER
64+
value: "false"
7465
resources:
7566
limits:
76-
cpu: 3
77-
memory: 6Gi
67+
cpu: "2"
68+
memory: 3Gi
7869
requests:
79-
cpu: 3
80-
memory: 6Gi
81-
- name: dind
82-
image: docker:dind
83-
args:
84-
- dockerd
85-
- --host=unix:///var/run/docker.sock
86-
- --group=$(DOCKER_GROUP_GID)
87-
env:
88-
- name: DOCKER_GROUP_GID
89-
value: "123"
90-
securityContext:
91-
privileged: true
92-
volumeMounts:
93-
- name: work
94-
mountPath: /home/runner/_work
95-
- name: dind-sock
96-
mountPath: /var/run
97-
- name: dind-externals
98-
mountPath: /home/runner/externals
99-
volumes:
100-
- name: work
101-
emptyDir: {}
102-
- name: dind-sock
103-
emptyDir: {}
104-
- name: dind-externals
105-
emptyDir: {}
70+
cpu: "1"
71+
memory: 2Gi
10672
EOT
10773
]
10874

0 commit comments

Comments
 (0)