Skip to content

Commit d659d7b

Browse files
authored
Merge pull request #31 from gitpod-io/nv/docker-resource-limits
fix: add memory and CPU limits to all docker containers (A19)
2 parents d6d818e + 9e777b0 commit d659d7b

5 files changed

Lines changed: 53 additions & 0 deletions

File tree

files/proxy-cloud-init.tftpl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ write_files:
277277
ExecStart=/usr/bin/docker%{ if DOCKER_CONFIG_ENABLED } --config /var/lib/gitpod/docker-config%{ endif } run --rm --name prometheus \
278278
--network host \
279279
--hostname %H \
280+
--memory=512m \
281+
--cpus=0.25 \
280282
--volume /var/lib/prometheus:/etc/prometheus:ro \
281283
--volume /var/lib/prometheus/data:/prometheus \
282284
%{ if CA_ENABLED ~}
@@ -336,6 +338,8 @@ write_files:
336338
ExecStart=/usr/bin/docker%{ if DOCKER_CONFIG_ENABLED } --config /var/lib/gitpod/docker-config%{ endif } run --rm --name node-exporter \
337339
--network host \
338340
--pid host \
341+
--memory=256m \
342+
--cpus=0.25 \
339343
--volume /:/host:ro,rslave \
340344
%{ if CA_ENABLED ~}
341345
--volume /var/lib/gitpod/certs/gitpod-custom-ca.crt:/etc/ssl/certs/gitpod-trust-bundle.crt:ro \
@@ -665,6 +669,8 @@ write_files:
665669
ExecStartPre=-/usr/bin/docker rm gitpod-proxy
666670
ExecStart=/usr/bin/docker%{ if DOCKER_CONFIG_ENABLED } --config /var/lib/gitpod/docker-config%{ endif } run --rm --name gitpod-proxy \
667671
--network host \
672+
--memory=${PROXY_CONTAINER_MEMORY} \
673+
--cpus=${PROXY_CONTAINER_CPUS} \
668674
--volume /var/lib/gitpod/tls:/tmp/certs:rw \
669675
%{ if CA_ENABLED ~}
670676
--volume /var/lib/gitpod/certs/gitpod-custom-ca.crt:/etc/ssl/certs/gitpod-trust-bundle.crt:ro \

files/runner-cloud-init.tftpl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,8 @@ write_files:
296296
ExecStart=/usr/bin/docker%{ if DOCKER_CONFIG_ENABLED } --config /var/lib/gitpod/docker-config%{ endif } run --rm --name prometheus \
297297
--network host \
298298
--hostname %H \
299+
--memory=1g \
300+
--cpus=0.5 \
299301
--volume /var/lib/prometheus:/etc/prometheus:ro \
300302
--volume /var/lib/prometheus/data:/prometheus \
301303
--volume /var/lib/gitpod/certs/gitpod-custom-ca.crt:/etc/ssl/certs/gitpod-trust-bundle.crt:ro \
@@ -344,6 +346,8 @@ write_files:
344346
ExecStart=/usr/bin/docker%{ if DOCKER_CONFIG_ENABLED } --config /var/lib/gitpod/docker-config%{ endif } run --rm --name node-exporter \
345347
--network host \
346348
--pid host \
349+
--memory=256m \
350+
--cpus=0.25 \
347351
--volume /:/host:ro,rslave \
348352
--volume /var/lib/gitpod/certs/gitpod-custom-ca.crt:/etc/ssl/certs/gitpod-trust-bundle.crt:ro \
349353
--env http_proxy=${HTTP_PROXY} \
@@ -652,6 +656,8 @@ write_files:
652656
ExecStart=/usr/bin/docker%{ if DOCKER_CONFIG_ENABLED } --config /var/lib/gitpod/docker-config%{ endif } run --rm --name gitpod-auth-proxy \
653657
--network host \
654658
--hostname %H \
659+
--memory=512m \
660+
--cpus=0.5 \
655661
--volume /var/lib/gitpod/certs:/var/lib/gitpod/certs:ro \
656662
%{ if HAS_TRUST_BUNDLE ~}
657663
--volume /var/lib/gitpod/certs/gitpod-custom-ca.crt:/etc/ssl/certs/gitpod-trust-bundle.crt:ro \
@@ -700,6 +706,8 @@ write_files:
700706
ExecStart=/usr/bin/docker%{ if DOCKER_CONFIG_ENABLED } --config /var/lib/gitpod/docker-config%{ endif } run --rm --name gitpod-runner \
701707
--network host \
702708
--hostname %H \
709+
--memory=${RUNNER_CONTAINER_MEMORY} \
710+
--cpus=${RUNNER_CONTAINER_CPUS} \
703711
--volume /var/lib/prometheus:/var/lib/prometheus \
704712
--volume /tmp:/tmp \
705713
--env http_proxy=${HTTP_PROXY} \

locals.tf

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,39 @@ locals {
3232

3333
runner_dev_image = var.development_version != "" ? "us-docker.pkg.dev/gitpod-next-production/gitpod-next/gitpod-gcp-runner:${var.development_version}" : local.runner_image
3434

35+
# Container resource limits derived from VM machine type.
36+
# GCP standard machine types follow the pattern {family}-standard-{vcpus}
37+
# with memory = vcpus * 4 GB. We reserve a fixed 512 MB for the host OS and Docker
38+
# daemon, then allocate the rest across containers.
39+
#
40+
# Aligned with EC2 Fargate runner limits:
41+
# EC2 small: 1 vCPU / 3 GB task → runner gets ~1 GB
42+
# EC2 large: 8 vCPU / 16 GB task → runner gets ~14 GB
43+
# GCP small: 2 vCPU / 8 GB VM → runner gets 5888 MB (~5.75 GB) / 0.75 CPU
44+
# GCP regular: 4 vCPU / 16 GB VM → runner gets 14080 MB (~13.75 GB) / 2.75 CPU
45+
46+
runner_vcpus = tonumber(regex("-(\\d+)$", var.runner_vm_config.machine_type)[0])
47+
runner_memory_gb = local.runner_vcpus * 4
48+
49+
# Sidecar limits are fixed (small footprint). The main runner container
50+
# gets whatever remains after sidecars and OS overhead.
51+
runner_sidecar_memory_mb = 1792 # prometheus 1024 + auth-proxy 512 + node-exporter 256
52+
runner_sidecar_cpus = 1.25 # prometheus 0.5 + auth-proxy 0.5 + node-exporter 0.25
53+
runner_os_reserve_mb = 512
54+
55+
runner_container_memory_mb = (local.runner_memory_gb * 1024) - local.runner_sidecar_memory_mb - local.runner_os_reserve_mb
56+
# CPU share for the main runner container: whatever is left after the fixed
# sidecar limits. Clamped to a small positive floor because on a 1-vCPU machine
# type the subtraction goes negative (1 - 1.25), which is not a valid value
# for `docker run --cpus`.
runner_container_cpus = max(local.runner_vcpus - local.runner_sidecar_cpus, 0.25)
57+
58+
proxy_vcpus = tonumber(regex("-(\\d+)$", var.proxy_vm_config.machine_type)[0])
59+
proxy_memory_gb = local.proxy_vcpus * 4
60+
61+
proxy_sidecar_memory_mb = 768 # prometheus 512 + node-exporter 256
62+
proxy_sidecar_cpus = 0.5 # prometheus 0.25 + node-exporter 0.25
63+
proxy_os_reserve_mb = 512
64+
65+
proxy_container_memory_mb = (local.proxy_memory_gb * 1024) - local.proxy_sidecar_memory_mb - local.proxy_os_reserve_mb
66+
proxy_container_cpus = local.proxy_vcpus - local.proxy_sidecar_cpus
67+
3568
# Docker config handling
3669
docker_config_enabled = var.custom_images.docker_config_json != ""
3770
docker_config_bucket_name = local.docker_config_enabled ? google_storage_bucket.runner_assets.name : ""

proxy-vm.tf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ data "cloudinit_config" "proxy" {
3737
# Insecure registries configuration
3838
INSECURE_REGISTRIES_ENABLED = local.insecure_registries_enabled
3939
INSECURE_REGISTRIES_JSON = local.insecure_registries_json
40+
# Container resource limits (computed from machine type)
41+
PROXY_CONTAINER_MEMORY = "${local.proxy_container_memory_mb}m"
42+
PROXY_CONTAINER_CPUS = tostring(local.proxy_container_cpus)
4043
})
4144
}
4245
}

runner-vm.tf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ data "cloudinit_config" "runner" {
167167
ENVIRONMENT_VM_LABELS = join(",", [for k, v in var.labels : "${k}=${v}"])
168168
# Module version reported to the management plane
169169
TERRAFORM_MODULE_VERSION = local.module_version
170+
# Container resource limits (computed from machine type)
171+
RUNNER_CONTAINER_MEMORY = "${local.runner_container_memory_mb}m"
172+
RUNNER_CONTAINER_CPUS = tostring(local.runner_container_cpus)
170173
})
171174
}
172175
}

0 commit comments

Comments
 (0)