Skip to content

Commit ea898a6

Browse files
Merge pull request #315 from uselagoon/build-pod-cluster-autoscaler-eviction
feat: default label to prevent autoscaler evictions on pods
2 parents 0ad9fa6 + 6b47296 commit ea898a6

File tree

4 files changed

+29
-5
lines changed

4 files changed

+29
-5
lines changed

cmd/main.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ func main() {
199199
var dockerHostNamespace string
200200
var dockerHostReuseType string
201201

202+
var clusterAutoscalerEvict bool
203+
202204
flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
203205
"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
204206
flag.BoolVar(&secureMetrics, "metrics-secure", true,
@@ -430,6 +432,10 @@ func main() {
430432
`The resource type (namespace, project, organization) to use when assigning a docker-host to a build to preference an already used dockerhost
431433
eg. If project is defined, all builds from a project will prefer to build on the same docker-host where possible`)
432434

435+
// Flag to control the setting for the label cluster-autoscaler.kubernetes.io/safe-to-evict on build and task pods, defaults to false to avoid evicting pods
436+
flag.BoolVar(&clusterAutoscalerEvict, "enable-cluster-autoscaler-eviction", false,
437+
"Flag to enable cluster autoscaler eviction on build pods, defaults to false to avoid evicting running builds")
438+
433439
flag.Parse()
434440

435441
// get overrides from environment variables
@@ -990,6 +996,7 @@ func main() {
990996
DockerHost: dockerhosts,
991997
QueueCache: buildsQueueCache,
992998
BuildCache: buildsCache,
999+
ClusterAutoscalerEvict: clusterAutoscalerEvict,
9931000
}).SetupWithManager(mgr); err != nil {
9941001
setupLog.Error(err, "unable to create controller", "controller", "LagoonBuild")
9951002
os.Exit(1)
@@ -1018,11 +1025,12 @@ func main() {
10181025
HTTPSProxy: httpsProxy,
10191026
NoProxy: noProxy,
10201027
},
1021-
LFFTaskQoSEnabled: lffTaskQoSEnabled,
1022-
TaskQoS: taskQoSConfigv1beta2,
1023-
ImagePullPolicy: tipp,
1024-
QueueCache: tasksQueueCache,
1025-
TasksCache: tasksCache,
1028+
LFFTaskQoSEnabled: lffTaskQoSEnabled,
1029+
TaskQoS: taskQoSConfigv1beta2,
1030+
ImagePullPolicy: tipp,
1031+
QueueCache: tasksQueueCache,
1032+
TasksCache: tasksCache,
1033+
ClusterAutoscalerEvict: clusterAutoscalerEvict,
10261034
}).SetupWithManager(mgr); err != nil {
10271035
setupLog.Error(err, "unable to create controller", "controller", "LagoonTask")
10281036
os.Exit(1)

internal/controllers/v1beta2/build_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ type LagoonBuildReconciler struct {
8484
DockerHost *dockerhost.DockerHost
8585
QueueCache *lru.Cache[string, string]
8686
BuildCache *lru.Cache[string, string]
87+
ClusterAutoscalerEvict bool
8788
}
8889

8990
// BackupConfig holds all the backup configuration settings

internal/controllers/v1beta2/build_helpers.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,11 @@ func (r *LagoonBuildReconciler) processBuild(ctx context.Context, opLog logr.Log
799799
newPod.Labels["organization.lagoon.sh/name"] = lagoonBuild.Spec.Project.Organization.Name
800800
}
801801

802+
if !r.ClusterAutoscalerEvict {
803+
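// try to prevent build pods from being evicted by cluster autoscaler
// NOTE(review): cluster autoscaler documents safe-to-evict as a pod annotation; confirm a label is honoured, otherwise set it in Annotations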
804+
newPod.Labels["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false"
805+
}
806+
802807
// set the pod security context, if defined to a non-default value
803808
if r.BuildPodRunAsUser != 0 || r.BuildPodRunAsGroup != 0 ||
804809
r.BuildPodFSGroup != 0 {

internal/controllers/v1beta2/task_controller.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ type LagoonTaskReconciler struct {
5858
ImagePullPolicy corev1.PullPolicy
5959
QueueCache *lru.Cache[string, string]
6060
TasksCache *lru.Cache[string, string]
61+
ClusterAutoscalerEvict bool
6162
}
6263

6364
var (
@@ -323,6 +324,11 @@ func (r *LagoonTaskReconciler) getTaskPodDeployment(ctx context.Context, lagoonT
323324
taskPod.Labels["organization.lagoon.sh/id"] = fmt.Sprintf("%d", *lagoonTask.Spec.Project.Organization.ID)
324325
taskPod.Labels["organization.lagoon.sh/name"] = lagoonTask.Spec.Project.Organization.Name
325326
}
327+
328+
if !r.ClusterAutoscalerEvict {
329+
// try to prevent task pods from being evicted by cluster autoscaler
330+
taskPod.Labels["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false"
331+
}
326332
return taskPod, nil
327333
}
328334
}
@@ -662,6 +668,10 @@ func (r *LagoonTaskReconciler) createAdvancedTask(ctx context.Context, lagoonTas
662668
newPod.Labels["organization.lagoon.sh/id"] = fmt.Sprintf("%d", *lagoonTask.Spec.Project.Organization.ID)
663669
newPod.Labels["organization.lagoon.sh/name"] = lagoonTask.Spec.Project.Organization.Name
664670
}
671+
if !r.ClusterAutoscalerEvict {
672+
// try to prevent advanced task pods from being evicted by cluster autoscaler
673+
newPod.Labels["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false"
674+
}
665675
if lagoonTask.Spec.AdvancedTask.DeployerToken {
666676
// start this with the serviceaccount so that it gets the token mounted into it
667677
newPod.Spec.ServiceAccountName = "lagoon-deployer"

0 commit comments

Comments
 (0)