diff --git a/internal/k8s/kubernetes.go b/internal/k8s/kubernetes.go
index 7dcd29be..51fb7f15 100644
--- a/internal/k8s/kubernetes.go
+++ b/internal/k8s/kubernetes.go
@@ -457,7 +457,7 @@ func PartitionPodsForEviction(pods []*v1.Pod, castNamespace string, skipDeletedT
 			continue
 		}
 
-		if IsDaemonSetPod(p) || IsStaticPod(p) {
+		if IsDaemonSetPod(p) || IsStaticPod(p) || HasWildcardToleration(p) {
 			nonEvictable = append(nonEvictable, p)
 			continue
 		}
@@ -493,7 +493,27 @@ func IsControlledBy(p *v1.Pod, kind string) bool {
 }
 
 func IsNonEvictible(p *v1.Pod) bool {
-	return IsDaemonSetPod(p) || IsStaticPod(p)
+	return IsDaemonSetPod(p) || IsStaticPod(p) || HasWildcardToleration(p)
+}
+
+// HasWildcardToleration reports whether the pod has a key-less toleration that
+// covers NoSchedule taints: key is empty, operator is Exists, and effect is
+// either empty (all effects) or NoSchedule. Such pods would be rescheduled back
+// onto a cordoned node since they tolerate the node.kubernetes.io/unschedulable
+// taint. A key-less toleration limited to another effect (e.g. NoExecute) does
+// not tolerate that taint, so it does not make the pod non-evictable.
+func HasWildcardToleration(p *v1.Pod) bool {
+	for _, t := range p.Spec.Tolerations {
+		if t.Key != "" || t.Operator != v1.TolerationOpExists {
+			continue
+		}
+		// An empty effect matches every taint effect; otherwise only a
+		// NoSchedule toleration covers the cordon taint.
+		if t.Effect == "" || t.Effect == v1.TaintEffectNoSchedule {
+			return true
+		}
+	}
+	return false
 }
 
 // PatchNode patches a node with the given change function.
diff --git a/internal/k8s/kubernetes_test.go b/internal/k8s/kubernetes_test.go index 967c0c29..3d8227ce 100644 --- a/internal/k8s/kubernetes_test.go +++ b/internal/k8s/kubernetes_test.go @@ -1087,6 +1087,40 @@ func TestPartitionPodsForEviction(t *testing.T) { skipDeletedTimeoutSecs: 60, wantEvictableLen: 0, }, + { + name: "pod with wildcard toleration is non-evictable", + pods: []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "wildcard-pod", Namespace: "default"}, + Spec: v1.PodSpec{ + Tolerations: []v1.Toleration{ + {Operator: v1.TolerationOpExists}, + }, + }, + Status: v1.PodStatus{Phase: v1.PodRunning}, + }, + }, + castNamespace: testCastNamespace, + wantNonEvictableLen: 1, + wantNonEvictablePodNames: []string{"wildcard-pod"}, + }, + { + name: "pod with specific key toleration is evictable", + pods: []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "specific-toleration-pod", Namespace: "default"}, + Spec: v1.PodSpec{ + Tolerations: []v1.Toleration{ + {Key: "node.kubernetes.io/unschedulable", Operator: v1.TolerationOpExists}, + }, + }, + Status: v1.PodStatus{Phase: v1.PodRunning}, + }, + }, + castNamespace: testCastNamespace, + wantEvictableLen: 1, + wantEvictablePodNames: []string{"specific-toleration-pod"}, + }, { name: "mixed pods are partitioned correctly", pods: []v1.Pod{