Skip to content

Commit 73e666f

Browse files
committed
Add --in-place-skip-disruption-budget flag
Signed-off-by: Omer Aplatony <[email protected]>
1 parent 934a600 commit 73e666f

File tree

12 files changed

+504
-67
lines changed

12 files changed

+504
-67
lines changed

vertical-pod-autoscaler/docs/flags.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
146146
| `eviction-tolerance` | float | 0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted. |
147147
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
148148
| `ignored-vpa-object-namespaces` | string | | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
149+
| `in-place-skip-disruption-budget` | | | If true, VPA updater skips disruption budget checks for in-place pod updates when all containers have NotRequired resize policy (or no policy defined) for both CPU and memory resources. Disruption budgets are still respected when any container has RestartContainer resize policy for any resource. |
149150
| `in-recommendation-bounds-eviction-lifetime-threshold` | | 12h0m0s | duration Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range |
150151
| `kube-api-burst` | float | 100 | QPS burst limit when making requests to Kubernetes apiserver |
151152
| `kube-api-qps` | float | 50 | QPS limit when making requests to Kubernetes apiserver |
@@ -174,4 +175,3 @@ This document is auto-generated from the flag definitions in the VPA updater cod
174175
| `v,` | | : 4 | , --v Level set the log level verbosity (default 4) |
175176
| `vmodule` | moduleSpec | | comma-separated list of pattern=N settings for file-filtered logging |
176177
| `vpa-object-namespace` | string | | Specifies the namespace to search for VPA objects. Leave empty to include all namespaces. If provided, the garbage collector will only clean this namespace. |
177-

vertical-pod-autoscaler/pkg/updater/logic/updater.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ func NewUpdater(
9393
evictionRateBurst int,
9494
evictionToleranceFraction float64,
9595
useAdmissionControllerStatus bool,
96+
inPlaceSkipDisruptionBudget bool,
9697
statusNamespace string,
9798
recommendationProcessor vpa_api_util.RecommendationProcessor,
9899
evictionAdmission priority.PodEvictionAdmission,
@@ -111,6 +112,7 @@ func NewUpdater(
111112
minReplicasForEviction,
112113
evictionToleranceFraction,
113114
patchCalculators,
115+
inPlaceSkipDisruptionBudget,
114116
)
115117
if err != nil {
116118
return nil, fmt.Errorf("failed to create restriction factory: %v", err)

vertical-pod-autoscaler/pkg/updater/main.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ var (
7575
useAdmissionControllerStatus = flag.Bool("use-admission-controller-status", true,
7676
"If true, updater will only evict pods when admission controller status is valid.")
7777

78+
inPlaceSkipDisruptionBudget = flag.Bool(
79+
"in-place-skip-disruption-budget",
80+
false,
81+
"If true, VPA updater skips disruption budget checks for in-place pod updates when all containers have NotRequired resize policy (or no policy defined) for both CPU and memory resources. "+
82+
"Disruption budgets are still respected when any container has RestartContainer resize policy for any resource.",
83+
)
84+
7885
namespace = os.Getenv("NAMESPACE")
7986
)
8087

@@ -217,6 +224,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) {
217224
*evictionRateBurst,
218225
*evictionToleranceFraction,
219226
*useAdmissionControllerStatus,
227+
*inPlaceSkipDisruptionBudget,
220228
admissionControllerStatusNamespace,
221229
vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator),
222230
priority.NewScalingDirectionPodEvictionAdmission(),

vertical-pod-autoscaler/pkg/updater/restriction/pods_eviction_restriction_test.go

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func TestEvictTooFewReplicas(t *testing.T) {
5454
}
5555

5656
basicVpa := getBasicVpa()
57-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil)
57+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil, false)
5858
assert.NoError(t, err)
5959
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
6060
assert.NoError(t, err)
@@ -94,7 +94,7 @@ func TestEvictionTolerance(t *testing.T) {
9494
}
9595

9696
basicVpa := getBasicVpa()
97-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2 /*minReplicas*/, tolerance, nil, nil, nil)
97+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2 /*minReplicas*/, tolerance, nil, nil, nil, false)
9898
assert.NoError(t, err)
9999
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
100100
assert.NoError(t, err)
@@ -138,7 +138,7 @@ func TestEvictAtLeastOne(t *testing.T) {
138138
}
139139

140140
basicVpa := getBasicVpa()
141-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, tolerance, nil, nil, nil)
141+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, tolerance, nil, nil, nil, false)
142142
assert.NoError(t, err)
143143
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
144144
assert.NoError(t, err)
@@ -230,7 +230,7 @@ func TestEvictEmitEvent(t *testing.T) {
230230
pods = append(pods, p.pod)
231231
}
232232
clock := baseclocktest.NewFakeClock(time.Time{})
233-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance, clock, map[string]time.Time{}, nil)
233+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance, clock, map[string]time.Time{}, nil, false)
234234
assert.NoError(t, err)
235235
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, testCase.vpa)
236236
assert.NoError(t, err)
@@ -257,3 +257,45 @@ func TestEvictEmitEvent(t *testing.T) {
257257
}
258258
}
259259
}
260+
261+
// This test ensures that in-place-skip-disruption-budget only affects in-place
262+
// updates and does not bypass eviction tolerance when performing pod evictions.
263+
func TestEvictTooFewReplicasWithInPlaceSkipDisruptionBudget(t *testing.T) {
264+
replicas := int32(5)
265+
livePods := 5
266+
267+
rc := apiv1.ReplicationController{
268+
ObjectMeta: metav1.ObjectMeta{
269+
Name: "rc",
270+
Namespace: "default",
271+
},
272+
TypeMeta: metav1.TypeMeta{
273+
Kind: "ReplicationController",
274+
},
275+
Spec: apiv1.ReplicationControllerSpec{
276+
Replicas: &replicas,
277+
},
278+
}
279+
280+
pods := make([]*apiv1.Pod, livePods)
281+
for i := range pods {
282+
pods[i] = test.Pod().WithName(getTestPodName(i)).WithCreator(&rc.ObjectMeta, &rc.TypeMeta).Get()
283+
}
284+
285+
basicVpa := getBasicVpa()
286+
// factory with inPlaceSkipDisruptionBudget on
287+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil, true)
288+
assert.NoError(t, err)
289+
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
290+
assert.NoError(t, err)
291+
eviction := factory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap)
292+
293+
for _, pod := range pods {
294+
assert.False(t, eviction.CanEvict(pod))
295+
}
296+
297+
for _, pod := range pods {
298+
err := eviction.Evict(pod, basicVpa, test.FakeEventRecorder())
299+
assert.Error(t, err, "Error expected")
300+
}
301+
}

vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ type PodsInPlaceRestrictionImpl struct {
6767
patchCalculators []patch.Calculator
6868
clock clock.Clock
6969
lastInPlaceAttemptTimeMap map[string]time.Time
70+
inPlaceSkipDisruptionBudget bool
7071
}
7172

7273
// CanInPlaceUpdate checks if pod can be safely updated
@@ -89,6 +90,13 @@ func (ip *PodsInPlaceRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) utils.InP
8990
}
9091
return utils.InPlaceDeferred
9192
}
93+
if ip.inPlaceSkipDisruptionBudget {
94+
if utils.IsNonDisruptiveResize(pod) {
95+
klog.V(4).InfoS("in-place-skip-disruption-budget enabled, skipping disruption budget check for in-place update")
96+
return utils.InPlaceApproved
97+
}
98+
klog.V(4).InfoS("in-place-skip-disruption-budget enabled, but pod has RestartContainer resize policy", "pod", klog.KObj(pod))
99+
}
92100
if singleGroupStats.isPodDisruptable() {
93101
return utils.InPlaceApproved
94102
}

0 commit comments

Comments
 (0)