kubernetes · maxcao13 · Mar 14, 2024 · Mar 17, 2025 · Mar 7, 2025 · Mar 21, 2025
diff --git a/vertical-pod-autoscaler/RELEASE.md b/vertical-pod-autoscaler/RELEASE.md
@@ -9,7 +9,7 @@ Before doing the release for the first time check if you have all the necessary
 There are the following steps of the release process:
 
 1. [ ] Open issue to track the release.
-2. [ ] Update VPA version const.
+2. [ ] Rollup all changes.
 3. [ ] Build and stage images.
 4. [ ] Test the release.
 5. [ ] Promote image.
@@ -20,7 +20,7 @@ There are the following steps of the release process:
 Open a new issue to track the release, use the [vpa_release](https://github.com/kubernetes/autoscaler/issues/new?&template=vpa_release.md) template.
 We use the issue to communicate what is state of the release.
 
-## Update VPA version const
+## Rollup all changes
 
 1. [ ] Wait for all VPA changes that will be in the release to merge.
 2. [ ] Wait for [the end to end tests](https://testgrid.k8s.io/sig-autoscaling-vpa) to run with all VPA changes
@@ -31,13 +31,12 @@ We use the issue to communicate what is state of the release.
 
 ### New minor release
 
-1. [ ] Change the version in
-    [common/version-go](https://github.com/kubernetes/autoscaler/blob/master/vertical-pod-autoscaler/common/version.go)
-    to `1.${next-minor}.0`,
-2. [ ] Commit and merge the change,
-3. [ ] Go to the merged change,
-4. [ ] [Create a new branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository) named `vpa-release-1.${next-minor}` from the
+1. [ ] [Create a new branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository) named `vpa-release-1.${next-minor}` from the
     merged change.
+2. [ ] In the **main branch**, change the version in
+    [common/version-go](https://github.com/kubernetes/autoscaler/blob/master/vertical-pod-autoscaler/common/version.go)
+    to `1.${next-minor}.0`.
+3. [ ] Commit and merge the change.
 
 ### New patch release
 

diff --git a/vertical-pod-autoscaler/common/version.go b/vertical-pod-autoscaler/common/version.go
@@ -21,7 +21,7 @@ package common
 var gitCommit = ""
 
 // versionCore is the version of VPA.
-const versionCore = "1.3.0"
+const versionCore = "1.4.0"
 
 // VerticalPodAutoscalerVersion returns the version of the VPA.
 func VerticalPodAutoscalerVersion() string {

diff --git a/vertical-pod-autoscaler/deploy/admission-controller-deployment.yaml b/vertical-pod-autoscaler/deploy/admission-controller-deployment.yaml
@@ -47,15 +47,3 @@ spec:
         - name: tls-certs
           secret:
             secretName: vpa-tls-certs
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vpa-webhook
-  namespace: kube-system
-spec:
-  ports:
-    - port: 443
-      targetPort: 8000
-  selector:
-    app: vpa-admission-controller
diff --git a/vertical-pod-autoscaler/deploy/admission-controller-service.yaml b/vertical-pod-autoscaler/deploy/admission-controller-service.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: vpa-webhook
+  namespace: kube-system
+spec:
+  ports:
+    - port: 443
+      targetPort: 8000
+  selector:
+    app: vpa-admission-controller
diff --git a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml
@@ -121,6 +121,32 @@ rules:
       - create
 ---
 apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: system:vpa-updater-in-place
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - pods/resize
+      - pods # required for patching vpaInPlaceUpdated annotations onto the pod
+    verbs:
+      - patch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: system:vpa-updater-in-place-binding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:vpa-updater-in-place
+subjects:
+  - kind: ServiceAccount
+    name: vpa-updater
+    namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
   name: system:metrics-reader

diff --git a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml
@@ -458,6 +458,7 @@ spec:
                     - "Off"
                     - Initial
                     - Recreate
+                    - InPlaceOrRecreate
                     - Auto
                     type: string
                 type: object

diff --git a/vertical-pod-autoscaler/docs/api.md b/vertical-pod-autoscaler/docs/api.md
@@ -155,7 +155,7 @@ _Appears in:_
 
 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
-| `updateMode` _[UpdateMode](#updatemode)_ | Controls when autoscaler applies changes to the pod resources.<br />The default is 'Auto'. |  | Enum: [Off Initial Recreate Auto] <br /> |
+| `updateMode` _[UpdateMode](#updatemode)_ | Controls when autoscaler applies changes to the pod resources.<br />The default is 'Auto'. |  | Enum: [Off Initial Recreate InPlaceOrRecreate Auto] <br /> |
 | `minReplicas` _integer_ | Minimal number of replicas which need to be alive for Updater to attempt<br />pod eviction (pending other checks like PDB). Only positive values are<br />allowed. Overrides global '--min-replicas' flag. |  |  |
 | `evictionRequirements` _[EvictionRequirement](#evictionrequirement) array_ | EvictionRequirements is a list of EvictionRequirements that need to<br />evaluate to true in order for a Pod to be evicted. If more than one<br />EvictionRequirement is specified, all of them need to be fulfilled to allow eviction. |  |  |
 
@@ -208,7 +208,7 @@ _Underlying type:_ _string_
 UpdateMode controls when autoscaler applies changes to the pod resources.
 
 _Validation:_
-- Enum: [Off Initial Recreate Auto]
+- Enum: [Off Initial Recreate InPlaceOrRecreate Auto]
 
 _Appears in:_
 - [PodUpdatePolicy](#podupdatepolicy)
@@ -218,7 +218,8 @@ _Appears in:_
 | `Off` | UpdateModeOff means that autoscaler never changes Pod resources.<br />The recommender still sets the recommended resources in the<br />VerticalPodAutoscaler object. This can be used for a "dry run".<br /> |
 | `Initial` | UpdateModeInitial means that autoscaler only assigns resources on pod<br />creation and does not change them during the lifetime of the pod.<br /> |
 | `Recreate` | UpdateModeRecreate means that autoscaler assigns resources on pod<br />creation and additionally can update them during the lifetime of the<br />pod by deleting and recreating the pod.<br /> |
-| `Auto` | UpdateModeAuto means that autoscaler assigns resources on pod creation<br />and additionally can update them during the lifetime of the pod,<br />using any available update method. Currently this is equivalent to<br />Recreate, which is the only available update method.<br /> |
+| `Auto` | UpdateModeAuto means that autoscaler assigns resources on pod creation<br />and additionally can update them during the lifetime of the pod,<br />using any available update method. Currently this is equivalent to<br />Recreate.<br /> |
+| `InPlaceOrRecreate` | UpdateModeInPlaceOrRecreate means that autoscaler tries to assign resources in-place.<br />If this is not possible (e.g., resizing takes too long or is infeasible), it falls back to the<br />"Recreate" update mode.<br />Requires VPA level feature gate "InPlaceOrRecreate" to be enabled<br />on the admission and updater pods.<br />Requires cluster feature gate "InPlacePodVerticalScaling" to be enabled.<br /> |
 
 
 #### VerticalPodAutoscaler

diff --git a/vertical-pod-autoscaler/docs/features.md b/vertical-pod-autoscaler/docs/features.md
@@ -4,6 +4,8 @@
 
 - [Limits control](#limits-control)
 - [Memory Value Humanization](#memory-value-humanization)
+- [CPU Recommendation Rounding](#cpu-recommendation-rounding)
+- [In-Place Updates](#in-place-updates-inplaceorrecreate)
 
 ## Limits control
 
@@ -50,4 +52,78 @@ To enable this feature, set the --round-cpu-millicores flag when running the VPA
 
 ```bash
 --round-cpu-millicores=50
-```
+```
+
+## In-Place Updates (`InPlaceOrRecreate`)
+
+> [!WARNING] 
+> FEATURE STATE: VPA v1.4.0 [alpha]
+
+VPA supports in-place updates to reduce disruption when applying resource recommendations. This feature leverages Kubernetes' in-place update capabilities (which are in beta as of Kubernetes 1.33) to modify container resources without requiring pod recreation.
+For more information, see [AEP-4016: Support for in place updates in VPA](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler/enhancements/4016-in-place-updates-support).
+
+### Usage
+
+To use in-place updates, set the VPA's `updateMode` to `InPlaceOrRecreate`:
+```yaml
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+  name: my-vpa
+spec:
+  updatePolicy:
+    updateMode: "InPlaceOrRecreate"
+```
+
+### Behavior
+
+When using `InPlaceOrRecreate` mode, VPA will first attempt to apply updates in-place. If the in-place update fails, VPA will fall back to pod recreation.
+Updates are attempted when:
+* Container requests are outside the recommended bounds
+* Quick OOM occurs
+* For long-running pods (>12h), when recommendations differ significantly (>10%)
+
+Important Notes
+
+* Disruption Possibility: While in-place updates aim to minimize disruption, they cannot guarantee zero disruption as the container runtime is responsible for the actual resize operation.
+
+* Memory Limit Downscaling: In the beta version of the Kubernetes in-place update feature, memory limit downscaling is not supported for pods with `resizePolicy: PreferNoRestart`. In such cases, VPA will fall back to pod recreation.
+
+### Requirements
+
+* Kubernetes 1.33+ with `InPlacePodVerticalScaling` feature gate enabled
+* VPA version 1.4.0+ with `InPlaceOrRecreate` feature gate enabled
+
+### Configuration
+
+Enable the feature by setting the following flag on the VPA components (both the updater and the admission-controller):
+
+```bash
+--feature-gates=InPlaceOrRecreate=true
+``` 
+
+### Limitations
+
+* All containers in a pod are updated together (partial updates not supported)
+* Memory downscaling requires careful consideration to prevent OOMs
+* Updates still respect VPA's standard update conditions and timing restrictions
+* In-place updates will fail if they would result in a change to the pod's QoS class
+
+### Fallback Behavior
+
+VPA will fall back to pod recreation in the following scenarios:
+
+* In-place update is [infeasible](https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/1287-in-place-update-pod-resources/README.md#resize-status) (node resources, etc.)
+* Update is [deferred](https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/1287-in-place-update-pod-resources/README.md#resize-status) for more than 5 minutes
+* Update is in progress for more than 1 hour
+* [Pod QoS](https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/) class would change due to the update
+* Memory limit downscaling is required with [PreferNoRestart policy](https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/1287-in-place-update-pod-resources/README.md#container-resize-policy)
+
+### Monitoring
+
+VPA provides metrics to track in-place update operations:
+
+* `vpa_in_place_updatable_pods_total`: Number of pods matching in-place update criteria
+* `vpa_in_place_updated_pods_total`: Number of pods successfully updated in-place
+* `vpa_vpas_with_in_place_updatable_pods_total`: Number of VPAs with pods eligible for in-place updates
+* `vpa_vpas_with_in_place_updated_pods_total`: Number of VPAs with successfully in-place updated pods
diff --git a/vertical-pod-autoscaler/docs/flags.md b/vertical-pod-autoscaler/docs/flags.md
@@ -12,6 +12,7 @@ This document is auto-generated from the flag definitions in the VPA admission-c
 | `--address` | ":8944" |                         The address to expose Prometheus metrics. |
 | `--alsologtostderr` |  |                        log to standard error as well as files (no effect when -logtostderr=true) |
 | `--client-ca-file` | "/etc/tls-certs/caCert.pem" |                  Path to CA PEM file. |
+| `--feature-gates` |  |            A set of key=value pairs that describe feature gates for alpha/experimental features. Options are: |
 | `--ignored-vpa-object-namespaces` |  |   A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
 | `--kube-api-burst` | 10 |                   QPS burst limit when making requests to Kubernetes apiserver |
 | `--kube-api-qps` | 5 |                     QPS limit when making requests to Kubernetes apiserver |
@@ -65,6 +66,7 @@ This document is auto-generated from the flag definitions in the VPA recommender
 | `--cpu-integer-post-processor-enabled` |  |                     Enable the cpu-integer recommendation post processor. The post processor will round up CPU recommendations to a whole CPU for pods which were opted in by setting an appropriate label on VPA object (experimental) |
 | `--external-metrics-cpu-metric` |  |                     ALPHA.  Metric to use with external metrics provider for CPU usage. |
 | `--external-metrics-memory-metric` |  |                  ALPHA.  Metric to use with external metrics provider for memory usage. |
+| `--feature-gates` |  |                            A set of key=value pairs that describe feature gates for alpha/experimental features. Options are: |
 | `--history-length` | "8d" |                                  How much time back prometheus have to be queried to get historical metrics |
 | `--history-resolution` | "1h" |                              Resolution at which Prometheus is queried for historical metrics |
 | `--humanize-memory` |  |                                        Convert memory values in recommendations to the highest appropriate SI unit with up to 2 decimal places for better readability. |
@@ -135,6 +137,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
 | `--eviction-rate-burst` | 1 |                                         Burst of pods that can be evicted. |
 | `--eviction-rate-limit` |  |                                       Number of pods that can be evicted per seconds. A rate limit set to 0 or -1 will disable |
 | `--eviction-tolerance` | 0.5 |                                        Fraction of replica count that can be evicted for update, if more than one pod can be evicted. |
+| `--feature-gates` |  |                                     A set of key=value pairs that describe feature gates for alpha/experimental features. Options are: |
 | `--ignored-vpa-object-namespaces` |  |                            A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
 | `--in-recommendation-bounds-eviction-lifetime-threshold` | 12h0m0s |   Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range |
 | `--kube-api-burst` | 10 |                                            QPS burst limit when making requests to Kubernetes apiserver |

diff --git a/vertical-pod-autoscaler/docs/installation.md b/vertical-pod-autoscaler/docs/installation.md
@@ -138,6 +138,16 @@ To print YAML contents with all resources that would be understood by
 The output of that command won't include secret information generated by
 [pkg/admission-controller/gencerts.sh](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler/pkg/admission-controller/gencerts.sh) script.
 
+### Feature gates
+
+To install VPA with feature gates, you can specify the environment variable `$FEATURE_GATES`.
+
+For example, to enable the `InPlaceOrRecreate` feature gate:
+
+```console
+FEATURE_GATES="InPlaceOrRecreate=true" ./hack/vpa-up.sh
+```
+
 ## Tear down
 
 Note that if you stop running VPA in your cluster, the resource requests

diff --git a/vertical-pod-autoscaler/e2e/go.mod b/vertical-pod-autoscaler/e2e/go.mod
@@ -14,7 +14,7 @@ require (
 	k8s.io/apimachinery v0.32.0
 	k8s.io/autoscaler/vertical-pod-autoscaler v1.2.1
 	k8s.io/client-go v0.32.0
-	k8s.io/component-base v0.32.0
+	k8s.io/component-base v0.32.2
 	k8s.io/klog/v2 v2.130.1
 	k8s.io/kubernetes v1.32.0
 	k8s.io/pod-security-admission v0.32.0

diff --git a/vertical-pod-autoscaler/e2e/go.sum b/vertical-pod-autoscaler/e2e/go.sum
@@ -92,6 +92,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
 github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
+github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
+github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4=