From 0439c70e2ea196a4aea7ba880590881552c37155 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Tue, 16 Apr 2024 16:40:28 -0500 Subject: [PATCH 01/26] try cleaning up containers as part of runonce --- .../pkg/recommender/input/cluster_feeder.go | 32 +++++++++++++++++++ .../pkg/recommender/model/vpa.go | 5 +++ .../pkg/recommender/routines/recommender.go | 3 ++ 3 files changed, 40 insertions(+) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 5fef7df5deb..fde7a61304e 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -68,6 +68,9 @@ type ClusterStateFeeder interface { // LoadPods updates clusterState with current specification of Pods and their Containers. LoadPods() + // PruneContainers removes any containers from the cluster state that are no longer present in pods. + PruneContainers() + // LoadRealTimeMetrics updates clusterState with current usage metrics of containers. LoadRealTimeMetrics() @@ -425,6 +428,35 @@ func (feeder *clusterStateFeeder) LoadPods() { } } +func (feeder *clusterStateFeeder) PruneContainers() { + + // Find all the containers that are legitimately in pods + containersInPods := make(map[string]*model.ContainerState) + for _, pod := range feeder.clusterState.Pods { + for containerID, container := range pod.Containers { + containersInPods[containerID] = container + } + } + + // Go through the VPAs + for _, vpa := range feeder.clusterState.Vpas { + // Look at the aggregates + for container := range vpa.AggregateContainerStates() { + // If the aggregate isn't in the pod according to the state, remove it + if _, ok := containersInPods[container.ContainerName()]; !ok { + klog.Infof("Deleting container %s, not present in any pods", container.ContainerName()) + vpa.DeleteAggregation(container) + // If this container is also in the initlal state, say, from a checkpoint, remove it from there too + if _, ok := vpa.ContainersInitialAggregateState[container.ContainerName()]; ok { + klog.Infof("Also removing container %s, from initial aggregate state", container.ContainerName()) + delete(vpa.ContainersInitialAggregateState, container.ContainerName()) + } + } + + } + } +} + func (feeder *clusterStateFeeder) LoadRealTimeMetrics() { containersMetrics, err := feeder.metricsClient.GetContainersMetrics() if err != nil { diff --git a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go index b4f32e07f8c..f9f9fc5fbee 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go @@ -209,6 +209,11 @@ func (vpa *Vpa) AggregateStateByContainerName() ContainerNameToAggregateStateMap return containerNameToAggregateStateMap } +// AggregateContainerStates returns the underlying internal aggregate state map. 
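+// It is exposed so that the cluster state feeder can walk the per-container aggregates directly, e.g. to prune
+// entries whose containers no longer exist in any pod matched by the VPA (see PruneContainers in cluster_feeder.go).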
+func (vpa *Vpa) AggregateContainerStates() aggregateContainerStatesMap { + return vpa.aggregateContainerStates +} + // HasRecommendation returns if the VPA object contains any recommendation func (vpa *Vpa) HasRecommendation() bool { return (vpa.Recommendation != nil) && len(vpa.Recommendation.ContainerRecommendations) > 0 diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go index 1f456a80c18..202142c456f 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go @@ -153,6 +153,9 @@ func (r *recommender) RunOnce() { r.clusterStateFeeder.LoadPods() timer.ObserveStep("LoadPods") + r.clusterStateFeeder.PruneContainers() + timer.ObserveStep("PruneContainers") + r.clusterStateFeeder.LoadRealTimeMetrics() timer.ObserveStep("LoadMetrics") klog.V(3).InfoS("ClusterState is tracking", "pods", len(r.clusterState.Pods), "vpas", len(r.clusterState.Vpas)) From 4f16402adf30276d4532c0c445c86ea495d363fd Mon Sep 17 00:00:00 2001 From: John Kyros Date: Tue, 16 Apr 2024 16:57:54 -0500 Subject: [PATCH 02/26] more logs --- .../pkg/recommender/input/cluster_feeder.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index fde7a61304e..75156d8fbd8 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -429,21 +429,25 @@ func (feeder *clusterStateFeeder) LoadPods() { } func (feeder *clusterStateFeeder) PruneContainers() { + klog.Infof("Pruning containers") // Find all the containers that are legitimately in pods - containersInPods := make(map[string]*model.ContainerState) + containersInPods := make(map[string]string) for _, pod := range feeder.clusterState.Pods { - for containerID, container := range pod.Containers { - containersInPods[containerID] = container + for containerID, _ := range pod.Containers { + containersInPods[containerID] = pod.ID.PodName + klog.Infof("--> found container %s", containerID) } } // Go through the VPAs for _, vpa := range feeder.clusterState.Vpas { + klog.Infof("--> inspecting vpa %s", vpa.ID.VpaName) // Look at the aggregates for container := range vpa.AggregateContainerStates() { + klog.Infof("--> Checking aggregate container %s", container.ContainerName) // If the aggregate isn't in the pod according to the state, remove it - if _, ok := containersInPods[container.ContainerName()]; !ok { + if podName, ok := containersInPods[container.ContainerName()]; !ok { klog.Infof("Deleting container %s, not present in any pods", container.ContainerName()) vpa.DeleteAggregation(container) // If this container is also in the initlal state, say, from a checkpoint, remove it from there too @@ -451,6 +455,8 @@ func (feeder *clusterStateFeeder) PruneContainers() { klog.Infof("Also removing container %s, from initial aggregate state", container.ContainerName()) delete(vpa.ContainersInitialAggregateState, container.ContainerName()) } + } else { + klog.Infof("--> Container %s is in pod %s", container.ContainerName(), podName) } } From 58fb87a371309f65a5ef7705f084b2416a6d1c2a Mon Sep 17 00:00:00 2001 From: John Kyros Date: Tue, 16 Apr 2024 17:05:33 -0500 Subject: [PATCH 03/26] use the initial as the key --- .../pkg/recommender/input/cluster_feeder.go | 25 +++++++++++++------ 
1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 75156d8fbd8..fb1ce1d1b88 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -444,21 +444,30 @@ func (feeder *clusterStateFeeder) PruneContainers() { for _, vpa := range feeder.clusterState.Vpas { klog.Infof("--> inspecting vpa %s", vpa.ID.VpaName) // Look at the aggregates + + // TODO(jkyros): The bad aggregate IS NOT in the raw aggregate container list for container := range vpa.AggregateContainerStates() { - klog.Infof("--> Checking aggregate container %s", container.ContainerName) + klog.Infof("--> Checking aggregate container %s", container.ContainerName()) // If the aggregate isn't in the pod according to the state, remove it if podName, ok := containersInPods[container.ContainerName()]; !ok { - klog.Infof("Deleting container %s, not present in any pods", container.ContainerName()) + klog.Infof("Deleting Aggregate container %s, not present in any pods", container.ContainerName()) vpa.DeleteAggregation(container) - // If this container is also in the initlal state, say, from a checkpoint, remove it from there too - if _, ok := vpa.ContainersInitialAggregateState[container.ContainerName()]; ok { - klog.Infof("Also removing container %s, from initial aggregate state", container.ContainerName()) - delete(vpa.ContainersInitialAggregateState, container.ContainerName()) - } + } else { - klog.Infof("--> Container %s is in pod %s", container.ContainerName(), podName) + klog.Infof("--> Aggregate Container %s is in pod %s", container.ContainerName(), podName) } + } + for container := range vpa.ContainersInitialAggregateState { + klog.Infof("--> Checking Initial Aggregate container %s", container) + // If the aggregate isn't in the pod according to the state, remove it + if podName, ok := containersInPods[container]; !ok { + klog.Infof("Deleting Initial Aggregate container %s, not present in any pods", container) + delete(vpa.ContainersInitialAggregateState, container) + + } else { + klog.Infof("--> Initial Aggregate Container %s is in pod %s", container, podName) + } } } } From 533d487f35646a50695f450796a1dfff1babfc05 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Tue, 16 Apr 2024 17:25:36 -0500 Subject: [PATCH 04/26] easy mode? --- .../pkg/recommender/input/cluster_feeder.go | 26 +++++++++++++++++++ .../pkg/recommender/model/vpa.go | 7 +++++ 2 files changed, 33 insertions(+) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index fb1ce1d1b88..4faa17ac32f 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -428,7 +428,33 @@ func (feeder *clusterStateFeeder) LoadPods() { } } +// PruneContainers prunes any containers from the intial aggregate states +// that are no longer present in the aggregate states. 
This is important +// for cases where a container has been renamed or removed, as otherwise +// the VPA will split the recommended resources across containers that +// are no longer present, resulting in the containers that are still +// present being under-resourced func (feeder *clusterStateFeeder) PruneContainers() { + + var keysPruned int + // Look through all of our VPAs + for _, vpa := range feeder.clusterState.Vpas { + aggregates := vpa.AggregateStateByContainerNameWithoutCheckpoints() + // Check each initial state to see if it's still "real" + for container := range vpa.ContainersInitialAggregateState { + if _, ok := aggregates[container]; !ok { + delete(vpa.ContainersInitialAggregateState, container) + keysPruned = keysPruned + 1 + + } + } + } + if keysPruned > 0 { + klog.Infof("Pruned %d stale initial aggregate container keys", keysPruned) + } +} + +func (feeder *clusterStateFeeder) PruneContainers2() { klog.Infof("Pruning containers") // Find all the containers that are legitimately in pods diff --git a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go index f9f9fc5fbee..99580f188bb 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go @@ -209,6 +209,13 @@ func (vpa *Vpa) AggregateStateByContainerName() ContainerNameToAggregateStateMap return containerNameToAggregateStateMap } +// AggregateStateByContainerNameWithoutCheckpoints returns a map from container name to the aggregated state +// of all containers with that name, belonging to pods matched by the VPA, omitting any checkpoints. +func (vpa *Vpa) AggregateStateByContainerNameWithoutCheckpoints() ContainerNameToAggregateStateMap { + containerNameToAggregateStateMap := AggregateStateByContainerName(vpa.aggregateContainerStates) + return containerNameToAggregateStateMap +} + // AggregateContainerStates returns the underlying internal aggregate state map. func (vpa *Vpa) AggregateContainerStates() aggregateContainerStatesMap { return vpa.aggregateContainerStates From 7105b208ffe01cbf99493e5f22da5d6e05cd57de Mon Sep 17 00:00:00 2001 From: John Kyros Date: Tue, 16 Apr 2024 18:14:58 -0500 Subject: [PATCH 05/26] back the other way --- .../pkg/recommender/input/cluster_feeder.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 4faa17ac32f..1d7387c5e14 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -434,11 +434,12 @@ func (feeder *clusterStateFeeder) LoadPods() { // the VPA will split the recommended resources across containers that // are no longer present, resulting in the containers that are still // present being under-resourced -func (feeder *clusterStateFeeder) PruneContainers() { +func (feeder *clusterStateFeeder) PruneContainers2() { var keysPruned int // Look through all of our VPAs for _, vpa := range feeder.clusterState.Vpas { + // TODO(jkyros): maybe vpa.PruneInitialAggregateContainerStates() ? 
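+		// The map built below comes only from vpa.aggregateContainerStates (no checkpoint-derived entries), so an
+		// initial-state entry with no corresponding aggregate is assumed to be stale, e.g. left over from a
+		// checkpoint for a container that has since been renamed or removed.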
aggregates := vpa.AggregateStateByContainerNameWithoutCheckpoints() // Check each initial state to see if it's still "real" for container := range vpa.ContainersInitialAggregateState { @@ -454,7 +455,7 @@ func (feeder *clusterStateFeeder) PruneContainers() { } } -func (feeder *clusterStateFeeder) PruneContainers2() { +func (feeder *clusterStateFeeder) PruneContainers() { klog.Infof("Pruning containers") // Find all the containers that are legitimately in pods From b6d5eddc0ef262f44c788b7328ad76d087972029 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Wed, 17 Apr 2024 14:05:34 -0500 Subject: [PATCH 06/26] change gc timescale --- .../pkg/recommender/input/cluster_feeder.go | 42 +++++++++---------- .../pkg/recommender/main.go | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 1d7387c5e14..1ece304d766 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -455,48 +455,48 @@ func (feeder *clusterStateFeeder) PruneContainers2() { } } +// PruneContainers removes any containers from the aggregates and initial aggregates that are no longer +// present in the feeder's clusterState. Without this, we would be averaging our resource calculations +// over containers that no longer exist, and continuing to update checkpoints that should be left to expire. func (feeder *clusterStateFeeder) PruneContainers() { - klog.Infof("Pruning containers") - // Find all the containers that are legitimately in pods + // Find all the containers that are still legitimately in pods containersInPods := make(map[string]string) for _, pod := range feeder.clusterState.Pods { for containerID, _ := range pod.Containers { containersInPods[containerID] = pod.ID.PodName - klog.Infof("--> found container %s", containerID) } } - // Go through the VPAs + var aggregatesPruned int + var initialAggregatesPruned int for _, vpa := range feeder.clusterState.Vpas { - klog.Infof("--> inspecting vpa %s", vpa.ID.VpaName) // Look at the aggregates - - // TODO(jkyros): The bad aggregate IS NOT in the raw aggregate container list for container := range vpa.AggregateContainerStates() { - klog.Infof("--> Checking aggregate container %s", container.ContainerName()) - // If the aggregate isn't in the pod according to the state, remove it - if podName, ok := containersInPods[container.ContainerName()]; !ok { - klog.Infof("Deleting Aggregate container %s, not present in any pods", container.ContainerName()) + // If the container being aggregated isn't in a pod anymore according to the state, remove it + if _, ok := containersInPods[container.ContainerName()]; !ok { + klog.V(4).Infof("Deleting Aggregate container %s, not present in any pods", container.ContainerName()) vpa.DeleteAggregation(container) - - } else { - klog.Infof("--> Aggregate Container %s is in pod %s", container.ContainerName(), podName) + aggregatesPruned = aggregatesPruned + 1 } + } + // Also remove it from the initial aggregates. 
This is done separately from the normal aggregates because it + // could be in this list, but not that list and vice versa for container := range vpa.ContainersInitialAggregateState { - klog.Infof("--> Checking Initial Aggregate container %s", container) - - // If the aggregate isn't in the pod according to the state, remove it - if podName, ok := containersInPods[container]; !ok { - klog.Infof("Deleting Initial Aggregate container %s, not present in any pods", container) + if _, ok := containersInPods[container]; !ok { + klog.V(4).Infof("Deleting Initial Aggregate container %s, not present in any pods", container) delete(vpa.ContainersInitialAggregateState, container) + initialAggregatesPruned = initialAggregatesPruned + 1 - } else { - klog.Infof("--> Initial Aggregate Container %s is in pod %s", container, podName) } } } + // Only log if we did something + if initialAggregatesPruned > 0 || aggregatesPruned > 0 { + klog.Infof("Pruned %d aggregate and %d initial aggregate containers", aggregatesPruned, initialAggregatesPruned) + } + } func (feeder *clusterStateFeeder) LoadRealTimeMetrics() { diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 8a53137f1e5..af4f2d25fec 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -107,7 +107,7 @@ var ( const ( // aggregateContainerStateGCInterval defines how often expired AggregateContainerStates are garbage collected. - aggregateContainerStateGCInterval = 1 * time.Hour + aggregateContainerStateGCInterval = 5 * time.Minute scaleCacheEntryLifetime time.Duration = time.Hour scaleCacheEntryFreshnessTime time.Duration = 10 * time.Minute scaleCacheEntryJitterFactor float64 = 1. From 45652feae0a032d341e488e7c9d456e7582e82a1 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Wed, 17 Apr 2024 14:24:44 -0500 Subject: [PATCH 07/26] cleanup the checkpoint too --- .../pkg/recommender/input/cluster_feeder.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 1ece304d766..9c1d049e914 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -293,7 +293,7 @@ func (feeder *clusterStateFeeder) GarbageCollectCheckpoints() { } for _, checkpoint := range checkpointList.Items { vpaID := model.VpaID{Namespace: checkpoint.Namespace, VpaName: checkpoint.Spec.VPAObjectName} - _, exists := feeder.clusterState.Vpas[vpaID] + vpa, exists := feeder.clusterState.Vpas[vpaID] if !exists { err = feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).Delete(context.TODO(), checkpoint.Name, metav1.DeleteOptions{}) if err == nil { @@ -302,6 +302,19 @@ func (feeder *clusterStateFeeder) GarbageCollectCheckpoints() { klog.ErrorS(err, "Orphaned VPA checkpoint cleanup - error deleting", "checkpoint", klog.KRef(namespace, checkpoint.Name)) } } + // Also clean up a checkpoint if the VPA is still there, but the container is gone + // TODO(jkyros): could we also just wait until it got "old" enough, e.g. the checkpoint hasn't + // been updated for an hour, blow it a away? 
+ _, aggregateExists := vpa.AggregateStateByContainerName()[checkpoint.Spec.ContainerName] + if !aggregateExists { + err = feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).Delete(context.TODO(), checkpoint.Name, metav1.DeleteOptions{}) + if err == nil { + klog.V(3).Infof("Orphaned VPA checkpoint cleanup - deleting %v/%v.", namespace, checkpoint.Name) + } else { + klog.Errorf("Cannot delete VPA checkpoint %v/%v. Reason: %+v", namespace, checkpoint.Name, err) + } + } + } } } From ff6ba64f51f42ff8bcbe164156347c924bace0f2 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Wed, 17 Apr 2024 15:56:11 -0500 Subject: [PATCH 08/26] try to handle the container count issue --- .../hack/e2e/vpa-rbac.yaml | 442 ++++++++++++++++++ .../pkg/recommender/input/cluster_feeder.go | 6 +- .../pkg/recommender/logic/recommender.go | 9 +- .../pkg/recommender/logic/recommender_test.go | 8 +- .../pkg/recommender/model/cluster.go | 24 + .../pkg/recommender/model/vpa.go | 5 + .../pkg/recommender/routines/recommender.go | 2 +- 7 files changed, 486 insertions(+), 10 deletions(-) create mode 100644 vertical-pod-autoscaler/hack/e2e/vpa-rbac.yaml diff --git a/vertical-pod-autoscaler/hack/e2e/vpa-rbac.yaml b/vertical-pod-autoscaler/hack/e2e/vpa-rbac.yaml new file mode 100644 index 00000000000..883c12ca58a --- /dev/null +++ b/vertical-pod-autoscaler/hack/e2e/vpa-rbac.yaml @@ -0,0 +1,442 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:metrics-reader +rules: + - apiGroups: + - "external.metrics.k8s.io" + resources: + - "*" + verbs: + - get + - list + - apiGroups: + - "metrics.k8s.io" + resources: + - pods + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:vpa-actor +rules: + - apiGroups: + - "" + resources: + - pods + - nodes + - limitranges + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch + - create + - apiGroups: + - "poc.autoscaling.k8s.io" + resources: + - verticalpodautoscalers + verbs: + - get + - list + - watch + - apiGroups: + - "autoscaling.k8s.io" + resources: + - verticalpodautoscalers + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:vpa-status-actor +rules: + - apiGroups: + - "autoscaling.k8s.io" + resources: + - verticalpodautoscalers/status + verbs: + - get + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:vpa-checkpoint-actor +rules: + - apiGroups: + - "poc.autoscaling.k8s.io" + resources: + - verticalpodautoscalercheckpoints + verbs: + - get + - list + - watch + - create + - patch + - delete + - apiGroups: + - "autoscaling.k8s.io" + resources: + - verticalpodautoscalercheckpoints + verbs: + - get + - list + - watch + - create + - patch + - delete + - apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:evictioner +rules: + - apiGroups: + - "apps" + - "extensions" + resources: + - replicasets + verbs: + - get + - apiGroups: + - "" + resources: + - pods/eviction + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:metrics-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:metrics-reader +subjects: + - kind: ServiceAccount + name: vpa-recommender + namespace: kube-system +--- 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-actor +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:vpa-actor +subjects: + - kind: ServiceAccount + name: vpa-recommender + namespace: kube-system + - kind: ServiceAccount + name: vpa-updater + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-status-actor +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:vpa-status-actor +subjects: + - kind: ServiceAccount + name: vpa-recommender + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-checkpoint-actor +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:vpa-checkpoint-actor +subjects: + - kind: ServiceAccount + name: vpa-recommender + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:vpa-target-reader +rules: + - apiGroups: + - '*' + resources: + - '*/scale' + verbs: + - get + - watch + - apiGroups: + - "" + resources: + - replicationcontrollers + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-target-reader-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:vpa-target-reader +subjects: + - kind: ServiceAccount + name: vpa-recommender + namespace: kube-system + - kind: ServiceAccount + name: vpa-admission-controller + namespace: kube-system + - kind: ServiceAccount + name: vpa-updater + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-evictioner-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:evictioner +subjects: + - kind: ServiceAccount + name: vpa-updater + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vpa-admission-controller + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vpa-recommender + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vpa-updater + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:vpa-admission-controller +rules: + - apiGroups: + - "" + resources: + - pods + - configmaps + - nodes + - limitranges + verbs: + - get + - list + - watch + - apiGroups: + - "admissionregistration.k8s.io" + resources: + - mutatingwebhookconfigurations + verbs: + - create + - delete + - get + - list + - apiGroups: + - "poc.autoscaling.k8s.io" + resources: + - verticalpodautoscalers + verbs: + - get + - list + - watch + - apiGroups: + - "autoscaling.k8s.io" + resources: + - verticalpodautoscalers + verbs: + - get + - list + - watch + - apiGroups: + - "coordination.k8s.io" + resources: + - leases + verbs: + - create + - update + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-admission-controller +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:vpa-admission-controller 
+subjects: + - kind: ServiceAccount + name: vpa-admission-controller + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:vpa-status-reader +rules: + - apiGroups: + - "coordination.k8s.io" + resources: + - leases + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-status-reader-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:vpa-status-reader +subjects: + - kind: ServiceAccount + name: vpa-updater + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: system:leader-locking-vpa-updater + namespace: kube-system +rules: + - apiGroups: + - "coordination.k8s.io" + resources: + - leases + verbs: + - create + - apiGroups: + - "coordination.k8s.io" + resourceNames: + - vpa-updater + resources: + - leases + verbs: + - get + - watch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: system:leader-locking-vpa-updater + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: system:leader-locking-vpa-updater +subjects: + - kind: ServiceAccount + name: vpa-updater + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: system:leader-locking-vpa-recommender + namespace: kube-system +rules: + - apiGroups: + - "coordination.k8s.io" + resources: + - leases + verbs: + - create + - apiGroups: + - "coordination.k8s.io" + resourceNames: + # TODO: Clean vpa-recommender up once vpa-recommender-lease is used everywhere. See https://github.com/kubernetes/autoscaler/issues/7461. + - vpa-recommender + - vpa-recommender-lease + resources: + - leases + verbs: + - get + - watch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: system:leader-locking-vpa-recommender + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: system:leader-locking-vpa-recommender +subjects: + - kind: ServiceAccount + name: vpa-recommender + namespace: kube-system diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index 9c1d049e914..afa181b3264 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -302,9 +302,11 @@ func (feeder *clusterStateFeeder) GarbageCollectCheckpoints() { klog.ErrorS(err, "Orphaned VPA checkpoint cleanup - error deleting", "checkpoint", klog.KRef(namespace, checkpoint.Name)) } } - // Also clean up a checkpoint if the VPA is still there, but the container is gone + // Also clean up a checkpoint if the VPA is still there, but the container is gone. AggregateStateByContainerName + // merges in the initial aggregates so we can use it to check "both lists" (initial, aggregates) at once // TODO(jkyros): could we also just wait until it got "old" enough, e.g. the checkpoint hasn't - // been updated for an hour, blow it a away? + // been updated for an hour, blow it a away? Because once we remove it from the aggregate lists, it will stop + // being maintained. 
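+			// Deleting the checkpoint here should be harmless even if the container later reappears: the recommender
+			// would presumably rebuild an aggregate for it from live pods and write a fresh checkpoint on a later
+			// checkpoint-maintenance pass.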
_, aggregateExists := vpa.AggregateStateByContainerName()[checkpoint.Spec.ContainerName] if !aggregateExists { err = feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).Delete(context.TODO(), checkpoint.Name, metav1.DeleteOptions{}) diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go index 8aaf9cbd5e9..b613f7a8b8d 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go @@ -22,6 +22,7 @@ import ( vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" + "k8s.io/klog/v2" ) var ( @@ -39,7 +40,7 @@ var ( // PodResourceRecommender computes resource recommendation for a Vpa object. type PodResourceRecommender interface { - GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources + GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, containersPerPod int) RecommendedPodResources } // RecommendedPodResources is a Map from container name to recommended resources. @@ -65,13 +66,15 @@ type podResourceRecommender struct { upperBoundMemory MemoryEstimator } -func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources { +func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, containersPerPod int) RecommendedPodResources { var recommendation = make(RecommendedPodResources) if len(containerNameToAggregateStateMap) == 0 { return recommendation } - fraction := 1.0 / float64(len(containerNameToAggregateStateMap)) + // TODO(jkyros): This right here is factoring in containers that don't exist anymore + fraction := 1.0 / float64(containersPerPod) + klog.Infof("Spreading recommendation across %d containers (fraction %f)", containersPerPod, fraction) minCPU := model.ScaleResource(model.CPUAmountFromCores(*podMinCPUMillicores*0.001), fraction) minMemory := model.ScaleResource(model.MemoryAmountFromBytes(*podMinMemoryMb*1024*1024), fraction) diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go index 5824745a461..54f70af7bb8 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go @@ -40,7 +40,7 @@ func TestMinResourcesApplied(t *testing.T) { "container-1": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, len(containerNameToAggregateStateMap)) assert.Equal(t, model.CPUAmountFromCores(*podMinCPUMillicores/1000), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes(*podMinMemoryMb*1024*1024), recommendedResources["container-1"].Target[model.ResourceMemory]) } @@ -63,7 +63,7 @@ func TestMinResourcesSplitAcrossContainers(t *testing.T) { "container-2": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := 
recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, len(containerNameToAggregateStateMap)) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-2"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes((*podMinMemoryMb*1024*1024)/2), recommendedResources["container-1"].Target[model.ResourceMemory]) @@ -90,7 +90,7 @@ func TestControlledResourcesFiltered(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, len(containerNameToAggregateStateMap)) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory) @@ -119,7 +119,7 @@ func TestControlledResourcesFilteredDefault(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, len(containerNameToAggregateStateMap)) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory) diff --git a/vertical-pod-autoscaler/pkg/recommender/model/cluster.go b/vertical-pod-autoscaler/pkg/recommender/model/cluster.go index 8fd2fd8f030..6e6183b403f 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/cluster.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/cluster.go @@ -144,9 +144,28 @@ func (cluster *ClusterState) AddOrUpdatePod(podID PodID, newLabels labels.Set, p cluster.addPodToItsVpa(pod) } + // Tally the number of containers for later when we're averaging the recommendations + cluster.setVPAContainersPerPod(pod) pod.Phase = phase } +// setVPAContainersPerPod sets the number of containers per pod seen for pods connected to this VPA +// so that later when were spreading the minimum recommendations we're spreading them over the correct +// number and not just the number of all aggregates that have *ever* been in the pod. (We don't want minimum resources +// to erroneously shrink) +func (cluster *ClusterState) setVPAContainersPerPod(pod *PodState) { + for _, vpa := range cluster.Vpas { + if vpa_utils.PodLabelsMatchVPA(pod.ID.Namespace, cluster.labelSetMap[pod.labelSetKey], vpa.ID.Namespace, vpa.PodSelector) { + // We want the "high water mark" of the most containers in the pod in the event + // that we're rolling out a pod that has an additional container + if len(pod.Containers) > vpa.ContainersPerPod { + vpa.ContainersPerPod = len(pod.Containers) + } + } + } + +} + // addPodToItsVpa increases the count of Pods associated with a VPA object. // Does a scan similar to findOrCreateAggregateContainerState so could be optimized if needed. 
func (cluster *ClusterState) addPodToItsVpa(pod *PodState) { @@ -275,6 +294,11 @@ func (cluster *ClusterState) AddOrUpdateVpa(apiObject *vpa_types.VerticalPodAuto } vpa.PodCount = len(cluster.GetMatchingPods(vpa)) } + + // Default this to the minimum, we will tally the true number when we load the pods later + // TODO(jkyros): This is gross, it depends on the order I know it currently loads things in, but + // that might not be the case someday + vpa.ContainersPerPod = 1 vpa.TargetRef = apiObject.Spec.TargetRef vpa.Annotations = annotationsMap vpa.Conditions = conditionsMap diff --git a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go index 99580f188bb..600453f4594 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go @@ -110,6 +110,11 @@ type Vpa struct { TargetRef *autoscaling.CrossVersionObjectReference // PodCount contains number of live Pods matching a given VPA object. PodCount int + // ContainersPerPod contains the "high water mark" of the number of containers + // per pod to average the recommandation across. Used to make sure we aren't + // "fractionalizing" minResources erroneously during a redeploy when when a pod's + // container is removed or renamed + ContainersPerPod int } // NewVpa returns a new Vpa with a given ID and pod selector. Doesn't set the diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go index 202142c456f..3b92bdbcb05 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go @@ -91,7 +91,7 @@ func (r *recommender) UpdateVPAs() { if !found { continue } - resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa)) + resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa), vpa.ContainersPerPod) had := vpa.HasRecommendation() listOfResourceRecommendation := logic.MapToListOfRecommendedContainerResources(resources) From 9486cca04779c1d4ec41ab5420b215c08a22e021 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 14 Mar 2024 01:56:44 -0500 Subject: [PATCH 09/26] VPA: Add UpdateModeInPlaceOrRecreate to types This just addes the UpdateModeInPlaceOrRecreate mode to the types so we can use it. I did not add InPlaceOnly, as that seemed contentious and it didn't seem like we had a good use case for it yet. --- .../pkg/apis/autoscaling.k8s.io/v1/types.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go index 5ede0fe4810..47255520294 100644 --- a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go +++ b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go @@ -152,7 +152,7 @@ type PodUpdatePolicy struct { } // UpdateMode controls when autoscaler applies changes to the pod resources. -// +kubebuilder:validation:Enum=Off;Initial;Recreate;Auto +// +kubebuilder:validation:Enum=Off;Initial;Recreate;InPlaceOrRecreate;Auto type UpdateMode string const ( @@ -172,6 +172,10 @@ const ( // using any available update method. Currently this is equivalent to // Recreate, which is the only available update method. 
UpdateModeAuto UpdateMode = "Auto" + // UpdateModeInPlaceOrRecreate means that autoscaler tries to assign resources in-place + // first, and if it cannot ( resize takes too long or is Infeasible ) it falls back to the + // "Recreate" update mode. + UpdateModeInPlaceOrRecreate UpdateMode = "InPlaceOrRecreate" ) // PodResourcePolicy controls how autoscaler computes the recommended resources From e27433b7dc519dec01e8beb24710d6279d9d6681 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 14 Mar 2024 00:20:34 -0500 Subject: [PATCH 10/26] VPA: Allow admission-controller to update in-place With the InPlacePodVerticalScaling feature, we can now update the resource requests and limits of pods in-place rather than having to evict the pod to rewrite it. We do have to make sure though ( because apps could react badly to an update or require container restarts ) that we limit the amount of disruption we can introduce at once, so we limit our updates only to the ones that the updater has okayed. (And then, over in the updater we're going to meter them so they don't get sent to the admission-controller all at once) This commit: - allows the admission-controller to monitor update operations - adds the new UpdateOrRecreate update mode to the list of possible update modes - makes sure the admission-controller only patches pod update requests that were triggered by the updater (by using a special annotation) - makes sure the admission-controller removes the annotation upon patching to signify that it is done --- .../pkg/admission-controller/config.go | 3 ++- .../resource/pod/handler.go | 17 +++++++++++++++++ .../resource/pod/patch/util.go | 8 ++++++++ .../resource/vpa/handler.go | 9 +++++---- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/config.go b/vertical-pod-autoscaler/pkg/admission-controller/config.go index e0d8ac0805f..c73490d592e 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/config.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/config.go @@ -157,7 +157,8 @@ func selfRegistration(clientset kubernetes.Interface, caCert []byte, webHookDela AdmissionReviewVersions: []string{"v1"}, Rules: []admissionregistration.RuleWithOperations{ { - Operations: []admissionregistration.OperationType{admissionregistration.Create}, + // we're watching for updates now also because of in-place VPA, but we only patch if the update was triggered by the updater + Operations: []admissionregistration.OperationType{admissionregistration.Create, admissionregistration.Update}, Rule: admissionregistration.Rule{ APIGroups: []string{""}, APIVersions: []string{"v1"}, diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go index 7d8e704f743..b0415eabf6d 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go @@ -77,6 +77,16 @@ func (h *resourceHandler) GetPatches(ctx context.Context, ar *admissionv1.Admiss pod.Name = pod.GenerateName + "%" pod.Namespace = namespace } + + // We're watching for updates now also because of in-place VPA, but we only want to act on the ones + // that were triggered by the updater so we don't violate disruption quotas + if ar.Operation == admissionv1.Update { + // The patcher will remove this annotation, it's just the signal that the updater okayed this + if _, ok := 
pod.Annotations["autoscaling.k8s.io/resize"]; !ok { + return nil, nil + } + } + klog.V(4).InfoS("Admitting pod", "pod", klog.KObj(&pod)) controllingVpa := h.vpaMatcher.GetMatchingVPA(ctx, &pod) if controllingVpa == nil { @@ -92,6 +102,13 @@ func (h *resourceHandler) GetPatches(ctx context.Context, ar *admissionv1.Admiss if pod.Annotations == nil { patches = append(patches, patch.GetAddEmptyAnnotationsPatch()) } + // TODO(jkyros): this is weird here, work it out with the above block somehow, but + // we need to have this annotation patched out + if ar.Operation == admissionv1.Update { + // TODO(jkyros): the squiggle is escaping the / in the annotation + patches = append(patches, patch.GetRemoveAnnotationPatch("autoscaling.k8s.io~1resize")) + } + for _, c := range h.patchCalculators { partialPatches, err := c.CalculatePatches(&pod, controllingVpa) if err != nil { diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go index 7bdea7988ab..7955a6ae54f 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go @@ -39,3 +39,11 @@ func GetAddAnnotationPatch(annotationName, annotationValue string) resource_admi Value: annotationValue, } } + +// GetRemoveAnnotationPatch returns a patch that removes the specified annotation. +func GetRemoveAnnotationPatch(annotationName string) resource_admission.PatchRecord { + return resource_admission.PatchRecord{ + Op: "remove", + Path: fmt.Sprintf("/metadata/annotations/%s", annotationName), + } +} diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go index f2f2a29c4d3..f7387ba0feb 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go @@ -34,10 +34,11 @@ import ( var ( possibleUpdateModes = map[vpa_types.UpdateMode]interface{}{ - vpa_types.UpdateModeOff: struct{}{}, - vpa_types.UpdateModeInitial: struct{}{}, - vpa_types.UpdateModeRecreate: struct{}{}, - vpa_types.UpdateModeAuto: struct{}{}, + vpa_types.UpdateModeOff: struct{}{}, + vpa_types.UpdateModeInitial: struct{}{}, + vpa_types.UpdateModeRecreate: struct{}{}, + vpa_types.UpdateModeAuto: struct{}{}, + vpa_types.UpdateModeInPlaceOrRecreate: struct{}{}, } possibleScalingModes = map[vpa_types.ContainerScalingMode]interface{}{ From 57c2ec576993f8bbbabb2e3203f98e194f891fd8 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 14 Mar 2024 00:30:30 -0500 Subject: [PATCH 11/26] VPA: Stuck in-place resizes still require eviction So because of InPlacePodVerticalScaling, we can have a pod object whose resource spec is correct, but whose status is not, because that pod may have been updated in-place after the original admission. This would have been ignored until now because "the spec looks correct", but we need to take the status into account as well if a resize is in progress. This commit: - takes status resources into account for pods/containers that are being in-place resized - makes sure that any pods that are "stuck" in-place updating (i.e. 
the node doesn't have enough resources either temporarily or permanently) will still show up in the list as having "wrong" resources so they can still get queued for eviction and be re-assigned to nodes that do have enough resources --- .../updater/priority/priority_processor.go | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/pkg/updater/priority/priority_processor.go b/vertical-pod-autoscaler/pkg/updater/priority/priority_processor.go index 53c07fa7163..de9cc267f4f 100644 --- a/vertical-pod-autoscaler/pkg/updater/priority/priority_processor.go +++ b/vertical-pod-autoscaler/pkg/updater/priority/priority_processor.go @@ -51,7 +51,7 @@ func (*defaultPriorityProcessor) GetUpdatePriority(pod *apiv1.Pod, vpa *vpa_type hasObservedContainers, vpaContainerSet := parseVpaObservedContainers(pod) - for _, podContainer := range pod.Spec.Containers { + for num, podContainer := range pod.Spec.Containers { if hasObservedContainers && !vpaContainerSet.Has(podContainer.Name) { klog.V(4).InfoS("Not listed in VPA observed containers label. Skipping container priority calculations", "label", annotations.VpaObservedContainersLabel, "observedContainers", pod.GetAnnotations()[annotations.VpaObservedContainersLabel], "containerName", podContainer.Name, "vpa", klog.KObj(vpa)) continue @@ -73,6 +73,35 @@ func (*defaultPriorityProcessor) GetUpdatePriority(pod *apiv1.Pod, vpa *vpa_type (hasUpperBound && request.Cmp(upperBound) > 0) { outsideRecommendedRange = true } + + // TODO(jkyros): I think we're picking up early zeroes here from the VPA when it has no recommendation, I think that's why I have to wait + // for the recommendation later before I try to scale in-place + // TODO(jkyros): For in place VPA, this might be gross, but we need this pod to be in the eviction list because it doesn't actually have + // the resources it asked for even if the spec is right, and we might need to fall back to evicting it + // TODO(jkyros): Can we have empty container status at this point for real? It's at least failing the tests if we don't check, but + // we could just populate the status in the tests + // Statuses can be missing, or status resources can be nil + if len(pod.Status.ContainerStatuses) > num && pod.Status.ContainerStatuses[num].Resources != nil { + if statusRequest, hasStatusRequest := pod.Status.ContainerStatuses[num].Resources.Requests[resourceName]; hasStatusRequest { + // If we're updating, but we still don't have what we asked for, we may still need to act on this pod + if request.MilliValue() > statusRequest.MilliValue() { + scaleUp = true + // It's okay if we're actually still resizing, but if we can't now or we're stuck, make sure the pod + // is still in the list so we can evict it to go live on a fatter node or something + if pod.Status.Resize == apiv1.PodResizeStatusDeferred || pod.Status.Resize == apiv1.PodResizeStatusInfeasible { + klog.V(4).Infof("Pod %s looks like it's stuck scaling up (%v state), leaving it in for eviction", pod.Name, pod.Status.Resize) + } else { + klog.V(4).Infof("Pod %s is in the process of scaling up (%v state), leaving it in so we can see if it's taking too long", pod.Name, pod.Status.Resize) + } + } + // I guess if it's not outside of compliance, it's probably okay it's stuck here? 
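+					// Checking the status (actual) request against the bounds catches pods whose spec was already
+					// patched but never applied by the kubelet: they are still running with out-of-range resources
+					// and should stay candidates for update or eviction.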
+ if (hasLowerBound && statusRequest.Cmp(lowerBound) < 0) || + (hasUpperBound && statusRequest.Cmp(upperBound) > 0) { + outsideRecommendedRange = true + } + } + } + } else { // Note: if the request is not specified, the container will use the // namespace default request. Currently we ignore it and treat such @@ -83,6 +112,10 @@ func (*defaultPriorityProcessor) GetUpdatePriority(pod *apiv1.Pod, vpa *vpa_type } } } + + // TODO(jkyros): hmm this gets hairy here because if "status" is what let us into the list, + // we probably need to do these calculations vs the status rather than the spec, because the + // spec is a "lie" resourceDiff := 0.0 for resource, totalRecommended := range totalRecommendedPerResource { totalRequest := math.Max(float64(totalRequestPerResource[resource]), 1.0) From 46cb6e8beb2859fcc644d242a8838ab1dc511608 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 14 Mar 2024 00:39:03 -0500 Subject: [PATCH 12/26] VPA: Make eviction restriction in-place aware This commit makes the eviction restrictor in-place update aware. While this possibly could be a separate restrictor or refactored into a shared "disruption restrictor", I chose not to do that at this time. I don't think eviction/in-place update can be completely separate as they can both cause disruption (albeit in-place less so) -- they both need to factor in the total disruption -- so I just hacked the in-place update functions into the existing evictor and added some additional counters for disruption tracking. While we have the pod lifecycle to look at to figure out "where we are" in eviction, we don't have that luxury with in-place, so that's why we need the additional "IsInPlaceUpdating" helper. --- .../eviction/pods_eviction_restriction.go | 192 +++++++++++++++++- 1 file changed, 190 insertions(+), 2 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go index e79f79e54c5..56dc0449452 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go @@ -47,6 +47,11 @@ type PodsEvictionRestriction interface { Evict(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error // CanEvict checks if pod can be safely evicted CanEvict(pod *apiv1.Pod) bool + + // InPlaceUpdate updates the pod resources in-place + InPlaceUpdate(pod *apiv1.Pod, eventRecorder record.EventRecorder) error + // CanEvict checks if pod can be safely evicted + CanInPlaceUpdate(pod *apiv1.Pod) bool } type podsEvictionRestrictionImpl struct { @@ -61,6 +66,7 @@ type singleGroupStats struct { running int evictionTolerance int evicted int + inPlaceUpdating int } // PodsEvictionRestrictionFactory creates PodsEvictionRestriction @@ -105,16 +111,36 @@ func (e *podsEvictionRestrictionImpl) CanEvict(pod *apiv1.Pod) bool { return true } if present { + shouldBeAlive := singleGroupStats.configured - singleGroupStats.evictionTolerance - if singleGroupStats.running-singleGroupStats.evicted > shouldBeAlive { + // TODO(jkyros): Come back and think through this better, but for now, take in-place updates into account because + // they might cause disruption. We assume pods will not be both in-place updated and evicted in the same pass, but + // we need eviction to take the numbers into account so we don't violate our disruption dolerances. 
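+		// For example: with configured=5 and evictionTolerance=1, shouldBeAlive is 4, so with 5 pods running at
+		// most one pod may be disrupted at a time, whether that disruption is an eviction or an in-flight
+		// in-place resize.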
+ // If we're already resizing this pod, don't do anything to it, unless we failed to resize it, then we want to evict it. + if IsInPlaceUpdating(pod) { + klog.V(4).Infof("pod %s disruption tolerance: %d config: %d tolerance: %d evicted: %d updating: %d", pod.Name, singleGroupStats.running, singleGroupStats.configured, singleGroupStats.evictionTolerance, singleGroupStats.evicted, singleGroupStats.inPlaceUpdating) + if singleGroupStats.running-(singleGroupStats.evicted+(singleGroupStats.inPlaceUpdating-1)) > shouldBeAlive { + klog.V(4).Infof("Would be able to evict, but already resizing %s", pod.Name) + + if pod.Status.Resize == apiv1.PodResizeStatusInfeasible || pod.Status.Resize == apiv1.PodResizeStatusDeferred { + klog.Warningf("Attempted in-place resize of %s impossible, should now evict", pod.Name) + return true + } + } + return false + } + + if singleGroupStats.running-(singleGroupStats.evicted+singleGroupStats.inPlaceUpdating) > shouldBeAlive { return true } // If all pods are running and eviction tolerance is small evict 1 pod. if singleGroupStats.running == singleGroupStats.configured && singleGroupStats.evictionTolerance == 0 && - singleGroupStats.evicted == 0 { + singleGroupStats.evicted == 0 && + singleGroupStats.inPlaceUpdating == 0 { return true } + } } return false @@ -250,9 +276,16 @@ func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []* if pod.Status.Phase == apiv1.PodPending { singleGroup.pending = singleGroup.pending + 1 } + if IsInPlaceUpdating(pod) { + singleGroup.inPlaceUpdating = singleGroup.inPlaceUpdating + 1 + + } } singleGroup.running = len(replicas) - singleGroup.pending + + // This has to happen last, singlegroup never gets returned, only this does creatorToSingleGroupStatsMap[creator] = singleGroup + } return &podsEvictionRestrictionImpl{ client: f.client, @@ -391,3 +424,158 @@ func setUpInformer(kubeClient kube_client.Interface, kind controllerKind) (cache } return informer, nil } + +// CanInPlaceUpdate performs the same checks +func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool { + + cr, present := e.podToReplicaCreatorMap[getPodID(pod)] + // TODO(jkyros): why is present checked twice? + if present { + + // If our QoS class is guaranteed, we can't change the resources without a restart + if pod.Status.QOSClass == apiv1.PodQOSGuaranteed { + klog.Warning("Can't resize %s in-place, pod QoS is %s", pod.Status.QOSClass) + return false + } + + // If we're already resizing this pod, don't do it again + if IsInPlaceUpdating(pod) { + klog.Warning("Not resizing %s, already resizing %s", pod.Name) + return false + } + + // TODO(jkyros): is there a pod-level thing we can use? + // Go through each container, and check to see if this is going to cause a disruption or not + noRestartPoliciesPopulated := true + + for _, container := range pod.Spec.Containers { + // If some of these are populated, we know it at least understands resizing + if len(container.ResizePolicy) > 0 { + noRestartPoliciesPopulated = false + } + + for _, policy := range container.ResizePolicy { + if policy.RestartPolicy != apiv1.NotRequired { + klog.Warningf("in-place resize of %s will cause container disruption, container %s restart policy is %v", pod.Name, container.Name, policy.RestartPolicy) + // TODO(jkyros): is there something that prevents this from happening elsewhere in the API? 
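+					// A restart-requiring resize can't be applied to a pod that is never restarted: with
+					// restartPolicy: Never the kubelet cannot restart the container to apply the new resources,
+					// so in-place update is ruled out entirely for this pod.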
+ if pod.Spec.RestartPolicy == apiv1.RestartPolicyNever { + klog.Warningf("in-place resize of %s not possible, container %s resize policy is %v but pod restartPolicy is %v", pod.Name, container.Name, policy.RestartPolicy, pod.Spec.RestartPolicy) + return false + } + + } + } + } + + // If none of the policies are populated, our feature is probably not enabled, so we can't in-place regardless + if noRestartPoliciesPopulated { + klog.Warning("impossible to resize %s in-place, container resize policies are not populated", pod.Name) + } + + //TODO(jkyros): Come back and handle sidecar containers at some point since they're weird? + singleGroupStats, present := e.creatorToSingleGroupStatsMap[cr] + // If we're pending, we can't in-place resize + // TODO(jkyros): are we sure we can't? Should I just set this to "if running"? + if pod.Status.Phase == apiv1.PodPending { + klog.V(4).Infof("Can't resize pending pod %s", pod.Name) + return false + } + // This second "present" check is against the crator-to-group-stats map, not the pod-to-replica map + if present { + klog.V(4).Infof("pod %s disruption tolerance run: %d config: %d tolerance: %d evicted: %d updating: %d", pod.Name, singleGroupStats.running, singleGroupStats.configured, singleGroupStats.evictionTolerance, singleGroupStats.evicted, singleGroupStats.inPlaceUpdating) + shouldBeAlive := singleGroupStats.configured - singleGroupStats.evictionTolerance + if singleGroupStats.running-(singleGroupStats.evicted+singleGroupStats.inPlaceUpdating) > shouldBeAlive { + klog.V(4).Infof("Should be alive: %d, Actually alive: %d", shouldBeAlive, singleGroupStats.running-(singleGroupStats.evicted+singleGroupStats.inPlaceUpdating)) + return true + } + // If all pods are running and eviction tolerance is small update 1 pod. + if singleGroupStats.running == singleGroupStats.configured && + singleGroupStats.evictionTolerance == 0 && + singleGroupStats.evicted == 0 && singleGroupStats.inPlaceUpdating == 0 { + klog.V(4).Infof("--> we are in good shape on %s, it is tolerant", pod.Name) + return true + } + } + } + return false +} + +// InPlaceUpdate sends eviction instruction to api client. Returns error if pod cannot be in-place updated or if client returned error +// Does not check if pod was actually in-place updated after grace period. 
+func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, eventRecorder record.EventRecorder) error {
+    cr, present := e.podToReplicaCreatorMap[getPodID(podToUpdate)]
+    if !present {
+        return fmt.Errorf("pod not suitable for eviction %v : not in replicated pods map", podToUpdate.Name)
+    }
+
+    if !e.CanInPlaceUpdate(podToUpdate) {
+        return fmt.Errorf("cannot update pod %v in place : number of in-flight updates exceeded", podToUpdate.Name)
+    }
+
+    // TODO(jkyros): for now I'm just going to annotate the pod
+
+    // Modify the pod with the "hey please inplace update me" annotation
+    // We'll have the admission controller update the limits like it does
+    // today, and then remove the annotation with the patch
+    modifiedPod := podToUpdate.DeepCopy()
+    if modifiedPod.Annotations == nil {
+        modifiedPod.Annotations = make(map[string]string)
+    }
+    modifiedPod.Annotations["autoscaling.k8s.io/resize"] = "true"
+
+    // Give the update to the APIserver
+    _, err := e.client.CoreV1().Pods(podToUpdate.Namespace).Update(context.TODO(), modifiedPod, metav1.UpdateOptions{})
+    if err != nil {
+        klog.Errorf("failed to update pod %s/%s, error: %v", podToUpdate.Namespace, podToUpdate.Name, err)
+        return err
+    }
+    eventRecorder.Event(podToUpdate, apiv1.EventTypeNormal, "MarkedByVPA",
+        "Pod was marked by VPA Updater to be updated in-place.")
+
+    // TODO(jkyros): You need to do this regardless once you update the pod, if it changes phases here as a result, you still
+    // need to catalog what you did
+    if podToUpdate.Status.Phase == apiv1.PodRunning {
+        singleGroupStats, present := e.creatorToSingleGroupStatsMap[cr]
+        if !present {
+            return fmt.Errorf("Internal error - cannot find stats for replication group %v", cr)
+        }
+        singleGroupStats.inPlaceUpdating = singleGroupStats.inPlaceUpdating + 1
+        e.creatorToSingleGroupStatsMap[cr] = singleGroupStats
+    } else {
+        klog.Warningf("I updated, but my pod phase was %s", podToUpdate.Status.Phase)
+    }
+
+    return nil
+}
+
+// IsInPlaceUpdating checks whether or not the given pod is currently in the middle of an in-place update
+func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) {
+    // If the pod is currently updating we need to tally that
+    if podToCheck.Status.Resize != "" {
+        klog.V(4).Infof("Resize of %s is in %s phase", podToCheck.Name, podToCheck.Status.Resize)
+        // Proposed -> Deferred -> InProgress, but what about Infeasible?
+        if podToCheck.Status.Resize == apiv1.PodResizeStatusInfeasible {
+            klog.V(4).Infof("Resource proposal for %s is %v, we're probably stuck like this until we evict", podToCheck.Name, podToCheck.Status.Resize)
+        } else if podToCheck.Status.Resize == apiv1.PodResizeStatusDeferred {
+            klog.V(4).Infof("Resource proposal for %s is %v, our resize can't be satisfied by our Node right now", podToCheck.Name, podToCheck.Status.Resize)
+        }
+        return true
+    }
+
+    // If any of the container resources don't match their spec, it's...updating but the lifecycle hasn't kicked in yet? So we
+    // also need to mark that?
+    /*
+        for num, container := range podToCheck.Spec.Containers {
+            // TODO(jkyros): supported resources only?
+ // Resources can be nil, especially if the feature gate isn't on + if podToCheck.Status.ContainerStatuses[num].Resources != nil { + + if !reflect.DeepEqual(container.Resources, *podToCheck.Status.ContainerStatuses[num].Resources) { + klog.V(4).Infof("Resize must be in progress for %s, resources for container %s don't match", podToCheck.Name, container.Name) + return true + } + } + }*/ + return false + +} From 385826a87500ba4ff0e000d560b25775a5b3186a Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 14 Mar 2024 01:22:37 -0500 Subject: [PATCH 13/26] VPA: Updater logic allows in-place scaling The updater logic wasn't in-place aware, so I tried to make it so. The thought here is that we try to in-place update if we can, if we can't or if it gets stuck/can't satisfy the recommendation, then we fall back to eviction. I tried to keep the "blast radius" small by stuffing the in-place logic in its own function and then falling back to eviction if it's not possible. It would be nice if we had some sort of "can the node support an in-place resize with the current recommendation" but that seemed like a whole other can of worms and math. --- .../pkg/updater/logic/updater.go | 138 +++++++++++++++++- 1 file changed, 132 insertions(+), 6 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index 5d4a300af57..39e21f82687 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -147,8 +147,8 @@ func (u *updater) RunOnce(ctx context.Context) { continue } if vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeRecreate && - vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeAuto { - klog.V(3).InfoS("Skipping VPA object because its mode is not \"Recreate\" or \"Auto\"", "vpa", klog.KObj(vpa)) + vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeAuto && vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeInPlaceOrRecreate { + klog.V(3).InfoS("Skipping VPA object because its mode is not \"UpdateOrRecreate\", \"Recreate\" or \"Auto\"", "vpa", klog.KObj(vpa)) continue } selector, err := u.selectorFetcher.Fetch(ctx, vpa) @@ -199,11 +199,17 @@ func (u *updater) RunOnce(ctx context.Context) { vpasWithEvictablePodsCounter := metrics_updater.NewVpasWithEvictablePodsCounter() vpasWithEvictedPodsCounter := metrics_updater.NewVpasWithEvictedPodsCounter() + vpasWithInPlaceUpdatablePodsCounter := metrics_updater.NewVpasWithInPlaceUpdtateablePodsCounter() + vpasWithInPlaceUpdatedPodsCounter := metrics_updater.NewVpasWithInPlaceUpdtatedPodsCounter() + // using defer to protect against 'return' after evictionRateLimiter.Wait defer controlledPodsCounter.Observe() defer evictablePodsCounter.Observe() defer vpasWithEvictablePodsCounter.Observe() defer vpasWithEvictedPodsCounter.Observe() + // separate counters for in-place + defer vpasWithInPlaceUpdatablePodsCounter.Observe() + defer vpasWithInPlaceUpdatedPodsCounter.Observe() // NOTE: this loop assumes that controlledPods are filtered // to contain only Pods controlled by a VPA in auto or recreate mode @@ -211,17 +217,40 @@ func (u *updater) RunOnce(ctx context.Context) { vpaSize := len(livePods) controlledPodsCounter.Add(vpaSize, vpaSize) evictionLimiter := u.evictionFactory.NewPodsEvictionRestriction(livePods, vpa) + // TODO(jkyros): I need to know the priority details here so I can use them to determine what we want to do to the pod + // previously it was just "evict" but now we have to make decisions, so we 
need to know podsForUpdate := u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa) evictablePodsCounter.Add(vpaSize, len(podsForUpdate)) withEvictable := false withEvicted := false - for _, pod := range podsForUpdate { + + for _, prioritizedPod := range podsForUpdate { + + pod := prioritizedPod.Pod() + + // TODO(jkyros): Not ideal, but try to corral the mess from in-place VPA :) + fallBackToEviction, err := u.AttemptInPlaceScalingIfPossible(ctx, vpaSize, vpa, pod, evictionLimiter, vpasWithInPlaceUpdatablePodsCounter, vpasWithInPlaceUpdatedPodsCounter) + if err != nil { + klog.Warningf("error attemptng to scale pod %v in-place: %v", pod.Name, err) + return + } + // If in-place scaling was possible, and it isn't stuck, then skip eviction + if fallBackToEviction { + // TODO(jkyros): this needs to be cleaner, but we absolutely need to make sure a disruptionless update doesn't "sneak through" + if prioritizedPod.IsDisruptionless() { + klog.Infof("Not falling back to eviction, %v was supposed to be disruptionless", pod.Name) + continue + } + } else { + continue + } + withEvictable = true if !evictionLimiter.CanEvict(pod) { continue } - err := u.evictionRateLimiter.Wait(ctx) + err = u.evictionRateLimiter.Wait(ctx) if err != nil { klog.V(0).InfoS("Eviction rate limiter wait failed", "error", err) return @@ -246,6 +275,20 @@ func (u *updater) RunOnce(ctx context.Context) { timer.ObserveStep("EvictPods") } +// VpaReommendationProvided checks the VPA status to see if it has provided a recommendation yet. Used +// to make sure we don't get bogus values for in-place scaling +// TODO(jkyros): take this out when you find the proper place to gate this +func VpaReommendationProvided(vpa *vpa_types.VerticalPodAutoscaler) bool { + for _, condition := range vpa.Status.Conditions { + + if condition.Type == vpa_types.RecommendationProvided && condition.Status == apiv1.ConditionTrue { + return true + } + } + return false + +} + func getRateLimiter(evictionRateLimit float64, evictionRateLimitBurst int) *rate.Limiter { var evictionRateLimiter *rate.Limiter if evictionRateLimit <= 0 { @@ -260,7 +303,7 @@ func getRateLimiter(evictionRateLimit float64, evictionRateLimitBurst int) *rate } // getPodsUpdateOrder returns list of pods that should be updated ordered by update priority -func (u *updater) getPodsUpdateOrder(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) []*apiv1.Pod { +func (u *updater) getPodsUpdateOrder(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) []*priority.PrioritizedPod { priorityCalculator := priority.NewUpdatePriorityCalculator( vpa, nil, @@ -271,7 +314,7 @@ func (u *updater) getPodsUpdateOrder(pods []*apiv1.Pod, vpa *vpa_types.VerticalP priorityCalculator.AddPod(pod, time.Now()) } - return priorityCalculator.GetSortedPods(u.evictionAdmission) + return priorityCalculator.GetSortedPrioritizedPods(u.evictionAdmission) } func filterNonEvictablePods(pods []*apiv1.Pod, evictionRestriction eviction.PodsEvictionRestriction) []*apiv1.Pod { @@ -323,3 +366,86 @@ func newEventRecorder(kubeClient kube_client.Interface) record.EventRecorder { return eventBroadcaster.NewRecorder(vpascheme, apiv1.EventSource{Component: "vpa-updater"}) } + +func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize int, vpa *vpa_types.VerticalPodAutoscaler, pod *apiv1.Pod, evictionLimiter eviction.PodsEvictionRestriction, vpasWithInPlaceUpdatablePodsCounter *metrics_updater.SizeBasedGauge, vpasWithInPlaceUpdatedPodsCounter *metrics_updater.SizeBasedGauge) 
(fallBackToEviction bool, err error) { + // TODO(jkyros): We're somehow jumping the gun here, I'm not sure if it's a race condition or what but evictions + // don't hit it (maybe they take too long?). We end up with 0's for resource recommendations because we + // queue this for in-place before the VPA has made a recommendation. + + if !VpaReommendationProvided(vpa) { + klog.V(4).Infof("VPA hasn't made a recommendation yet, we're early on %s for some reason", pod.Name) + // TODO(jkyros): so we must have had some erroneous evictions before, but we were passing the test suite? But for now if I want to test + // in-place I need it to not evict immediately if I can't in-place (because then it will never in-place) + fallBackToEviction = false + return + } + + if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeInPlaceOrRecreate || vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeAuto { + + // separate counters/stats for in-place updates + withInPlaceUpdatable := false + withInPlaceUpdated := false + + // TODO(jkyros): I don't think this can happen, it gets removed immediately by admission if admitted + if _, ok := pod.Annotations["autoscaling.k8s.io/resize"]; ok { + klog.V(4).Infof("Pod is %s already marked for resize, ignoring for now", pod.Name) + return + } + + klog.V(4).Infof("Looks like we might be able to in-place update %s..", pod.Name) + withInPlaceUpdatable = true + // If I can't update + if !evictionLimiter.CanInPlaceUpdate(pod) { + // But it's not because we're updating already... + if !eviction.IsInPlaceUpdating(pod) { + klog.V(4).Infof("Can't in-place update pod %s, falling back to eviction, it might say no", pod.Name) + fallBackToEviction = true + return + + } + if pod.Status.Resize != apiv1.PodResizeStatusDeferred && pod.Status.Resize != apiv1.PodResizeStatusInfeasible { + klog.V(4).Infof("In-place update in progress for %s, not falling back to eviction", pod.Name) + fallBackToEviction = false + return + } + klog.V(4).Infof("In-place update looks stuck for %s, falling back to eviction", pod.Name) + fallBackToEviction = true + return + + } + + // TODO(jkyros): need our own rate limiter or can we freeload off the eviction one? + err = u.evictionRateLimiter.Wait(ctx) + if err != nil { + // TODO(jkyros): whether or not we fall back to eviction here probably depends on *why* we failed + klog.Warningf("updating pod %v failed: %v", pod.Name, err) + return + } + + klog.V(2).Infof("updating pod %v", pod.Name) + evictErr := evictionLimiter.InPlaceUpdate(pod, u.eventRecorder) + if evictErr != nil { + klog.Warningf("updating pod %v failed: %v", pod.Name, evictErr) + } else { + // TODO(jkyros): come back later for stats + withInPlaceUpdated = false + metrics_updater.AddInPlaceUpdatedPod(vpaSize) + } + + if withInPlaceUpdatable { + vpasWithInPlaceUpdatablePodsCounter.Add(vpaSize, 1) + } + if withInPlaceUpdated { + vpasWithInPlaceUpdatedPodsCounter.Add(vpaSize, 1) + } + + } else { + // If our update mode doesn't support in-place, then evict + fallBackToEviction = true + return + } + + // counters for in-place update + + return +} From c2ba47234ef1e105d60413f278d78b05d6864364 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 14 Mar 2024 01:32:37 -0500 Subject: [PATCH 14/26] VPA: Add metrics gauges for in-place updates We might want to add a few more that are combined disruption counters, e.g. in-place + eviction totals, but for now just add some separate counters to keep track of what in-place updates are doing. 
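
For reference, a combined disruption counter of that sort is not part of this
patch; a rough sketch of what it could look like follows. The metric name and
the literal "vpa_updater" namespace are illustrative only; the real code would
reuse the package's metricsNamespace constant and register it alongside the
other gauges.

    package updater

    import "github.com/prometheus/client_golang/prometheus"

    // disruptedCount is a hypothetical combined counter: it would be bumped
    // both when a pod is evicted and when it is resized in-place, giving one
    // view of total VPA-caused disruption per VPA size bucket.
    var disruptedCount = prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "vpa_updater", // illustrative; reuse metricsNamespace in practice
            Name:      "disrupted_pods_total",
            Help:      "Number of Pods evicted or updated in-place by the Updater.",
        }, []string{"vpa_size_log2"},
    )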
---
 .../pkg/utils/metrics/updater/updater.go      | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/vertical-pod-autoscaler/pkg/utils/metrics/updater/updater.go b/vertical-pod-autoscaler/pkg/utils/metrics/updater/updater.go
index 9a068b735ef..a3034350da7 100644
--- a/vertical-pod-autoscaler/pkg/utils/metrics/updater/updater.go
+++ b/vertical-pod-autoscaler/pkg/utils/metrics/updater/updater.go
@@ -75,6 +75,38 @@ var (
 		}, []string{"vpa_size_log2"},
 	)
 
+	inPlaceUpdatableCount = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metricsNamespace,
+			Name:      "in_place_updatable_pods_total",
+			Help:      "Number of Pods matching in place update criteria.",
+		}, []string{"vpa_size_log2"},
+	)
+
+	inPlaceUpdatedCount = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: metricsNamespace,
+			Name:      "in_place_updated_pods_total",
+			Help:      "Number of Pods updated in-place by Updater to apply a new recommendation.",
+		}, []string{"vpa_size_log2"},
+	)
+
+	vpasWithInPlaceUpdatablePodsCount = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metricsNamespace,
+			Name:      "vpas_with_in_place_updatable_pods_total",
+			Help:      "Number of VPA objects with at least one Pod matching in place update criteria.",
+		}, []string{"vpa_size_log2"},
+	)
+
+	vpasWithInPlaceUpdatedPodsCount = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: metricsNamespace,
+			Name:      "vpas_with_in_place_updated_pods_total",
+			Help:      "Number of VPA objects with at least one in-place updated Pod.",
+		}, []string{"vpa_size_log2"},
+	)
+
 	functionLatency = metrics.CreateExecutionTimeMetric(metricsNamespace,
 		"Time spent in various parts of VPA Updater main loop.")
 )
@@ -123,6 +155,27 @@ func AddEvictedPod(vpaSize int) {
 	evictedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
 }
 
+// NewInPlaceUpdtateablePodsCounter returns a wrapper for counting Pods which are matching in-place update criteria
+func NewInPlaceUpdtateablePodsCounter() *SizeBasedGauge {
+	return newSizeBasedGauge(inPlaceUpdatableCount)
+}
+
+// NewVpasWithInPlaceUpdtateablePodsCounter returns a wrapper for counting VPA objects with Pods matching in-place update criteria
+func NewVpasWithInPlaceUpdtateablePodsCounter() *SizeBasedGauge {
+	return newSizeBasedGauge(vpasWithInPlaceUpdatablePodsCount)
+}
+
+// NewVpasWithInPlaceUpdtatedPodsCounter returns a wrapper for counting VPA objects with in-place updated Pods
+func NewVpasWithInPlaceUpdtatedPodsCounter() *SizeBasedGauge {
+	return newSizeBasedGauge(vpasWithInPlaceUpdatedPodsCount)
+}
+
+// AddInPlaceUpdatedPod increases the counter of pods updated in place by Updater, by given VPA size
+func AddInPlaceUpdatedPod(vpaSize int) {
+	log2 := metrics.GetVpaSizeLog2(vpaSize)
+	inPlaceUpdatedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
+}
+
 // Add increases the counter for the given VPA size
 func (g *SizeBasedGauge) Add(vpaSize int, value int) {
 	log2 := metrics.GetVpaSizeLog2(vpaSize)

From 97c952e8f1386a5e41283a19859b9384b0f4ef37 Mon Sep 17 00:00:00 2001
From: John Kyros 
Date: Thu, 14 Mar 2024 01:34:18 -0500
Subject: [PATCH 15/26] VPA: Update mocks to accommodate in-place VPA changes

For now, this just updates the mock with the new functions I added
to the eviction interface. We need some in-place test cases.
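
As a starting point for those test cases, the new mock methods could be
stubbed the same way the existing eviction expectations are; this is only a
sketch and the package name and helper name are illustrative:

    package logic

    import (
        apiv1 "k8s.io/api/core/v1"

        "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test"
    )

    // registerInPlaceExpectations wires up the new mock methods for a future
    // in-place test case, mirroring the existing CanEvict/Evict expectations.
    func registerInPlaceExpectations(eviction *test.PodsEvictionRestrictionMock, pods []*apiv1.Pod) {
        for _, pod := range pods {
            // Pretend every pod is eligible for an in-place resize...
            eviction.On("CanInPlaceUpdate", pod).Return(true)
            // ...and that marking it for the resize succeeds.
            eviction.On("InPlaceUpdate", pod, nil).Return(nil)
            // When the in-place path is taken, eviction should not be attempted.
            eviction.On("CanEvict", pod).Return(false)
        }
    }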
--- vertical-pod-autoscaler/pkg/utils/test/test_utils.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go index 257f613504f..5fbc79d2124 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go @@ -121,6 +121,18 @@ func (m *PodsEvictionRestrictionMock) CanEvict(pod *apiv1.Pod) bool { return args.Bool(0) } +// InPlaceUpdate is a mock implementation of PodsEvictionRestriction.InPlaceUpdate +func (m *PodsEvictionRestrictionMock) InPlaceUpdate(pod *apiv1.Pod, eventRecorder record.EventRecorder) error { + args := m.Called(pod, eventRecorder) + return args.Error(0) +} + +// CanInPlaceUpdate is a mock implementation of PodsEvictionRestriction.CanInPlaceUpdate +func (m *PodsEvictionRestrictionMock) CanInPlaceUpdate(pod *apiv1.Pod) bool { + args := m.Called(pod) + return args.Bool(0) +} + // PodListerMock is a mock of PodLister type PodListerMock struct { mock.Mock From e97d9eb75f3fef938e1bc211a167247ac2be9379 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 14 Mar 2024 02:57:41 -0500 Subject: [PATCH 16/26] VPA: hack unit tests to account for in-place TODO(jkyros): come back here and look at this after you get it working --- vertical-pod-autoscaler/pkg/updater/logic/updater_test.go | 4 ++++ vertical-pod-autoscaler/pkg/utils/test/test_utils.go | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go index 64357d4924e..a38b935831e 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go @@ -159,6 +159,10 @@ func testRunOnceBase( Get() pods[i].Labels = labels + // We will test in-place separately, but we do need to account for these calls + eviction.On("CanInPlaceUpdate", pods[i]).Return(false) + eviction.On("IsInPlaceUpdating", pods[i]).Return(false) + eviction.On("CanEvict", pods[i]).Return(true) eviction.On("Evict", pods[i], nil).Return(nil) } diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go index 5fbc79d2124..f96612013c5 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go @@ -133,6 +133,12 @@ func (m *PodsEvictionRestrictionMock) CanInPlaceUpdate(pod *apiv1.Pod) bool { return args.Bool(0) } +// IsInPlaceUpdating is a mock implementation of PodsEvictionRestriction.IsInPlaceUpdating +func (m *PodsEvictionRestrictionMock) IsInPlaceUpdating(pod *apiv1.Pod) bool { + args := m.Called(pod) + return args.Bool(0) +} + // PodListerMock is a mock of PodLister type PodListerMock struct { mock.Mock From 7fca72df6dcde20f38ebd02db4e54bccd5745049 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 21 Mar 2024 00:22:38 -0500 Subject: [PATCH 17/26] VPA: add rbac for in-pace updates The updater now needs to be able to update pods. In the current approach, it's so it can add an annotation marking the pod as needing an in-place update. The admission controller is still doing the resource updating as part of patching ,the updater is not updating resources directly. I wonder if it should? 
--- vertical-pod-autoscaler/deploy/vpa-rbac.yaml | 25 +++++++++++++++++++ .../eviction/pods_eviction_restriction.go | 7 ++++++ 2 files changed, 32 insertions(+) diff --git a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml index a41f468a738..fdc797c3371 100644 --- a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml @@ -121,6 +121,31 @@ rules: - create --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:vpa-updater-in-place +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system:vpa-updater-in-place-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:vpa-updater-in-place +subjects: + - kind: ServiceAccount + name: vpa-updater + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: system:metrics-reader diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go index 56dc0449452..34539aaae1c 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go @@ -579,3 +579,10 @@ func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) { return false } + +/* +func BadResizeStatus(pod *apiv1.Pod) bool { + if pod.Status.Resize == apiv1.PodResizeStatusInfeasible{ + + } +}*/ From 8be8213abe4643a1f1adcecc6ae0b6c53fd6caaa Mon Sep 17 00:00:00 2001 From: John Kyros Date: Fri, 22 Mar 2024 22:33:59 -0500 Subject: [PATCH 18/26] VPA: Add e2e tests for in-place scaling So far this is just: - Make sure it scales when it can But we still need a bunch of other ones like - Test fallback to eviction - Test timeout/eviction when it gets stuck, etc --- vertical-pod-autoscaler/e2e/v1/common.go | 110 +++++++++++++++ vertical-pod-autoscaler/e2e/v1/updater.go | 125 ++++++++++++++++++ .../e2e/v1beta2/updater.go | 2 +- 3 files changed, 236 insertions(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go index 6ce493c9a8b..70be30a2dd8 100644 --- a/vertical-pod-autoscaler/e2e/v1/common.go +++ b/vertical-pod-autoscaler/e2e/v1/common.go @@ -165,6 +165,15 @@ func NewHamsterDeploymentWithResources(f *framework.Framework, cpuQuantity, memo apiv1.ResourceCPU: cpuQuantity, apiv1.ResourceMemory: memoryQuantity, } + // TODO(jkyros): It seems to behave differently if we have limits? + /* + cpuQuantity.Add(resource.MustParse("100m")) + memoryQuantity.Add(resource.MustParse("100Mi")) + + d.Spec.Template.Spec.Containers[0].Resources.Limits = apiv1.ResourceList{ + apiv1.ResourceCPU: cpuQuantity, + apiv1.ResourceMemory: memoryQuantity, + }*/ return d } @@ -555,3 +564,104 @@ func InstallLimitRangeWithMin(f *framework.Framework, minCpuLimit, minMemoryLimi minMemoryLimitQuantity := ParseQuantityOrDie(minMemoryLimit) installLimitRange(f, &minCpuLimitQuantity, &minMemoryLimitQuantity, nil, nil, lrType) } + +// WaitForPodsUpdatedWithoutEviction waits for pods to be updated without any evictions taking place over the polling +// interval. +func WaitForPodsUpdatedWithoutEviction(f *framework.Framework, initialPods, podList *apiv1.PodList) error { + // TODO(jkyros): This needs to be: + // 1. 
Make sure we wait for each of the containers to get an update queued + // 2. Make sure each of the containers actually finish the update + // 3. Once everyone has gone through 1 cycle, we don't care anymore, we can move on (it will keep scaling obviously) + framework.Logf("waiting for update to start and resources to differ") + var resourcesHaveDiffered bool + err := wait.PollUntilContextTimeout(context.TODO(), pollInterval, pollTimeout, false, func(context.Context) (bool, error) { + // TODO(jkyros): make sure we don't update too many pods at once + podList, err := GetHamsterPods(f) + if err != nil { + return false, err + } + resourcesAreSynced := true + podMissing := false + // Go through the list of initial pods + for _, initialPod := range initialPods.Items { + found := false + // Go through the list of pods we have now + for _, pod := range podList.Items { + // If we still have our initial pod, good + if initialPod.Name == pod.Name { + found = true + + // Check to see if we have our container resources updated + for num, container := range pod.Spec.Containers { + // If our current spec differs from initial, we know we were told to update + if !resourcesHaveDiffered { + for resourceName, resourceLimit := range container.Resources.Limits { + initialResourceLimit := initialPod.Spec.Containers[num].Resources.Limits[resourceName] + if !initialResourceLimit.Equal(resourceLimit) { + framework.Logf("E: %s/%s: %s limit (%v) differs from initial (%v), change has started ", pod.Name, container.Name, resourceName, resourceLimit.String(), initialResourceLimit.String()) + //fmt.Printf("UPD: L:%s: %s/%s %v differs from initial %v\n", resourceName, pod.Name, container.Name, resourceLimit, pod.Status.ContainerStatuses[num].Resources.Limits[resourceName]) + resourcesHaveDiffered = true + + } + + } + for resourceName, resourceRequest := range container.Resources.Requests { + initialResourceRequest := initialPod.Spec.Containers[num].Resources.Requests[resourceName] + if !initialResourceRequest.Equal(resourceRequest) { + framework.Logf("%s/%s: %s request (%v) differs from initial (%v), change has started ", pod.Name, container.Name, resourceName, resourceRequest.String(), initialResourceRequest.String()) + resourcesHaveDiffered = true + + } + } + } + + if len(pod.Status.ContainerStatuses) > num { + if pod.Status.ContainerStatuses[num].Resources != nil { + for resourceName, resourceLimit := range container.Resources.Limits { + statusResourceLimit := pod.Status.ContainerStatuses[num].Resources.Limits[resourceName] + if !statusResourceLimit.Equal(resourceLimit) { + framework.Logf("%s/%s: %s limit status (%v) differs from limit spec (%v), still in progress", pod.Name, container.Name, resourceName, resourceLimit.String(), statusResourceLimit.String()) + + resourcesAreSynced = false + + } + + } + for resourceName, resourceRequest := range container.Resources.Requests { + statusResourceRequest := pod.Status.ContainerStatuses[num].Resources.Requests[resourceName] + if !pod.Status.ContainerStatuses[num].Resources.Requests[resourceName].Equal(resourceRequest) { + framework.Logf("%s/%s: %s request status (%v) differs from request spec(%v), still in progress ", pod.Name, container.Name, resourceName, resourceRequest.String(), statusResourceRequest.String()) + resourcesAreSynced = false + + } + } + + } else { + framework.Logf("SOMEHOW ITS EMPTY\n") + } + } + + } + } + + } + if !found { + //framework.Logf("pod %s was evicted and should not have been\n", initialPod.Name) + podMissing = true + } + + } + if podMissing { + 
return false, fmt.Errorf("a pod was erroneously evicted") + } + if len(podList.Items) > 0 && resourcesAreSynced { + if !resourcesHaveDiffered { + return false, nil + } + framework.Logf("after checking %d pods, were are in sync\n", len(podList.Items)) + return true, nil + } + return false, nil + }) + return err +} diff --git a/vertical-pod-autoscaler/e2e/v1/updater.go b/vertical-pod-autoscaler/e2e/v1/updater.go index ccf0f07ded1..aab743bfcaf 100644 --- a/vertical-pod-autoscaler/e2e/v1/updater.go +++ b/vertical-pod-autoscaler/e2e/v1/updater.go @@ -38,6 +38,88 @@ var _ = UpdaterE2eDescribe("Updater", func() { f := framework.NewDefaultFramework("vertical-pod-autoscaling") f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + // TODO(jkyros): it should only evict here if the feature gate is off, so we need to + // check behavior by making sure it aligns with the feature gate. e.g. if it's on, then do this test, if it's not, then skip it + + // 1. check if we have resize policies, if we do, do the test with in-place + // 2. if we don't, then do it the old way + + ginkgo.It("In-place update pods when Admission Controller status available", func() { + const statusUpdateInterval = 10 * time.Second + + ginkgo.By("Setting up the Admission Controller status") + stopCh := make(chan struct{}) + statusUpdater := status.NewUpdater( + f.ClientSet, + status.AdmissionControllerStatusName, + status.AdmissionControllerStatusNamespace, + statusUpdateInterval, + "e2e test", + ) + defer func() { + // Schedule a cleanup of the Admission Controller status. + // Status is created outside the test namespace. + ginkgo.By("Deleting the Admission Controller status") + close(stopCh) + err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace). + Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + statusUpdater.Run(stopCh) + + podList := setupPodsForUpscalingInPlace(f) + if len(podList.Items[0].Spec.Containers[0].ResizePolicy) <= 0 { + // Feature is probably not working here + ginkgo.Skip("Skipping test, InPlacePodVerticalScaling not available") + } + + initialPods := podList.DeepCopy() + // 1. Take initial pod list + // 2. Loop through and compare all the resource values + // 3. When they change, it's good + + ginkgo.By("Waiting for pods to be in-place updated") + + //gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err := WaitForPodsUpdatedWithoutEviction(f, initialPods, podList) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("Does not evict pods for downscaling in-place", func() { + const statusUpdateInterval = 10 * time.Second + + ginkgo.By("Setting up the Admission Controller status") + stopCh := make(chan struct{}) + statusUpdater := status.NewUpdater( + f.ClientSet, + status.AdmissionControllerStatusName, + status.AdmissionControllerStatusNamespace, + statusUpdateInterval, + "e2e test", + ) + defer func() { + // Schedule a cleanup of the Admission Controller status. + // Status is created outside the test namespace. + ginkgo.By("Deleting the Admission Controller status") + close(stopCh) + err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace). 
+ Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + statusUpdater.Run(stopCh) + + podList := setupPodsForDownscalingInPlace(f, nil) + if len(podList.Items[0].Spec.Containers[0].ResizePolicy) <= 0 { + // Feature is probably not working here + ginkgo.Skip("Skipping test, InPlacePodVerticalScaling not available") + } + initialPods := podList.DeepCopy() + + ginkgo.By("Waiting for pods to be in-place downscaled") + err := WaitForPodsUpdatedWithoutEviction(f, initialPods, podList) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("evicts pods when Admission Controller status available", func() { const statusUpdateInterval = 10 * time.Second @@ -165,6 +247,49 @@ func setupPodsForEviction(f *framework.Framework, hamsterCPU, hamsterMemory stri WithName("hamster-vpa"). WithNamespace(f.Namespace.Name). WithTargetRef(controller). + WithUpdateMode(vpa_types.UpdateModeRecreate). + WithEvictionRequirements(er). + WithContainer(containerName). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(containerName, "200m"). + WithLowerBound(containerName, "200m"). + WithUpperBound(containerName, "200m"). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + + return podList +} + +func setupPodsForUpscalingInPlace(f *framework.Framework) *apiv1.PodList { + return setupPodsForInPlace(f, "100m", "100Mi", nil) +} + +func setupPodsForDownscalingInPlace(f *framework.Framework, er []*vpa_types.EvictionRequirement) *apiv1.PodList { + return setupPodsForInPlace(f, "500m", "500Mi", er) +} + +func setupPodsForInPlace(f *framework.Framework, hamsterCPU, hamsterMemory string, er []*vpa_types.EvictionRequirement) *apiv1.PodList { + controller := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster-deployment", + } + ginkgo.By(fmt.Sprintf("Setting up a hamster %v", controller.Kind)) + setupHamsterController(f, controller.Kind, hamsterCPU, hamsterMemory, defaultHamsterReplicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(controller). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). WithEvictionRequirements(er). WithContainer(containerName). AppendRecommendation( diff --git a/vertical-pod-autoscaler/e2e/v1beta2/updater.go b/vertical-pod-autoscaler/e2e/v1beta2/updater.go index 5b8b894d565..33daf4da634 100644 --- a/vertical-pod-autoscaler/e2e/v1beta2/updater.go +++ b/vertical-pod-autoscaler/e2e/v1beta2/updater.go @@ -87,7 +87,7 @@ func setupPodsForEviction(f *framework.Framework) *apiv1.PodList { gomega.Expect(err).NotTo(gomega.HaveOccurred()) ginkgo.By("Setting up a VPA CRD") - SetupVPA(f, "200m", vpa_types.UpdateModeAuto, controller) + SetupVPA(f, "200m", vpa_types.UpdateModeRecreate, controller) return podList } From 1126b7cf552ee57c3fec56fcb8334b6540d46d44 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Fri, 22 Mar 2024 22:40:15 -0500 Subject: [PATCH 19/26] VPA: allow rule-breaking updates if disruptionless In the event that we can't perform the whole update, this calculates a set of updates that should be disruptionless and only queues that partial set, omitting the parts that would cause disruption. 
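
The per-resource decision is driven by the container's resize policy from the
in-place pod resize API; here is a minimal, self-contained illustration of the
fields involved (assuming the types in k8s.io/api/core/v1):

    package main

    import (
        "fmt"

        apiv1 "k8s.io/api/core/v1"
    )

    func main() {
        // CPU can be resized without a restart, memory cannot, so only the CPU
        // part of a recommendation would survive into the disruption-free subset.
        policies := []apiv1.ContainerResizePolicy{
            {ResourceName: apiv1.ResourceCPU, RestartPolicy: apiv1.NotRequired},
            {ResourceName: apiv1.ResourceMemory, RestartPolicy: apiv1.RestartContainer},
        }
        for _, p := range policies {
            fmt.Printf("%s disruption-free: %v\n", p.ResourceName, p.RestartPolicy == apiv1.NotRequired)
        }
    }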
--- .../priority/update_priority_calculator.go | 119 +++++++++++++++++- 1 file changed, 114 insertions(+), 5 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go index c6c9127f784..10869e4ad10 100644 --- a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go +++ b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go @@ -33,7 +33,7 @@ import ( ) var ( - defaultUpdateThreshold = flag.Float64("pod-update-threshold", 0.1, "Ignore updates that have priority lower than the value of this flag") + defaultUpdateThreshold = flag.Float64("pod-update-threshold", 0.1, "Ignore disruptive updates that have priority lower than the value of this flag") podLifetimeUpdateThreshold = flag.Duration("in-recommendation-bounds-eviction-lifetime-threshold", time.Hour*12, "Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range") @@ -48,7 +48,7 @@ var ( // than pod with 100M current memory and 150M recommendation (100% increase vs 50% increase) type UpdatePriorityCalculator struct { vpa *vpa_types.VerticalPodAutoscaler - pods []prioritizedPod + pods []PrioritizedPod config *UpdateConfig recommendationProcessor vpa_api_util.RecommendationProcessor priorityProcessor PriorityProcessor @@ -116,6 +116,8 @@ func (calc *UpdatePriorityCalculator) AddPod(pod *apiv1.Pod, now time.Time) { } } + disruptionlessRecommendation := calc.CalcualteDisruptionFreeActions(pod, processedRecommendation) + // The update is allowed in following cases: // - the request is outside the recommended range for some container. // - the pod lives for at least 24h and the resource diff is >= MinChangePriority. @@ -127,6 +129,16 @@ func (calc *UpdatePriorityCalculator) AddPod(pod *apiv1.Pod, now time.Time) { return } if now.Before(pod.Status.StartTime.Add(*podLifetimeUpdateThreshold)) { + // TODO(jkyros): do we need an in-place update threshold arg ? + // If our recommendations are disruptionless, we can bypass the threshold limit + if len(disruptionlessRecommendation.ContainerRecommendations) > 0 { + klog.V(2).Infof("pod accepted for DISRUPTIONLESS (%d/%d) update %v/%v with priority %v", len(disruptionlessRecommendation.ContainerRecommendations), len(processedRecommendation.ContainerRecommendations), pod.Namespace, pod.Name, updatePriority.ResourceDiff) + updatePriority.Disruptionless = true + calc.pods = append(calc.pods, PrioritizedPod{ + pod: pod, + priority: updatePriority, + recommendation: disruptionlessRecommendation}) + } klog.V(4).InfoS("Not updating a short-lived pod, request within recommended range", "pod", klog.KObj(pod)) return } @@ -142,12 +154,30 @@ func (calc *UpdatePriorityCalculator) AddPod(pod *apiv1.Pod, now time.Time) { return } klog.V(2).InfoS("Pod accepted for update", "pod", klog.KObj(pod), "updatePriority", updatePriority.ResourceDiff, "processedRecommendations", calc.GetProcessedRecommendationTargets(processedRecommendation)) - calc.pods = append(calc.pods, prioritizedPod{ + calc.pods = append(calc.pods, PrioritizedPod{ pod: pod, priority: updatePriority, recommendation: processedRecommendation}) } +// GetSortedPrioritizedPods returns a list of prioritized pods ordered by update priority (highest update priority first). 
Used instead +// of GetSortedPods when we need access to the priority information +func (calc *UpdatePriorityCalculator) GetSortedPrioritizedPods(admission PodEvictionAdmission) []*PrioritizedPod { + sort.Sort(byPriorityDesc(calc.pods)) + + //result := []*apiv1.Pod{} + result := []*PrioritizedPod{} + for num, podPrio := range calc.pods { + if admission.Admit(podPrio.pod, podPrio.recommendation) { + result = append(result, &calc.pods[num]) + } else { + klog.V(2).Infof("pod removed from update queue by PodEvictionAdmission: %v", podPrio.pod.Name) + } + } + + return result +} + // GetSortedPods returns a list of pods ordered by update priority (highest update priority first) func (calc *UpdatePriorityCalculator) GetSortedPods(admission PodEvictionAdmission) []*apiv1.Pod { sort.Sort(byPriorityDesc(calc.pods)) @@ -206,12 +236,25 @@ func parseVpaObservedContainers(pod *apiv1.Pod) (bool, sets.Set[string]) { return hasObservedContainers, vpaContainerSet } -type prioritizedPod struct { +// PrioritizedPod contains the priority and recommendation details for a pod. +// TODO(jkyros): I made this public, but there may be a cleaner way +type PrioritizedPod struct { pod *apiv1.Pod priority PodPriority recommendation *vpa_types.RecommendedPodResources } +// IsDisruptionless returns the disruptionless status of the underlying pod priority +// TODO(jkyros): scope issues, maybe not the best place to put Disruptionless +func (p PrioritizedPod) IsDisruptionless() bool { + return p.priority.Disruptionless +} + +// Pod returns the underlying private pod +func (p PrioritizedPod) Pod() *apiv1.Pod { + return p.pod +} + // PodPriority contains data for a pod update that can be used to prioritize between updates. type PodPriority struct { // Is any container outside of the recommended range. @@ -220,9 +263,11 @@ type PodPriority struct { ScaleUp bool // Relative difference between the total requested and total recommended resources. ResourceDiff float64 + // Is this update disruptionless + Disruptionless bool } -type byPriorityDesc []prioritizedPod +type byPriorityDesc []PrioritizedPod func (list byPriorityDesc) Len() int { return len(list) @@ -250,3 +295,67 @@ func (p PodPriority) Less(other PodPriority) bool { // 2. A pod with larger value of resourceDiff takes precedence. return p.ResourceDiff < other.ResourceDiff } + +// CalcualteDisruptionFreeActions calculates the set of actions we think we can perform without disruption based on the pod/container resize/restart +// policies and returns that set of actions. 
+func (calc *UpdatePriorityCalculator) CalcualteDisruptionFreeActions(pod *apiv1.Pod, recommendation *vpa_types.RecommendedPodResources) *vpa_types.RecommendedPodResources {
+
+    var disruptionlessRecommendation = &vpa_types.RecommendedPodResources{}
+
+    for _, container := range pod.Spec.Containers {
+        // If we don't have a resize policy, we can't check it
+        if len(container.ResizePolicy) == 0 {
+            continue
+        }
+
+        // So we get whatever the recommendation was for this container
+        resourceRec := getRecommendationForContainerName(container.Name, recommendation)
+        // If we didn't find a recommendation for this container, we don't have anything to do
+        if resourceRec == nil {
+            continue
+        }
+        // Then we go through all the resource recommendations it has
+        disruptive := false
+        for resource := range resourceRec.Target {
+            // And we look up what the restart policy is for those resources
+            resourceRestartPolicy := getRestartPolicyForResource(resource, container.ResizePolicy)
+            // If we don't have one, that's probably bad
+            if resourceRestartPolicy == nil {
+                continue
+            }
+            // If we do have one, and it's disruptive, then we know this won't work
+            if *resourceRestartPolicy != apiv1.NotRequired {
+                disruptive = true
+            }
+
+        }
+        // If any resource in the recommendation needs a restart, leave this container out
+        if disruptive {
+            continue
+        }
+
+        // And if we made it here, we should theoretically be able to do this without disruption
+        disruptionlessRecommendation.ContainerRecommendations = append(disruptionlessRecommendation.ContainerRecommendations, *resourceRec)
+
+    }
+
+    return disruptionlessRecommendation
+}
+
+// getRecommendationForContainerName searches through the list of ContainerRecommendations until it finds one matching the named container. Used
+// to match up containers with their recommendations (we have container, we want resource recommendation)
+func getRecommendationForContainerName(name string, recommendation *vpa_types.RecommendedPodResources) *vpa_types.RecommendedContainerResources {
+    for _, recommendationContainer := range recommendation.ContainerRecommendations {
+        if recommendationContainer.ContainerName == name {
+            return &recommendationContainer
+        }
+    }
+    return nil
+}
+
+// getRestartPolicyForResource searches through the list of resources in the resize policy until it finds the one matching the named resource. Used
+// to match up restart policies with our resource recommendations (we have resource, we want policy).
+func getRestartPolicyForResource(resourceName apiv1.ResourceName, policy []apiv1.ContainerResizePolicy) *apiv1.ResourceResizeRestartPolicy {
+    // TODO(jkyros): can there be duplicate policies for resources?
we just take the first one now + for _, resizePolicy := range policy { + if resizePolicy.ResourceName == resourceName { + return &resizePolicy.RestartPolicy + } + } + return nil +} From 7aa271508cdf853827a099dd7eef43024f1ab507 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Thu, 21 Mar 2024 03:51:25 -0500 Subject: [PATCH 20/26] only allow in-place if explicitly set (for testing) --- vertical-pod-autoscaler/pkg/updater/logic/updater.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index 39e21f82687..bfbf95cd76a 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -380,7 +380,7 @@ func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize i return } - if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeInPlaceOrRecreate || vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeAuto { + if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeInPlaceOrRecreate { // separate counters/stats for in-place updates withInPlaceUpdatable := false From 325890322b5f465e5aa38b10399cab75cd1d2c15 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Tue, 26 Mar 2024 20:46:19 -0500 Subject: [PATCH 21/26] fixup! VPA: Add UpdateModeInPlaceOrRecreate to types --- vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml index 6499413aa75..fefa43312d6 100644 --- a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml @@ -458,6 +458,7 @@ spec: - "Off" - Initial - Recreate + - InPlaceOrRecreate - Auto type: string type: object From 23898894ab3fc137b3437cfd51729c7c6867c597 Mon Sep 17 00:00:00 2001 From: John Kyros Date: Wed, 20 Nov 2024 10:22:32 -0600 Subject: [PATCH 22/26] fixup! 
VPA: hack unit tests to account for in-place --- .../pkg/updater/logic/updater.go | 1 + .../pkg/updater/logic/updater_test.go | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index bfbf95cd76a..a921dcabd2b 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -243,6 +243,7 @@ func (u *updater) RunOnce(ctx context.Context) { continue } } else { + klog.Infof("Not falling back to eviction, probably because we don't have a recommendation yet?") continue } diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go index a38b935831e..b5169c573bf 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go @@ -34,6 +34,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" + corev1 "k8s.io/api/core/v1" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" target_mock "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/mock" @@ -177,12 +178,16 @@ func testRunOnceBase( Name: rc.Name, APIVersion: rc.APIVersion, } + // TOD0(jkyros): I added the recommendationProvided condition here because in-place needs to wait for a + // recommendation to scale, causing this test to fail (because in-place checks before eviction, and in-place will + // wait to scale -- and not fall back to eviction -- until the VPA has made a recommendation) vpaObj := test.VerticalPodAutoscaler(). WithContainer(containerName). WithTarget("2", "200M"). WithMinAllowed(containerName, "1", "100M"). WithMaxAllowed(containerName, "3", "1G"). - WithTargetRef(targetRef).Get() + WithTargetRef(targetRef). + AppendCondition(vpa_types.RecommendationProvided, corev1.ConditionTrue, "reason", "msg", time.Unix(0, 0)).Get() vpaObj.Spec.UpdatePolicy = &vpa_types.PodUpdatePolicy{UpdateMode: &updateMode} vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() @@ -314,13 +319,18 @@ func TestRunOnceIgnoreNamespaceMatchingPods(t *testing.T) { Name: rc.Name, APIVersion: rc.APIVersion, } + + // TOD0(jkyros): I added the recommendationProvided condition here because in-place needs to wait for a + // recommendation to scale, causing this test to fail (because in-place checks before eviction, and in-place will + // wait to scale -- and not fall back to eviction -- until the VPA has made a recommendation) vpaObj := test.VerticalPodAutoscaler(). WithNamespace("default"). WithContainer(containerName). WithTarget("2", "200M"). WithMinAllowed(containerName, "1", "100M"). WithMaxAllowed(containerName, "3", "1G"). - WithTargetRef(targetRef).Get() + WithTargetRef(targetRef). 
+ AppendCondition(vpa_types.RecommendationProvided, corev1.ConditionTrue, "reason", "msg", time.Unix(0, 0)).Get() vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() From 71ee8b5aecebe299c0fa9f9380ab15178a9734e5 Mon Sep 17 00:00:00 2001 From: Max Cao Date: Fri, 20 Dec 2024 15:19:46 -0800 Subject: [PATCH 23/26] Allow VPA updater to actuate recommendations in-place Signed-off-by: Max Cao --- .../pkg/admission-controller/config.go | 3 +- .../resource/pod/handler.go | 15 -- .../resource/pod/patch/util.go | 8 - .../pkg/recommender/input/cluster_feeder.go | 27 ---- .../inplace_recommendation_provider.go | 137 ++++++++++++++++++ .../updater/eviction/last_inplace_update.go | 39 +++++ .../eviction/pods_eviction_restriction.go | 87 +++++++---- .../pkg/updater/logic/updater.go | 73 +++++++--- vertical-pod-autoscaler/pkg/updater/main.go | 27 ++-- .../priority/update_priority_calculator.go | 23 +-- .../annotations/vpa_last_inplace_update.go | 53 +++++++ 11 files changed, 372 insertions(+), 120 deletions(-) create mode 100644 vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go create mode 100644 vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go create mode 100644 vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go diff --git a/vertical-pod-autoscaler/pkg/admission-controller/config.go b/vertical-pod-autoscaler/pkg/admission-controller/config.go index c73490d592e..e0d8ac0805f 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/config.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/config.go @@ -157,8 +157,7 @@ func selfRegistration(clientset kubernetes.Interface, caCert []byte, webHookDela AdmissionReviewVersions: []string{"v1"}, Rules: []admissionregistration.RuleWithOperations{ { - // we're watching for updates now also because of in-place VPA, but we only patch if the update was triggered by the updater - Operations: []admissionregistration.OperationType{admissionregistration.Create, admissionregistration.Update}, + Operations: []admissionregistration.OperationType{admissionregistration.Create}, Rule: admissionregistration.Rule{ APIGroups: []string{""}, APIVersions: []string{"v1"}, diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go index b0415eabf6d..13c39e40380 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go @@ -78,15 +78,6 @@ func (h *resourceHandler) GetPatches(ctx context.Context, ar *admissionv1.Admiss pod.Namespace = namespace } - // We're watching for updates now also because of in-place VPA, but we only want to act on the ones - // that were triggered by the updater so we don't violate disruption quotas - if ar.Operation == admissionv1.Update { - // The patcher will remove this annotation, it's just the signal that the updater okayed this - if _, ok := pod.Annotations["autoscaling.k8s.io/resize"]; !ok { - return nil, nil - } - } - klog.V(4).InfoS("Admitting pod", "pod", klog.KObj(&pod)) controllingVpa := h.vpaMatcher.GetMatchingVPA(ctx, &pod) if controllingVpa == nil { @@ -102,12 +93,6 @@ func (h *resourceHandler) GetPatches(ctx context.Context, ar *admissionv1.Admiss if pod.Annotations == nil { patches = append(patches, patch.GetAddEmptyAnnotationsPatch()) } - // TODO(jkyros): this is weird here, work it out with the above block somehow, but - // we 
need to have this annotation patched out - if ar.Operation == admissionv1.Update { - // TODO(jkyros): the squiggle is escaping the / in the annotation - patches = append(patches, patch.GetRemoveAnnotationPatch("autoscaling.k8s.io~1resize")) - } for _, c := range h.patchCalculators { partialPatches, err := c.CalculatePatches(&pod, controllingVpa) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go index 7955a6ae54f..7bdea7988ab 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go @@ -39,11 +39,3 @@ func GetAddAnnotationPatch(annotationName, annotationValue string) resource_admi Value: annotationValue, } } - -// GetRemoveAnnotationPatch returns a patch that removes the specified annotation. -func GetRemoveAnnotationPatch(annotationName string) resource_admission.PatchRecord { - return resource_admission.PatchRecord{ - Op: "remove", - Path: fmt.Sprintf("/metadata/annotations/%s", annotationName), - } -} diff --git a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go index afa181b3264..4e53fec6d4a 100644 --- a/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go +++ b/vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go @@ -443,33 +443,6 @@ func (feeder *clusterStateFeeder) LoadPods() { } } -// PruneContainers prunes any containers from the intial aggregate states -// that are no longer present in the aggregate states. This is important -// for cases where a container has been renamed or removed, as otherwise -// the VPA will split the recommended resources across containers that -// are no longer present, resulting in the containers that are still -// present being under-resourced -func (feeder *clusterStateFeeder) PruneContainers2() { - - var keysPruned int - // Look through all of our VPAs - for _, vpa := range feeder.clusterState.Vpas { - // TODO(jkyros): maybe vpa.PruneInitialAggregateContainerStates() ? - aggregates := vpa.AggregateStateByContainerNameWithoutCheckpoints() - // Check each initial state to see if it's still "real" - for container := range vpa.ContainersInitialAggregateState { - if _, ok := aggregates[container]; !ok { - delete(vpa.ContainersInitialAggregateState, container) - keysPruned = keysPruned + 1 - - } - } - } - if keysPruned > 0 { - klog.Infof("Pruned %d stale initial aggregate container keys", keysPruned) - } -} - // PruneContainers removes any containers from the aggregates and initial aggregates that are no longer // present in the feeder's clusterState. Without this, we would be averaging our resource calculations // over containers that no longer exist, and continuing to update checkpoints that should be left to expire. diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go b/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go new file mode 100644 index 00000000000..dfbb0071b78 --- /dev/null +++ b/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go @@ -0,0 +1,137 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +import ( + "fmt" + + core "k8s.io/api/core/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation" + "k8s.io/klog/v2" + + vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/limitrange" + vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" +) + +type inPlaceRecommendationProvider struct { + limitsRangeCalculator limitrange.LimitRangeCalculator + recommendationProcessor vpa_api_util.RecommendationProcessor +} + +// NewProvider constructs the recommendation provider that can be used to determine recommendations for pods. +func NewInPlaceProvider(calculator limitrange.LimitRangeCalculator, + recommendationProcessor vpa_api_util.RecommendationProcessor) recommendation.Provider { + return &inPlaceRecommendationProvider{ + limitsRangeCalculator: calculator, + recommendationProcessor: recommendationProcessor, + } +} + +// TODO(maxcao13): Strip down function to remove unnecessary stuff, or refactor so that it can be used in the admission controller as well +// GetContainersResources returns the recommended resources for each container in the given pod in the same order they are specified in the pod.Spec. +// If addAll is set to true, containers w/o a recommendation are also added to the list (and their non-recommended requests and limits will always be preserved if present), +// otherwise they're skipped (default behaviour). +func GetContainersResources(pod *core.Pod, vpaResourcePolicy *vpa_types.PodResourcePolicy, podRecommendation vpa_types.RecommendedPodResources, limitRange *core.LimitRangeItem, + addAll bool) []vpa_api_util.ContainerResources { + resources := make([]vpa_api_util.ContainerResources, len(pod.Spec.Containers)) + for i, container := range pod.Spec.Containers { + recommendation := vpa_api_util.GetRecommendationForContainer(container.Name, &podRecommendation) + if recommendation == nil { + if !addAll { + klog.V(2).InfoS("No recommendation found for container, skipping", "container", container.Name) + continue + } + klog.V(2).InfoS("No match found for container, using Pod request", "container", container.Name) + resources[i].Requests = container.Resources.Requests + } else { + resources[i].Requests = recommendation.Target + } + defaultLimit := core.ResourceList{} + if limitRange != nil { + defaultLimit = limitRange.Default + } + containerControlledValues := vpa_api_util.GetContainerControlledValues(container.Name, vpaResourcePolicy) + if containerControlledValues == vpa_types.ContainerControlledValuesRequestsAndLimits { + proportionalLimits, _ := vpa_api_util.GetProportionalLimit(container.Resources.Limits, container.Resources.Requests, resources[i].Requests, defaultLimit) + if proportionalLimits != nil { + resources[i].Limits = proportionalLimits + } + } + // If the recommendation only contains CPU or Memory (if the VPA was configured this way), we need to make sure we "backfill" the other. + // Only do this when the addAll flag is true. 
+ if addAll { + cpuRequest, hasCpuRequest := container.Resources.Requests[core.ResourceCPU] + if _, ok := resources[i].Requests[core.ResourceCPU]; !ok && hasCpuRequest { + resources[i].Requests[core.ResourceCPU] = cpuRequest + } + memRequest, hasMemRequest := container.Resources.Requests[core.ResourceMemory] + if _, ok := resources[i].Requests[core.ResourceMemory]; !ok && hasMemRequest { + resources[i].Requests[core.ResourceMemory] = memRequest + } + cpuLimit, hasCpuLimit := container.Resources.Limits[core.ResourceCPU] + if _, ok := resources[i].Limits[core.ResourceCPU]; !ok && hasCpuLimit { + resources[i].Limits[core.ResourceCPU] = cpuLimit + } + memLimit, hasMemLimit := container.Resources.Limits[core.ResourceMemory] + if _, ok := resources[i].Limits[core.ResourceMemory]; !ok && hasMemLimit { + resources[i].Limits[core.ResourceMemory] = memLimit + } + } + } + return resources +} + +// GetContainersResourcesForPod returns recommended request for a given pod and associated annotations. +// The returned slice corresponds 1-1 to containers in the Pod. +func (p *inPlaceRecommendationProvider) GetContainersResourcesForPod(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]vpa_api_util.ContainerResources, vpa_api_util.ContainerToAnnotationsMap, error) { + if vpa == nil || pod == nil { + klog.V(2).InfoS("Can't calculate recommendations, one of VPA or Pod is nil", "vpa", vpa, "pod", pod) + return nil, nil, nil + } + klog.V(2).InfoS("Updating requirements for pod", "pod", pod.Name) + + recommendedPodResources := &vpa_types.RecommendedPodResources{} + + if vpa.Status.Recommendation != nil { + var err error + // ignore annotations as they are not used in the in-place update + recommendedPodResources, _, err = p.recommendationProcessor.Apply(vpa, pod) + if err != nil { + klog.V(2).InfoS("Cannot process recommendation for pod", "pod", klog.KObj(pod)) + return nil, nil, err + } + } + containerLimitRange, err := p.limitsRangeCalculator.GetContainerLimitRangeItem(pod.Namespace) + if err != nil { + return nil, nil, fmt.Errorf("error getting containerLimitRange: %s", err) + } + var resourcePolicy *vpa_types.PodResourcePolicy + if vpa.Spec.UpdatePolicy == nil || vpa.Spec.UpdatePolicy.UpdateMode == nil || *vpa.Spec.UpdatePolicy.UpdateMode != vpa_types.UpdateModeOff { + resourcePolicy = vpa.Spec.ResourcePolicy + } + containerResources := GetContainersResources(pod, resourcePolicy, *recommendedPodResources, containerLimitRange, false) + + // Ensure that we are not propagating empty resource key if any. + for _, resource := range containerResources { + if resource.RemoveEmptyResourceKeyIfAny() { + klog.InfoS("An empty resource key was found and purged", "pod", klog.KObj(pod), "vpa", klog.KObj(vpa)) + } + } + + return containerResources, nil, nil +} diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go b/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go new file mode 100644 index 00000000000..b7692e09c7b --- /dev/null +++ b/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go @@ -0,0 +1,39 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eviction + +import ( + core "k8s.io/api/core/v1" + resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" + + vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" +) + +type observedContainers struct{} + +func (*observedContainers) CalculatePatches(pod *core.Pod, _ *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { + vpaObservedContainersValue := annotations.GetVpaObservedContainersValue(pod) + return []resource_admission.PatchRecord{patch.GetAddAnnotationPatch(annotations.VpaObservedContainersLabel, vpaObservedContainersValue)}, nil +} + +// NewLastInPlaceUpdateCalculator returns calculator for +// observed containers patches. +func NewLastInPlaceUpdateCalculator() patch.Calculator { + return &observedContainers{} +} diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go index 34539aaae1c..45d00cbe406 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go @@ -18,6 +18,7 @@ package eviction import ( "context" + "encoding/json" "fmt" "time" @@ -25,6 +26,9 @@ import ( apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8stypes "k8s.io/apimachinery/pkg/types" + resource_updates "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" appsinformer "k8s.io/client-go/informers/apps/v1" coreinformer "k8s.io/client-go/informers/core/v1" @@ -49,7 +53,7 @@ type PodsEvictionRestriction interface { CanEvict(pod *apiv1.Pod) bool // InPlaceUpdate updates the pod resources in-place - InPlaceUpdate(pod *apiv1.Pod, eventRecorder record.EventRecorder) error + InPlaceUpdate(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error // CanEvict checks if pod can be safely evicted CanInPlaceUpdate(pod *apiv1.Pod) bool } @@ -58,6 +62,7 @@ type podsEvictionRestrictionImpl struct { client kube_client.Interface podToReplicaCreatorMap map[string]podReplicaCreator creatorToSingleGroupStatsMap map[podReplicaCreator]singleGroupStats + patchCalculators []patch.Calculator } type singleGroupStats struct { @@ -73,7 +78,7 @@ type singleGroupStats struct { type PodsEvictionRestrictionFactory interface { // NewPodsEvictionRestriction creates PodsEvictionRestriction for given set of pods, // controlled by a single VPA object. 
- NewPodsEvictionRestriction(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) PodsEvictionRestriction + NewPodsEvictionRestriction(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, patchCalculators []patch.Calculator) PodsEvictionRestriction } type podsEvictionRestrictionFactoryImpl struct { @@ -218,7 +223,7 @@ func NewPodsEvictionRestrictionFactory(client kube_client.Interface, minReplicas // NewPodsEvictionRestriction creates PodsEvictionRestriction for a given set of pods, // controlled by a single VPA object. -func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) PodsEvictionRestriction { +func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, patchCalculators []patch.Calculator) PodsEvictionRestriction { // We can evict pod only if it is a part of replica set // For each replica set we can evict only a fraction of pods. // Evictions may be later limited by pod disruption budget if configured. @@ -290,7 +295,9 @@ func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []* return &podsEvictionRestrictionImpl{ client: f.client, podToReplicaCreatorMap: podToReplicaCreatorMap, - creatorToSingleGroupStatsMap: creatorToSingleGroupStatsMap} + creatorToSingleGroupStatsMap: creatorToSingleGroupStatsMap, + patchCalculators: patchCalculators, + } } func getPodReplicaCreator(pod *apiv1.Pod) (*podReplicaCreator, error) { @@ -429,18 +436,17 @@ func setUpInformer(kubeClient kube_client.Interface, kind controllerKind) (cache func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool { cr, present := e.podToReplicaCreatorMap[getPodID(pod)] - // TODO(jkyros): why is present checked twice? if present { // If our QoS class is guaranteed, we can't change the resources without a restart if pod.Status.QOSClass == apiv1.PodQOSGuaranteed { - klog.Warning("Can't resize %s in-place, pod QoS is %s", pod.Status.QOSClass) + klog.Warningf("Can't resize %s in-place, pod QoS is %s", pod.Name, pod.Status.QOSClass) return false } // If we're already resizing this pod, don't do it again if IsInPlaceUpdating(pod) { - klog.Warning("Not resizing %s, already resizing %s", pod.Name) + klog.Warningf("Not resizing %s, already resizing it", pod.Name) return false } @@ -454,6 +460,7 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool { noRestartPoliciesPopulated = false } + // TODO(maxcao13): Do we have to check the policy resource too? i.e. if only memory is getting scaled, then only check the memory resize policy? for _, policy := range container.ResizePolicy { if policy.RestartPolicy != apiv1.NotRequired { klog.Warningf("in-place resize of %s will cause container disruption, container %s restart policy is %v", pod.Name, container.Name, policy.RestartPolicy) @@ -469,7 +476,7 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool { // If none of the policies are populated, our feature is probably not enabled, so we can't in-place regardless if noRestartPoliciesPopulated { - klog.Warning("impossible to resize %s in-place, container resize policies are not populated", pod.Name) + klog.Warningf("impossible to resize %s in-place, container resize policies are not populated", pod.Name) } //TODO(jkyros): Come back and handle sidecar containers at some point since they're weird? 
@@ -480,7 +487,7 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool {
 			klog.V(4).Infof("Can't resize pending pod %s", pod.Name)
 			return false
 		}
-		// This second "present" check is against the crator-to-group-stats map, not the pod-to-replica map
+		// This second "present" check is against the creator-to-group-stats map, not the pod-to-replica map
 		if present {
 			klog.V(4).Infof("pod %s disruption tolerance run: %d config: %d tolerance: %d evicted: %d updating: %d", pod.Name, singleGroupStats.running, singleGroupStats.configured, singleGroupStats.evictionTolerance, singleGroupStats.evicted, singleGroupStats.inPlaceUpdating)
 			shouldBeAlive := singleGroupStats.configured - singleGroupStats.evictionTolerance
@@ -500,9 +507,9 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool {
 	return false
 }
 
-// InPlaceUpdate sends eviction instruction to api client. Returns error if pod cannot be in-place updated or if client returned error
+// InPlaceUpdate calculates patches and sends a resize request to the api client. Returns error if pod cannot be in-place updated or if client returned error.
 // Does not check if pod was actually in-place updated after grace period.
-func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, eventRecorder record.EventRecorder) error {
+func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error {
 	cr, present := e.podToReplicaCreatorMap[getPodID(podToUpdate)]
 	if !present {
 		return fmt.Errorf("pod not suitable for eviction %v : not in replicated pods map", podToUpdate.Name)
@@ -512,35 +519,53 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, even
 		return fmt.Errorf("cannot update pod %v in place : number of in-flight updates exceeded", podToUpdate.Name)
 	}
 
-	// TODO(jkyros): for now I'm just going to annotate the pod
-
-	// Modify the pod with the "hey please inplace update me" annotation
-	// We'll have the admission controller update the limits like it does
-	// today, and then remove the annotation with the patch
-	modifiedPod := podToUpdate.DeepCopy()
-	if modifiedPod.Annotations == nil {
-		modifiedPod.Annotations = make(map[string]string)
+	// TODO(maxcao13): There's probably a more efficient way to do this, but this is what we have for now
+	// Don't reinvent the wheel, just use the resource updates patch calculator used by the admission controller
+	var patches []resource_updates.PatchRecord
+	for _, calculator := range e.patchCalculators {
+		p, err := calculator.CalculatePatches(podToUpdate, vpa)
+		if err != nil {
+			return fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", podToUpdate.Namespace, podToUpdate.Name, err)
+		}
+		klog.V(4).Infof("Calculated patches for pod %s/%s: %v", podToUpdate.Namespace, podToUpdate.Name, p)
+		patches = append(patches, p...)
} - modifiedPod.Annotations["autoscaling.k8s.io/resize"] = "true" - // Give the update to the APIserver - _, err := e.client.CoreV1().Pods(podToUpdate.Namespace).Update(context.TODO(), modifiedPod, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("failed to update pod %s/%s, error: %v", podToUpdate.Namespace, podToUpdate.Name, err) + if len(patches) > 0 { + patch, err := json.Marshal(patches) + if err != nil { + klog.Errorf("Cannot marshal the patch %v: %v", patches, err) + return err + } + + res, err := e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{}, "resize") + if err != nil { + klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate)) + return err + } + klog.V(4).InfoS("In-place patched pod /resize subresource using patches ", "pod", klog.KObj(res), "patches", string(patch)) + + } else { + err := fmt.Errorf("no patches to apply to %s", podToUpdate.Name) + klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate)) return err } - eventRecorder.Event(podToUpdate, apiv1.EventTypeNormal, "MarkedByVPA", - "Pod was marked by VPA Updater to be updated in-place.") + + // TODO(maxcao13): If this keeps getting called on the same object with the same reason, it is considered a patch request. + // And we fail to have the corresponding rbac for it. So figure out if we need this later. + eventRecorder.Event(podToUpdate, apiv1.EventTypeNormal, "InPlaceResizedByVPA", + "Pod was resized in place by VPA Updater.") // TODO(jkyros): You need to do this regardless once you update the pod, if it changes phases here as a result, you still // need to catalog what you did if podToUpdate.Status.Phase == apiv1.PodRunning { singleGroupStats, present := e.creatorToSingleGroupStatsMap[cr] if !present { - return fmt.Errorf("Internal error - cannot find stats for replication group %v", cr) + klog.Errorf("Internal error - cannot find stats for replication group %v", cr) + } else { + singleGroupStats.inPlaceUpdating = singleGroupStats.inPlaceUpdating + 1 + e.creatorToSingleGroupStatsMap[cr] = singleGroupStats } - singleGroupStats.inPlaceUpdating = singleGroupStats.inPlaceUpdating + 1 - e.creatorToSingleGroupStatsMap[cr] = singleGroupStats } else { klog.Warningf("I updated, but my pod phase was %s", podToUpdate.Status.Phase) } @@ -548,6 +573,10 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, even return nil } +// func UpdatePodResources(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, inPlaceRecommendationProvider inPlaceRecommendationProvider) ([]vpa_api_util.ContainerResources, error) { +// return nil, nil +// } + // IsInPlaceUpdating checks whether or not the given pod is currently in the middle of an in-place update func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) { // If the pod is currently updating we need to tally that diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index a921dcabd2b..9a9aa80ff17 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -37,6 +37,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned" 
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned/scheme" @@ -45,11 +46,17 @@ import ( controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/eviction" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/priority" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" metrics_updater "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/updater" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) +const ( + DeferredResizeUpdateTimeout = 1 * time.Minute + InProgressResizeUpdateTimeout = 1 * time.Hour +) + // Updater performs updates on pods if recommended by Vertical Pod Autoscaler type Updater interface { // RunOnce represents single iteration in the main-loop of Updater @@ -70,6 +77,7 @@ type updater struct { statusValidator status.Validator controllerFetcher controllerfetcher.ControllerFetcher ignoredNamespaces []string + patchCalculators []patch.Calculator } // NewUpdater creates Updater with given configuration @@ -89,6 +97,7 @@ func NewUpdater( priorityProcessor priority.PriorityProcessor, namespace string, ignoredNamespaces []string, + patchCalculators []patch.Calculator, ) (Updater, error) { evictionRateLimiter := getRateLimiter(evictionRateLimit, evictionRateBurst) factory, err := eviction.NewPodsEvictionRestrictionFactory(kubeClient, minReplicasForEvicition, evictionToleranceFraction) @@ -113,6 +122,7 @@ func NewUpdater( statusNamespace, ), ignoredNamespaces: ignoredNamespaces, + patchCalculators: patchCalculators, }, nil } @@ -148,7 +158,7 @@ func (u *updater) RunOnce(ctx context.Context) { } if vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeRecreate && vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeAuto && vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeInPlaceOrRecreate { - klog.V(3).InfoS("Skipping VPA object because its mode is not \"UpdateOrRecreate\", \"Recreate\" or \"Auto\"", "vpa", klog.KObj(vpa)) + klog.V(3).InfoS("Skipping VPA object because its mode is not \"InPlaceOrRecreate\", \"Recreate\" or \"Auto\"", "vpa", klog.KObj(vpa)) continue } selector, err := u.selectorFetcher.Fetch(ctx, vpa) @@ -212,13 +222,16 @@ func (u *updater) RunOnce(ctx context.Context) { defer vpasWithInPlaceUpdatedPodsCounter.Observe() // NOTE: this loop assumes that controlledPods are filtered - // to contain only Pods controlled by a VPA in auto or recreate mode + // to contain only Pods controlled by a VPA in auto, recreate, or inPlaceOrRecreate mode + klog.V(2).InfoS("Processing controlled pods", "count", len(allLivePods)) for vpa, livePods := range controlledPods { vpaSize := len(livePods) controlledPodsCounter.Add(vpaSize, vpaSize) - evictionLimiter := u.evictionFactory.NewPodsEvictionRestriction(livePods, vpa) + evictionLimiter := u.evictionFactory.NewPodsEvictionRestriction(livePods, vpa, u.patchCalculators) // TODO(jkyros): I need to know the priority details here so I can use them to determine what we want to do to the pod // previously it was just "evict" but now we have to make decisions, so we need to know + // TODO(macao): Does the below filter mean that we may not be able to inplace scale pods that could be inplaced scaled, but non-evictable? + // Seems so, and if so, we should rework this filter to allow inplace scaling for non-evictable pods (is that what John was commenting above me? 
:p) podsForUpdate := u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa) evictablePodsCounter.Add(vpaSize, len(podsForUpdate)) @@ -229,7 +242,8 @@ func (u *updater) RunOnce(ctx context.Context) { pod := prioritizedPod.Pod() - // TODO(jkyros): Not ideal, but try to corral the mess from in-place VPA :) + // somewhere we should check if inplace pods actually need scaling or not, and if they will be disruptionless + // according to AEP: fallBackToEviction, err := u.AttemptInPlaceScalingIfPossible(ctx, vpaSize, vpa, pod, evictionLimiter, vpasWithInPlaceUpdatablePodsCounter, vpasWithInPlaceUpdatedPodsCounter) if err != nil { klog.Warningf("error attemptng to scale pod %v in-place: %v", pod.Name, err) @@ -243,7 +257,7 @@ func (u *updater) RunOnce(ctx context.Context) { continue } } else { - klog.Infof("Not falling back to eviction, probably because we don't have a recommendation yet?") + klog.Infof("Not falling back to eviction, because we don't have a recommendation yet? %v", pod.Name) continue } @@ -387,32 +401,51 @@ func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize i withInPlaceUpdatable := false withInPlaceUpdated := false - // TODO(jkyros): I don't think this can happen, it gets removed immediately by admission if admitted - if _, ok := pod.Annotations["autoscaling.k8s.io/resize"]; ok { - klog.V(4).Infof("Pod is %s already marked for resize, ignoring for now", pod.Name) - return - } - klog.V(4).Infof("Looks like we might be able to in-place update %s..", pod.Name) withInPlaceUpdatable = true // If I can't update if !evictionLimiter.CanInPlaceUpdate(pod) { - // But it's not because we're updating already... + // If we are already updating, wait for the next loop to progress if !eviction.IsInPlaceUpdating(pod) { + // But it's not in-place updating, something went wrong (e.g. the operation would change pods QoS) + // fall back to eviction klog.V(4).Infof("Can't in-place update pod %s, falling back to eviction, it might say no", pod.Name) fallBackToEviction = true return + } + // get lastInPlaceUpdateTime annotation, if doesn't exist, set it to now + lastUpdateTime, updateErr := annotations.ParseVpaLastInPlaceUpdateTimeValue(pod) + if updateErr != nil { + lastUpdateTime = time.Now() + klog.V(4).ErrorS(updateErr, "Error parsing lastInPlaceUpdateTime annotation, setting it to now", "pod", pod.Name) } - if pod.Status.Resize != apiv1.PodResizeStatusDeferred && pod.Status.Resize != apiv1.PodResizeStatusInfeasible { - klog.V(4).Infof("In-place update in progress for %s, not falling back to eviction", pod.Name) - fallBackToEviction = false - return + + // if currently inPlaceUpdating, we should only fallback to eviction if the update has failed, as in one of the following conditions: + // 1. .status.resize: Infeasible + // 2. .status.resize: Deferred + more than 1 minute has elapsed since the lastInPlaceUpdateTime + // 3. 
.status.resize: InProgress + more than 1 hour has elapsed since the lastInPlaceUpdateTime
+		switch pod.Status.Resize {
+		case apiv1.PodResizeStatusDeferred:
+			if time.Since(lastUpdateTime) > DeferredResizeUpdateTimeout {
+				klog.V(4).Infof("In-place update deferred for more than %v for %s, falling back to eviction", DeferredResizeUpdateTimeout, pod.Name)
+				fallBackToEviction = true
+			} else {
+				klog.V(4).Infof("In-place update deferred for %s, NOT falling back to eviction yet", pod.Name)
+			}
+		case apiv1.PodResizeStatusInProgress:
+			if time.Since(lastUpdateTime) > InProgressResizeUpdateTimeout {
+				klog.V(4).Infof("In-place update in progress for more than %v for %s, falling back to eviction", InProgressResizeUpdateTimeout, pod.Name)
+				fallBackToEviction = true
+			} else {
+				klog.V(4).Infof("In-place update in progress for %s, NOT falling back to eviction yet", pod.Name)
+			}
+		case apiv1.PodResizeStatusInfeasible:
+			klog.V(4).Infof("In-place update infeasible for %s, falling back to eviction", pod.Name)
+			fallBackToEviction = true
+		default:
+			klog.V(4).Infof("In-place update status %v unknown for %s, falling back to eviction", pod.Status.Resize, pod.Name)
+			fallBackToEviction = true
 		}
-		klog.V(4).Infof("In-place update looks stuck for %s, falling back to eviction", pod.Name)
-		fallBackToEviction = true
 		return
-
 	}
 
 	// TODO(jkyros): need our own rate limiter or can we freeload off the eviction one?
@@ -424,7 +457,7 @@ func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize i
 	}
 
 	klog.V(2).Infof("updating pod %v", pod.Name)
-	evictErr := evictionLimiter.InPlaceUpdate(pod, u.eventRecorder)
+	evictErr := evictionLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder)
 	if evictErr != nil {
 		klog.Warningf("updating pod %v failed: %v", pod.Name, evictErr)
 	} else {
diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go
index 762a301c1f8..5bade7a647d 100644
--- a/vertical-pod-autoscaler/pkg/updater/main.go
+++ b/vertical-pod-autoscaler/pkg/updater/main.go
@@ -26,19 +26,12 @@ import (
 	"github.com/spf13/pflag"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/uuid"
-	"k8s.io/client-go/informers"
-	kube_client "k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/tools/leaderelection"
-	"k8s.io/client-go/tools/leaderelection/resourcelock"
-	kube_flag "k8s.io/component-base/cli/flag"
-	componentbaseconfig "k8s.io/component-base/config"
-	componentbaseoptions "k8s.io/component-base/config/options"
-	"k8s.io/klog/v2"
-
 	"k8s.io/autoscaler/vertical-pod-autoscaler/common"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch"
 	vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target"
 	controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/eviction"
 	updater "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/logic"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/priority"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/limitrange"
@@ -47,6 +40,14 @@ import (
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/server"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status"
 	vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
+	"k8s.io/client-go/informers"
+	kube_client "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/leaderelection"
+	"k8s.io/client-go/tools/leaderelection/resourcelock"
+	kube_flag 
"k8s.io/component-base/cli/flag" + componentbaseconfig "k8s.io/component-base/config" + componentbaseoptions "k8s.io/component-base/config/options" + "k8s.io/klog/v2" ) var ( @@ -89,7 +90,7 @@ func main() { componentbaseoptions.BindLeaderElectionFlags(&leaderElection, pflag.CommandLine) kube_flag.InitFlags() - klog.V(1).InfoS("Vertical Pod Autoscaler Updater", "version", common.VerticalPodAutoscalerVersion) + klog.V(1).InfoS("Vertical Pod Autoscaler Updater FOR INPALCE!!!!!!", "version", common.VerticalPodAutoscalerVersion) if len(commonFlags.VpaObjectNamespace) > 0 && len(commonFlags.IgnoredVpaObjectNamespaces) > 0 { klog.Fatalf("--vpa-object-namespace and --ignored-vpa-object-namespaces are mutually exclusive and can't be set together.") @@ -182,6 +183,11 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { ignoredNamespaces := strings.Split(commonFlag.IgnoredVpaObjectNamespaces, ",") + // in-place recommendation provider + inPlaceRecommendationProvider := eviction.NewInPlaceProvider(limitRangeCalculator, vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator)) + // resource calculator which will generate patches + calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider)} + // TODO: use SharedInformerFactory in updater updater, err := updater.NewUpdater( kubeClient, @@ -199,6 +205,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { priority.NewProcessor(), commonFlag.VpaObjectNamespace, ignoredNamespaces, + calculators, ) if err != nil { klog.Fatalf("Failed to create updater: %v", err) diff --git a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go index 10869e4ad10..aa563a5a5e8 100644 --- a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go +++ b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go @@ -116,7 +116,7 @@ func (calc *UpdatePriorityCalculator) AddPod(pod *apiv1.Pod, now time.Time) { } } - disruptionlessRecommendation := calc.CalcualteDisruptionFreeActions(pod, processedRecommendation) + disruptionlessRecommendation := calc.CalculateDisruptionFreeActions(pod, processedRecommendation) // The update is allowed in following cases: // - the request is outside the recommended range for some container. @@ -128,22 +128,27 @@ func (calc *UpdatePriorityCalculator) AddPod(pod *apiv1.Pod, now time.Time) { klog.V(4).InfoS("Not updating pod, missing field pod.Status.StartTime", "pod", klog.KObj(pod)) return } + // TODO(maxcao13): hopefully this doesn't break anything but we switch the order so that significant change is checked first before lifetime + // this way we don't in-place scale it for insignificant change, else we would will mark it disruptionless and still have an in-place update + if updatePriority.ResourceDiff < calc.config.MinChangePriority { + klog.V(4).InfoS("Not updating pod, resource diff too low", "pod", klog.KObj(pod), "updatePriority", updatePriority) + return + } if now.Before(pod.Status.StartTime.Add(*podLifetimeUpdateThreshold)) { // TODO(jkyros): do we need an in-place update threshold arg ? 
// If our recommendations are disruptionless, we can bypass the threshold limit if len(disruptionlessRecommendation.ContainerRecommendations) > 0 { - klog.V(2).Infof("pod accepted for DISRUPTIONLESS (%d/%d) update %v/%v with priority %v", len(disruptionlessRecommendation.ContainerRecommendations), len(processedRecommendation.ContainerRecommendations), pod.Namespace, pod.Name, updatePriority.ResourceDiff) + klog.V(2).Infof("Short-lived, but pod still accepted for DISRUPTIONLESS (%d/%d) in-place update %v/%v with priority %v", len(disruptionlessRecommendation.ContainerRecommendations), len(processedRecommendation.ContainerRecommendations), pod.Namespace, pod.Name, updatePriority.ResourceDiff) updatePriority.Disruptionless = true calc.pods = append(calc.pods, PrioritizedPod{ pod: pod, priority: updatePriority, recommendation: disruptionlessRecommendation}) + } else { + // if it's not disruptionless, we fallback to the Recreate conditions which already failed + // (quick oom, outside recommended range, long-lived + significant change), so don't update this pod + klog.V(4).InfoS("Not updating a short-lived pod, request within recommended range", "pod", klog.KObj(pod)) } - klog.V(4).InfoS("Not updating a short-lived pod, request within recommended range", "pod", klog.KObj(pod)) - return - } - if updatePriority.ResourceDiff < calc.config.MinChangePriority { - klog.V(4).InfoS("Not updating pod, resource diff too low", "pod", klog.KObj(pod), "updatePriority", updatePriority) return } } @@ -296,9 +301,9 @@ func (p PodPriority) Less(other PodPriority) bool { return p.ResourceDiff < other.ResourceDiff } -// CalcualteDisruptionFreeActions calculates the set of actions we think we can perform without disruption based on the pod/container resize/restart +// CalculateDisruptionFreeActions calculates the set of actions we think we can perform without disruption based on the pod/container resize/restart // policies and returns that set of actions. -func (calc *UpdatePriorityCalculator) CalcualteDisruptionFreeActions(pod *apiv1.Pod, recommendation *vpa_types.RecommendedPodResources) *vpa_types.RecommendedPodResources { +func (calc *UpdatePriorityCalculator) CalculateDisruptionFreeActions(pod *apiv1.Pod, recommendation *vpa_types.RecommendedPodResources) *vpa_types.RecommendedPodResources { var disruptionlessRecommendation = &vpa_types.RecommendedPodResources{} diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go new file mode 100644 index 00000000000..a130bfdf28c --- /dev/null +++ b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go @@ -0,0 +1,53 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package annotations + +import ( + "fmt" + "time" + + apiv1 "k8s.io/api/core/v1" +) + +const ( + // VpaLastInPlaceUpdateTime is a label used by the vpa last in place update annotation. 
+ VpaLastInPlaceUpdateTime = "vpaLastInPlaceUpdateTime" +) + +// GetVpaLastInPlaceUpdatedTimeValue creates an annotation value for a given pod. +func GetVpaLastInPlaceUpdatedTimeValue(lastUpdated time.Time) string { + // return string version of v1.Time + return lastUpdated.Format(time.RFC3339) +} + +// ParseVpaLastInPlaceUpdateTimeValue returns the last in-place update time from a pod annotation. +func ParseVpaLastInPlaceUpdateTimeValue(pod *apiv1.Pod) (time.Time, error) { + if pod == nil { + return time.Time{}, fmt.Errorf("pod is nil") + } + + if pod.Annotations == nil { + return time.Time{}, fmt.Errorf("pod.Annotations is nil") + } + + lastInPlaceUpdateTime, ok := pod.Annotations[VpaLastInPlaceUpdateTime] + if !ok { + return time.Time{}, fmt.Errorf("pod.Annotations[VpaLastInPlaceUpdateTime] is nil") + } + + return time.Parse(time.RFC3339, lastInPlaceUpdateTime) +} From b9b92179a5afb70ce8407b6b04290c300333765f Mon Sep 17 00:00:00 2001 From: Max Cao Date: Thu, 2 Jan 2025 16:26:10 -0800 Subject: [PATCH 24/26] VPA: add in-place vertical scaling actuation tests Signed-off-by: Max Cao --- vertical-pod-autoscaler/deploy/vpa-rbac.yaml | 4 +- vertical-pod-autoscaler/e2e/v1/actuation.go | 348 ++++++++++++++++++ .../e2e/v1/admission_controller.go | 32 ++ vertical-pod-autoscaler/e2e/v1/common.go | 21 +- .../hack/inplace-kind-config.yaml | 4 + .../hack/run-e2e-locally.sh | 4 +- .../updater/eviction/last_inplace_update.go | 14 +- .../eviction/pods_eviction_restriction.go | 52 +-- .../pods_eviction_restriction_test.go | 43 ++- .../pkg/updater/logic/updater.go | 85 +++-- .../pkg/updater/logic/updater_test.go | 10 +- vertical-pod-autoscaler/pkg/updater/main.go | 8 +- .../utils/annotations/vpa_inplace_update.go | 32 ++ .../annotations/vpa_last_inplace_update.go | 53 --- .../pkg/utils/test/test_utils.go | 2 +- 15 files changed, 561 insertions(+), 151 deletions(-) create mode 100644 vertical-pod-autoscaler/hack/inplace-kind-config.yaml create mode 100644 vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go delete mode 100644 vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go diff --git a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml index fdc797c3371..fc1c04154e7 100644 --- a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml @@ -128,9 +128,9 @@ rules: - apiGroups: - "" resources: - - pods + - pods/resize verbs: - - update + - patch --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/vertical-pod-autoscaler/e2e/v1/actuation.go b/vertical-pod-autoscaler/e2e/v1/actuation.go index 124d2c40822..7bcca72fe96 100644 --- a/vertical-pod-autoscaler/e2e/v1/actuation.go +++ b/vertical-pod-autoscaler/e2e/v1/actuation.go @@ -167,6 +167,346 @@ var _ = ActuationSuiteE2eDescribe("Actuation", func() { gomega.Expect(foundUpdated).To(gomega.Equal(1)) }) + ginkgo.It("still applies recommendations on restart when update mode is InPlaceOrRecreate", func() { + ginkgo.By("Setting up a hamster deployment") + SetupHamsterDeployment(f, "100m", "100Mi", defaultHamsterReplicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podSet := MakePodSet(podList) + + ginkgo.By("Setting up a VPA CRD in mode InPlaceOrRecreate") + containerName := GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). 
+ WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithContainer(containerName). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("200m", ""). + WithLowerBound("200m", ""). + WithUpperBound("200m", ""). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + updatedCPURequest := ParseQuantityOrDie("200m") + + ginkgo.By(fmt.Sprintf("Waiting for pods to be evicted, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoPodsEvicted(f, podSet) + ginkgo.By("Forcefully killing one pod") + killPod(f, podList) + + ginkgo.By("Checking that request was modified after forceful restart") + updatedPodList, _ := GetHamsterPods(f) + var foundUpdated int32 + for _, pod := range updatedPodList.Items { + podRequest := getCPURequest(pod.Spec) + framework.Logf("podReq: %v", podRequest) + if podRequest.Cmp(updatedCPURequest) == 0 { + foundUpdated += 1 + } + } + gomega.Expect(foundUpdated).To(gomega.Equal(defaultHamsterReplicas)) + }) + + // TODO(maxcao13): disruptionless means no container/pod restart; however NotRequired policy does not guarantee this... + ginkgo.It("applies disruptionless in-place updates to all containers where request is within bounds when update mode is InPlaceOrRecreate", func() { + ginkgo.By("Setting up a hamster deployment with all containers with NotRequired resize policies") + replicas := int32(2) + expectedContainerRestarts := int32(0) + SetupHamsterDeployment(f, "100m", "100Mi", replicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podSet := MakePodSet(podList) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + updatedCPU := "250m" + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(updatedCPU, "200Mi"). + WithLowerBound("100m", "100Mi"). + WithUpperBound("300m", "250Mi"). + GetContainerResources()). 
+ Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By(fmt.Sprintf("Waiting for pods to be evicted, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoPodsEvicted(f, podSet) + + ginkgo.By("Checking that request was modified after a while due to in-place update, and was disruptionless") + updatedPodList, _ := GetHamsterPods(f) + var foundUpdated, foundContainerRestarts int32 = 0, 0 + for _, pod := range updatedPodList.Items { + podRequest := getCPURequest(pod.Spec) + containerRestarts := getContainerRestarts(pod.Status) + framework.Logf("podReq: %v, containerRestarts: %v", podRequest, getContainerRestarts(pod.Status)) + if podRequest.Cmp(ParseQuantityOrDie(updatedCPU)) == 0 { + foundUpdated += 1 + } + foundContainerRestarts += containerRestarts + } + gomega.Expect(foundUpdated).To(gomega.Equal(replicas)) + gomega.Expect(foundContainerRestarts).To(gomega.Equal(expectedContainerRestarts)) + }) + + ginkgo.It("applies partial disruptionless in-place updates to a pod when request is within bounds when update mode is InPlaceOrRecreate", func() { + ginkgo.By("Setting up a hamster deployment with first container using RestartContainer resize policies") + cpuQuantity := ParseQuantityOrDie("100m") + memoryQuantity := ParseQuantityOrDie("100Mi") + replicas := int32(2) + containers := 2 + expectedContainerRestarts := int32(1) * replicas // 1 container restart per pod + d := NewNHamstersDeployment(f, containers) + d.Spec.Template.Spec.Containers[0].Resources.Requests = apiv1.ResourceList{ + apiv1.ResourceCPU: cpuQuantity, + apiv1.ResourceMemory: memoryQuantity, + } + d.Spec.Replicas = &replicas + d.Spec.Template.Spec.Containers[0].ResizePolicy = []apiv1.ContainerResizePolicy{ + { + ResourceName: apiv1.ResourceCPU, + RestartPolicy: apiv1.RestartContainer, + }, + { + ResourceName: apiv1.ResourceMemory, + RestartPolicy: apiv1.RestartContainer, + }, + } + d, err := f.ClientSet.AppsV1().Deployments(f.Namespace.Name).Create(context.TODO(), d, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error when starting deployment creation") + err = framework_deployment.WaitForDeploymentComplete(f.ClientSet, d) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error waiting for deployment creation to finish") + + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podSet := MakePodSet(podList) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + updatedCPU := "250m" + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(updatedCPU, "200Mi"). + WithLowerBound("100m", "100Mi"). + WithUpperBound("300m", "250Mi"). + GetContainerResources()). 
+ Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By(fmt.Sprintf("Waiting for pods to be evicted, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoPodsEvicted(f, podSet) + + ginkgo.By("Checking that request was modified after a while due to in-place update, and was partially disruptionless") + updatedPodList, _ := GetHamsterPods(f) + var foundUpdated, foundContainerRestarts int32 = 0, 0 + for _, pod := range updatedPodList.Items { + podRequest := getCPURequest(pod.Spec) + containerRestarts := getContainerRestarts(pod.Status) + framework.Logf("podReq: %v, containerRestarts: %v", podRequest, getContainerRestarts(pod.Status)) + if podRequest.Cmp(ParseQuantityOrDie(updatedCPU)) == 0 { + foundUpdated += 1 + } + foundContainerRestarts += containerRestarts + } + gomega.Expect(foundUpdated).To(gomega.Equal(replicas)) + gomega.Expect(foundContainerRestarts).To(gomega.Equal(expectedContainerRestarts)) + }) + + // TODO(maxcao13): To me how this test fits in the test plan does not make sense. Whether the container is + // able to be disrupted is not determined by the VPA but is by the kubelet, so why would we force the container + // to restart if the request is out of the recommendation bounds? I must be missing something... + ginkgo.It("applies disruptive in-place updates to a container in all pods when request out of bounds when update mode is InPlaceOrRecreate", func() { + ginkgo.Skip("This test should be re-enabled once we have a better understanding of how to test this scenario") + + ginkgo.By("Setting up a hamster deployment with a container using NotRequired resize policies") + replicas := int32(2) + expectedContainerRestarts := int32(1) * replicas // 1 container restart per pod + SetupHamsterDeployment(f, "1", "1Gi", replicas) // outside recommendation range + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podSet := MakePodSet(podList) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + updatedCPU := "250m" + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(updatedCPU, "200Mi"). + WithLowerBound("150m", "150Mi"). + WithUpperBound("300m", "250Mi"). + GetContainerResources()). 
+ Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By(fmt.Sprintf("Waiting for pods to be evicted, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoPodsEvicted(f, podSet) + + ginkgo.By("Checking that request was modified after a while due to in-place update, and was disruptionless") + updatedPodList, _ := GetHamsterPods(f) + var foundUpdated, foundContainerRestarts int32 = 0, 0 + for _, pod := range updatedPodList.Items { + podRequest := getCPURequest(pod.Spec) + containerRestarts := getContainerRestarts(pod.Status) + framework.Logf("podReq: %v, containerRestarts: %v", podRequest, getContainerRestarts(pod.Status)) + if podRequest.Cmp(ParseQuantityOrDie(updatedCPU)) == 0 { + foundUpdated += 1 + } + foundContainerRestarts += containerRestarts + } + gomega.Expect(foundUpdated).To(gomega.Equal(replicas)) + gomega.Expect(foundContainerRestarts).To(gomega.Equal(expectedContainerRestarts)) + }) + + // TODO(maxcao13): disruptive in-place fails and we have to evict (InPlaceOrRecreate) + // This particular test checks if we fallback after a resize infeasible, particularly from the node not having enough resources (as specified in the AEP) + // Should we check other conditions that result in eviction fallback? + ginkgo.It("falls back to evicting pods when in-place update is Infeasible when update mode is InPlaceOrRecreate", func() { + ginkgo.By("Setting up a hamster deployment") + replicas := int32(2) + SetupHamsterDeployment(f, "100m", "100Mi", replicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + updatedCPU := "999" // infeasible target + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(updatedCPU, "200Mi"). + WithLowerBound("150m", "150Mi"). + WithUpperBound("300m", "250Mi"). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By(fmt.Sprintf("Waiting for in-place update, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoContainersRestarted(f) + + ginkgo.By("Waiting for pods to be evicted") + err = WaitForPodsEvicted(f, podList) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("falls back to evicting pods when pod QoS class changes when update mode is InPlaceOrRecreate", func() { + ginkgo.By("Setting up a hamster deployment") + replicas := int32(2) + + d := NewHamsterDeploymentWithGuaranteedResources(f, ParseQuantityOrDie("300m"), ParseQuantityOrDie("400Mi")) + d.Spec.Replicas = &replicas + d, err := f.ClientSet.AppsV1().Deployments(f.Namespace.Name).Create(context.TODO(), d, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error when starting deployment creation") + err = framework_deployment.WaitForDeploymentComplete(f.ClientSet, d) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error waiting for deployment creation to finish") + + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + updatedCPU := "200m" + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). 
+ WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(updatedCPU, "200Mi"). + WithLowerBound("150m", "150Mi"). + WithUpperBound("300m", "250Mi"). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By(fmt.Sprintf("Waiting for in-place update, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoContainersRestarted(f) + + ginkgo.By("Waiting for pods to be evicted") + err = WaitForPodsEvicted(f, podList) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("falls back to evicting pods when resize is Deferred and more than 1 minute has elapsed since last in-place update when update mode is InPlaceOrRecreate", func() { + ginkgo.By("Setting up a hamster deployment") + replicas := int32(2) + SetupHamsterDeployment(f, "100m", "100Mi", replicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + + // get node name + nodeName := podList.Items[0].Spec.NodeName + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + allocatableCPU := node.Status.Allocatable[apiv1.ResourceCPU] + updatedCPU := allocatableCPU.String() + + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(updatedCPU, "200Mi"). + WithLowerBound("150m", "150Mi"). + WithUpperBound("300m", "250Mi"). + GetContainerResources()). 
+ Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By(fmt.Sprintf("Waiting for in-place update, hoping it won't happen, sleep for %s", VpaEvictionTimeout.String())) + CheckNoContainersRestarted(f) + + ginkgo.By("Waiting for pods to be evicted") + err = WaitForPodsEvicted(f, podList) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + perControllerTests := []struct { apiVersion string kind string @@ -519,6 +859,14 @@ func getCPURequest(podSpec apiv1.PodSpec) resource.Quantity { return podSpec.Containers[0].Resources.Requests[apiv1.ResourceCPU] } +// getContainerRestarts returns the sum of container restartCounts for all containers in the pod +func getContainerRestarts(podStatus apiv1.PodStatus) (restarts int32) { + for _, containerStatus := range podStatus.ContainerStatuses { + restarts += containerStatus.RestartCount + } + return +} + func killPod(f *framework.Framework, podList *apiv1.PodList) { f.ClientSet.CoreV1().Pods(f.Namespace.Name).Delete(context.TODO(), podList.Items[0].Name, metav1.DeleteOptions{}) err := WaitForPodsRestarted(f, podList) diff --git a/vertical-pod-autoscaler/e2e/v1/admission_controller.go b/vertical-pod-autoscaler/e2e/v1/admission_controller.go index d4e79c77828..9fc98f48b25 100644 --- a/vertical-pod-autoscaler/e2e/v1/admission_controller.go +++ b/vertical-pod-autoscaler/e2e/v1/admission_controller.go @@ -907,6 +907,38 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() { gomega.Expect(err.Error()).To(gomega.MatchRegexp(`.*admission webhook .*vpa.* denied the request: .*`), "Admission controller did not inspect the object") }) + ginkgo.It("starts pods with new recommended request with InPlaceOrRecreate mode", func() { + d := NewHamsterDeploymentWithResources(f, ParseQuantityOrDie("100m") /*cpu*/, ParseQuantityOrDie("100Mi") /*memory*/) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("250m", "200Mi"). + WithLowerBound("250m", "200Mi"). + WithUpperBound("250m", "200Mi"). + GetContainerResources()). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + podList := startDeploymentPods(f, d) + + // Originally Pods had 100m CPU, 100Mi of memory, but admission controller + // should change it to recommended 250m CPU and 200Mi of memory. + for _, pod := range podList.Items { + gomega.Expect(pod.Spec.Containers[0].Resources.Requests[apiv1.ResourceCPU]).To(gomega.Equal(ParseQuantityOrDie("250m"))) + gomega.Expect(pod.Spec.Containers[0].Resources.Requests[apiv1.ResourceMemory]).To(gomega.Equal(ParseQuantityOrDie("200Mi"))) + } + }) }) func startDeploymentPods(f *framework.Framework, deployment *appsv1.Deployment) *apiv1.PodList { diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go index 70be30a2dd8..3eaf89bd9a8 100644 --- a/vertical-pod-autoscaler/e2e/v1/common.go +++ b/vertical-pod-autoscaler/e2e/v1/common.go @@ -165,15 +165,6 @@ func NewHamsterDeploymentWithResources(f *framework.Framework, cpuQuantity, memo apiv1.ResourceCPU: cpuQuantity, apiv1.ResourceMemory: memoryQuantity, } - // TODO(jkyros): It seems to behave differently if we have limits? 
-	/*
-		cpuQuantity.Add(resource.MustParse("100m"))
-		memoryQuantity.Add(resource.MustParse("100Mi"))
-
-		d.Spec.Template.Spec.Containers[0].Resources.Limits = apiv1.ResourceList{
-			apiv1.ResourceCPU:    cpuQuantity,
-			apiv1.ResourceMemory: memoryQuantity,
-		}*/
 	return d
 }
 
@@ -464,6 +455,19 @@ func CheckNoPodsEvicted(f *framework.Framework, initialPodSet PodSet) {
 	gomega.Expect(restarted).To(gomega.Equal(0), "there should be no pod evictions")
 }
 
+// CheckNoContainersRestarted waits for VpaEvictionTimeout and then verifies that no hamster pod containers have restarted.
+func CheckNoContainersRestarted(f *framework.Framework) {
+	var foundContainerRestarts int32
+	time.Sleep(VpaEvictionTimeout)
+	podList, err := GetHamsterPods(f)
+	gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error when listing hamster pods to check number of container restarts")
+	for _, pod := range podList.Items {
+		containerRestarts := getContainerRestarts(pod.Status)
+		foundContainerRestarts += containerRestarts
+	}
+	gomega.Expect(foundContainerRestarts).To(gomega.Equal(int32(0)), "there should be no container restarts")
+}
+
 // WaitForVPAMatch pools VPA object until match function returns true. Returns
 // polled vpa object. On timeout returns error.
 func WaitForVPAMatch(c vpa_clientset.Interface, vpa *vpa_types.VerticalPodAutoscaler, match func(vpa *vpa_types.VerticalPodAutoscaler) bool) (*vpa_types.VerticalPodAutoscaler, error) {
diff --git a/vertical-pod-autoscaler/hack/inplace-kind-config.yaml b/vertical-pod-autoscaler/hack/inplace-kind-config.yaml
new file mode 100644
index 00000000000..9d20acec394
--- /dev/null
+++ b/vertical-pod-autoscaler/hack/inplace-kind-config.yaml
@@ -0,0 +1,4 @@
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+featureGates:
+  InPlacePodVerticalScaling: true
diff --git a/vertical-pod-autoscaler/hack/run-e2e-locally.sh b/vertical-pod-autoscaler/hack/run-e2e-locally.sh
index 48d8e0f789f..ca0ce71ef5b 100755
--- a/vertical-pod-autoscaler/hack/run-e2e-locally.sh
+++ b/vertical-pod-autoscaler/hack/run-e2e-locally.sh
@@ -74,8 +74,8 @@ echo "Deleting KIND cluster 'kind'."
kind delete cluster -n kind -q echo "Creating KIND cluster 'kind'" -KIND_VERSION="kindest/node:v1.26.3" -kind create cluster --image=${KIND_VERSION} +KIND_VERSION="kindest/node:v1.32.0" +kind create cluster --image=${KIND_VERSION} --config ${SCRIPT_ROOT}/hack/inplace-kind-config.yaml echo "Building metrics-pump image" docker build -t localhost:5001/write-metrics:dev -f ${SCRIPT_ROOT}/hack/e2e/Dockerfile.externalmetrics-writer ${SCRIPT_ROOT}/hack diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go b/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go index b7692e09c7b..98792d7b2c0 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go @@ -25,15 +25,15 @@ import ( "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" ) -type observedContainers struct{} +type inPlaceUpdate struct{} -func (*observedContainers) CalculatePatches(pod *core.Pod, _ *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { - vpaObservedContainersValue := annotations.GetVpaObservedContainersValue(pod) - return []resource_admission.PatchRecord{patch.GetAddAnnotationPatch(annotations.VpaObservedContainersLabel, vpaObservedContainersValue)}, nil +func (*inPlaceUpdate) CalculatePatches(pod *core.Pod, _ *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { + vpaInPlaceUpdatedValue := annotations.GetVpaInPlaceUpdatedValue() + return []resource_admission.PatchRecord{patch.GetAddAnnotationPatch(annotations.VpaInPlaceUpdatedLabel, vpaInPlaceUpdatedValue)}, nil } -// NewLastInPlaceUpdateCalculator returns calculator for +// NewInPlaceUpdatedCalculator returns calculator for // observed containers patches. -func NewLastInPlaceUpdateCalculator() patch.Calculator { - return &observedContainers{} +func NewInPlaceUpdatedCalculator() patch.Calculator { + return &inPlaceUpdate{} } diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go index 45d00cbe406..725f0bc45d3 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go @@ -109,7 +109,7 @@ type podReplicaCreator struct { // CanEvict checks if pod can be safely evicted func (e *podsEvictionRestrictionImpl) CanEvict(pod *apiv1.Pod) bool { - cr, present := e.podToReplicaCreatorMap[getPodID(pod)] + cr, present := e.podToReplicaCreatorMap[GetPodID(pod)] if present { singleGroupStats, present := e.creatorToSingleGroupStatsMap[cr] if pod.Status.Phase == apiv1.PodPending { @@ -123,7 +123,14 @@ func (e *podsEvictionRestrictionImpl) CanEvict(pod *apiv1.Pod) bool { // we need eviction to take the numbers into account so we don't violate our disruption dolerances. // If we're already resizing this pod, don't do anything to it, unless we failed to resize it, then we want to evict it. 
if IsInPlaceUpdating(pod) { - klog.V(4).Infof("pod %s disruption tolerance: %d config: %d tolerance: %d evicted: %d updating: %d", pod.Name, singleGroupStats.running, singleGroupStats.configured, singleGroupStats.evictionTolerance, singleGroupStats.evicted, singleGroupStats.inPlaceUpdating) + klog.V(4).InfoS("Pod disruption tolerance", + "pod", pod.Name, + "running", singleGroupStats.running, + "configured", singleGroupStats.configured, + "tolerance", singleGroupStats.evictionTolerance, + "evicted", singleGroupStats.evicted, + "updating", singleGroupStats.inPlaceUpdating) + if singleGroupStats.running-(singleGroupStats.evicted+(singleGroupStats.inPlaceUpdating-1)) > shouldBeAlive { klog.V(4).Infof("Would be able to evict, but already resizing %s", pod.Name) @@ -154,7 +161,7 @@ func (e *podsEvictionRestrictionImpl) CanEvict(pod *apiv1.Pod) bool { // Evict sends eviction instruction to api client. Returns error if pod cannot be evicted or if client returned error // Does not check if pod was actually evicted after eviction grace period. func (e *podsEvictionRestrictionImpl) Evict(podToEvict *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error { - cr, present := e.podToReplicaCreatorMap[getPodID(podToEvict)] + cr, present := e.podToReplicaCreatorMap[GetPodID(podToEvict)] if !present { return fmt.Errorf("pod not suitable for eviction %s/%s: not in replicated pods map", podToEvict.Namespace, podToEvict.Name) } @@ -255,6 +262,7 @@ func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []* for creator, replicas := range livePods { actual := len(replicas) + // TODO(maxcao13): Does minReplicas matter if the update is in-place? There should be no downtime if the update is disruptionless if actual < required { klog.V(2).InfoS("Too few replicas", "kind", creator.Kind, "object", klog.KRef(creator.Namespace, creator.Name), "livePods", actual, "requiredPods", required, "globalMinReplicas", f.minReplicas) continue @@ -277,7 +285,7 @@ func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []* singleGroup.configured = configured singleGroup.evictionTolerance = int(float64(configured) * f.evictionToleranceFraction) for _, pod := range replicas { - podToReplicaCreatorMap[getPodID(pod)] = creator + podToReplicaCreatorMap[GetPodID(pod)] = creator if pod.Status.Phase == apiv1.PodPending { singleGroup.pending = singleGroup.pending + 1 } @@ -313,7 +321,7 @@ func getPodReplicaCreator(pod *apiv1.Pod) (*podReplicaCreator, error) { return podReplicaCreator, nil } -func getPodID(pod *apiv1.Pod) string { +func GetPodID(pod *apiv1.Pod) string { if pod == nil { return "" } @@ -435,10 +443,11 @@ func setUpInformer(kubeClient kube_client.Interface, kind controllerKind) (cache // CanInPlaceUpdate performs the same checks func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool { - cr, present := e.podToReplicaCreatorMap[getPodID(pod)] + cr, present := e.podToReplicaCreatorMap[GetPodID(pod)] if present { // If our QoS class is guaranteed, we can't change the resources without a restart + // TODO(maxcao13): kubelet already prevents a resize of a guaranteed pod, so should we still check this early? 
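// Illustrative sketch, not part of the patch: whether an in-place resize is "disruptionless"
// ultimately depends on each container's resize policy (assuming the InPlacePodVerticalScaling
// fields in k8s.io/api/core/v1; the helper name below is hypothetical). An explicit
// RestartContainer policy forces a container restart, while an absent policy defaults to NotRequired.
func resizeRequiresRestart(container apiv1.Container, res apiv1.ResourceName) bool {
	for _, policy := range container.ResizePolicy {
		if policy.ResourceName == res {
			// RestartContainer means the kubelet restarts the container to apply the new resources.
			return policy.RestartPolicy == apiv1.RestartContainer
		}
	}
	// No explicit policy for this resource defaults to NotRequired, i.e. no restart.
	return false
}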
 		if pod.Status.QOSClass == apiv1.PodQOSGuaranteed {
 			klog.Warningf("Can't resize %s in-place, pod QoS is %s", pod.Name, pod.Status.QOSClass)
 			return false
@@ -507,10 +516,11 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool {
 	return false
 }
 
+// TODO(maxcao13): Maybe we want to move all this updating logic outside of this method receiver or into a different package/file?
 // InPlaceUpdate calculates patches and sends a resize request to the API client. Returns an error if the pod cannot be in-place updated or if the client returned an error.
 // Does not check if pod was actually in-place updated after grace period.
 func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error {
-	cr, present := e.podToReplicaCreatorMap[getPodID(podToUpdate)]
+	cr, present := e.podToReplicaCreatorMap[GetPodID(podToUpdate)]
 	if !present {
 		return fmt.Errorf("pod not suitable for eviction %v : not in replicated pods map", podToUpdate.Name)
 	}
@@ -521,13 +531,17 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 
 	// TODO(maxcao13): There's probably a more efficient way to do this, but this is what we have for now
 	// Don't reinvent the wheel, just use the resource updates patch calculator used by the admission controller
-	var patches []resource_updates.PatchRecord
+	// See the below TODO for refactoring this
+	patches := []resource_updates.PatchRecord{}
+	if podToUpdate.Annotations == nil {
+		patches = append(patches, patch.GetAddEmptyAnnotationsPatch())
+	}
 	for _, calculator := range e.patchCalculators {
 		p, err := calculator.CalculatePatches(podToUpdate, vpa)
 		if err != nil {
 			return fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", podToUpdate.Namespace, podToUpdate.Name, err)
 		}
-		klog.V(4).Infof("Calculated patches for pod %s/%s: %v", podToUpdate.Namespace, podToUpdate.Name, p)
+		klog.V(4).InfoS("Calculated patches for pod", "pod", klog.KObj(podToUpdate), "patches", p)
 		patches = append(patches, p...)
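		// For illustration only (resource values are hypothetical): once marshaled, the accumulated
		// patches form a JSON Patch built from the "/spec/containers/%d/resources/%s/%s" paths that
		// the admission controller's resource updates calculator produces, and are sent to the pod's
		// "resize" subresource below, e.g.:
		//   [
		//     {"op": "add", "path": "/spec/containers/0/resources/requests/cpu", "value": "200m"},
		//     {"op": "add", "path": "/spec/containers/0/resources/requests/memory", "value": "262144k"}
		//   ]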
} @@ -538,6 +552,9 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa return err } + // TODO(maxcao13): for now, this does not allow other patches to be applied since we are patching the resize subresource + // If we want other annotations to be patched in the pod using patchCalculators, we need to generalize this and refactor the + // NewResourceUpdatesCalculator and create a stripped down calculator just for calculating in-place resize patches res, err := e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{}, "resize") if err != nil { klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate)) @@ -573,20 +590,16 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa return nil } -// func UpdatePodResources(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, inPlaceRecommendationProvider inPlaceRecommendationProvider) ([]vpa_api_util.ContainerResources, error) { -// return nil, nil -// } - // IsInPlaceUpdating checks whether or not the given pod is currently in the middle of an in-place update func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) { // If the pod is currently updating we need to tally that if podToCheck.Status.Resize != "" { - klog.V(4).Infof("Resize of %s is in %s phase", podToCheck.Name, podToCheck.Status.Resize) + klog.V(4).InfoS("Pod is currently resizing", "pod", klog.KObj(podToCheck), "status", podToCheck.Status.Resize) // Proposed -> Deferred -> InProgress, but what about Infeasible? if podToCheck.Status.Resize == apiv1.PodResizeStatusInfeasible { - klog.V(4).Infof("Resource propopsal for %s is %v, we're probably stuck like this until we evict", podToCheck.Status.Resize) + klog.V(4).InfoS("Resource proposal for pod is Infeasible, we're probably stuck like this until we evict", "pod", klog.KObj(podToCheck)) } else if podToCheck.Status.Resize == apiv1.PodResizeStatusDeferred { - klog.V(4).Infof("Resource propopsal for %s is %v, our resize can't be satisfied by our Node right now", podToCheck.Status.Resize) + klog.V(4).InfoS("Resource proposal for pod is Deferred, we're probably stuck like this until we evict", "pod", klog.KObj(podToCheck)) } return true } @@ -608,10 +621,3 @@ func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) { return false } - -/* -func BadResizeStatus(pod *apiv1.Pod) bool { - if pod.Status.Resize == apiv1.PodResizeStatusInfeasible{ - - } -}*/ diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction_test.go b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction_test.go index 6685c144cc4..6a37ff1c99a 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction_test.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction_test.go @@ -28,6 +28,7 @@ import ( batchv1 "k8s.io/api/batch/v1" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" appsinformer "k8s.io/client-go/informers/apps/v1" @@ -46,6 +47,17 @@ func getBasicVpa() *vpa_types.VerticalPodAutoscaler { return test.VerticalPodAutoscaler().WithContainer("any").Get() } +func getNoopPatchCalculators() []patch.Calculator { + return []patch.Calculator{} +} + +// func 
getPatchCalculators() []patch.Calculator { +// return []patch.Calculator{ +// patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider), +// eviction.NewLastInPlaceUpdateCalculator(), +// } +// } + func TestEvictReplicatedByController(t *testing.T) { rc := apiv1.ReplicationController{ ObjectMeta: metav1.ObjectMeta{ @@ -72,6 +84,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas int32 evictionTolerance float64 vpa *vpa_types.VerticalPodAutoscaler + calculators []patch.Calculator pods []podWithExpectations }{ { @@ -79,6 +92,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 3, evictionTolerance: 0.5, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -102,6 +116,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 4, evictionTolerance: 0.5, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { @@ -131,6 +146,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 4, evictionTolerance: 0.5, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -154,6 +170,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 3, evictionTolerance: 0.1, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -177,6 +194,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 3, evictionTolerance: 0.1, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -195,6 +213,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 3, evictionTolerance: 0.5, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -218,6 +237,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 4, evictionTolerance: 0.5, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -246,6 +266,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 1, evictionTolerance: 0.5, vpa: getBasicVpa(), + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -259,6 +280,7 @@ func TestEvictReplicatedByController(t *testing.T) { replicas: 1, evictionTolerance: 0.5, vpa: vpaSingleReplica, + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().Get(), @@ -278,7 +300,7 @@ func TestEvictReplicatedByController(t *testing.T) { pods = append(pods, p.pod) } factory, _ := getEvictionRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance) - eviction := factory.NewPodsEvictionRestriction(pods, testCase.vpa) + eviction := factory.NewPodsEvictionRestriction(pods, testCase.vpa, testCase.calculators) for i, p := range testCase.pods { assert.Equalf(t, p.canEvict, eviction.CanEvict(p.pod), "TC %v - unexpected CanEvict result for pod-%v %#v", testCase.name, i, p.pod) } @@ -317,7 +339,7 @@ func TestEvictReplicatedByReplicaSet(t *testing.T) { basicVpa := getBasicVpa() factory, _ := getEvictionRestrictionFactory(nil, &rs, nil, nil, 2, 0.5) - eviction := factory.NewPodsEvictionRestriction(pods, basicVpa) + eviction := factory.NewPodsEvictionRestriction(pods, basicVpa, getNoopPatchCalculators()) for _, pod := range pods { assert.True(t, eviction.CanEvict(pod)) @@ -357,7 +379,7 @@ func TestEvictReplicatedByStatefulSet(t 
*testing.T) { basicVpa := getBasicVpa() factory, _ := getEvictionRestrictionFactory(nil, nil, &ss, nil, 2, 0.5) - eviction := factory.NewPodsEvictionRestriction(pods, basicVpa) + eviction := factory.NewPodsEvictionRestriction(pods, basicVpa, getNoopPatchCalculators()) for _, pod := range pods { assert.True(t, eviction.CanEvict(pod)) @@ -396,7 +418,7 @@ func TestEvictReplicatedByDaemonSet(t *testing.T) { basicVpa := getBasicVpa() factory, _ := getEvictionRestrictionFactory(nil, nil, nil, &ds, 2, 0.5) - eviction := factory.NewPodsEvictionRestriction(pods, basicVpa) + eviction := factory.NewPodsEvictionRestriction(pods, basicVpa, getNoopPatchCalculators()) for _, pod := range pods { assert.True(t, eviction.CanEvict(pod)) @@ -432,7 +454,7 @@ func TestEvictReplicatedByJob(t *testing.T) { basicVpa := getBasicVpa() factory, _ := getEvictionRestrictionFactory(nil, nil, nil, nil, 2, 0.5) - eviction := factory.NewPodsEvictionRestriction(pods, basicVpa) + eviction := factory.NewPodsEvictionRestriction(pods, basicVpa, getNoopPatchCalculators()) for _, pod := range pods { assert.True(t, eviction.CanEvict(pod)) @@ -472,7 +494,7 @@ func TestEvictTooFewReplicas(t *testing.T) { basicVpa := getBasicVpa() factory, _ := getEvictionRestrictionFactory(&rc, nil, nil, nil, 10, 0.5) - eviction := factory.NewPodsEvictionRestriction(pods, basicVpa) + eviction := factory.NewPodsEvictionRestriction(pods, basicVpa, getNoopPatchCalculators()) for _, pod := range pods { assert.False(t, eviction.CanEvict(pod)) @@ -509,7 +531,7 @@ func TestEvictionTolerance(t *testing.T) { basicVpa := getBasicVpa() factory, _ := getEvictionRestrictionFactory(&rc, nil, nil, nil, 2 /*minReplicas*/, tolerance) - eviction := factory.NewPodsEvictionRestriction(pods, basicVpa) + eviction := factory.NewPodsEvictionRestriction(pods, basicVpa, getNoopPatchCalculators()) for _, pod := range pods { assert.True(t, eviction.CanEvict(pod)) @@ -550,7 +572,7 @@ func TestEvictAtLeastOne(t *testing.T) { basicVpa := getBasicVpa() factory, _ := getEvictionRestrictionFactory(&rc, nil, nil, nil, 2, tolerance) - eviction := factory.NewPodsEvictionRestriction(pods, basicVpa) + eviction := factory.NewPodsEvictionRestriction(pods, basicVpa, getNoopPatchCalculators()) for _, pod := range pods { assert.True(t, eviction.CanEvict(pod)) @@ -590,6 +612,7 @@ func TestEvictEmitEvent(t *testing.T) { replicas int32 evictionTolerance float64 vpa *vpa_types.VerticalPodAutoscaler + calculators []patch.Calculator pods []podWithExpectations errorExpected bool }{ @@ -598,6 +621,7 @@ func TestEvictEmitEvent(t *testing.T) { replicas: 4, evictionTolerance: 0.5, vpa: basicVpa, + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { pod: generatePod().WithPhase(apiv1.PodPending).Get(), @@ -617,6 +641,7 @@ func TestEvictEmitEvent(t *testing.T) { replicas: 4, evictionTolerance: 0.5, vpa: basicVpa, + calculators: getNoopPatchCalculators(), pods: []podWithExpectations{ { @@ -638,7 +663,7 @@ func TestEvictEmitEvent(t *testing.T) { pods = append(pods, p.pod) } factory, _ := getEvictionRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance) - eviction := factory.NewPodsEvictionRestriction(pods, testCase.vpa) + eviction := factory.NewPodsEvictionRestriction(pods, testCase.vpa, testCase.calculators) for _, p := range testCase.pods { mockRecorder := test.MockEventRecorder() diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index 9a9aa80ff17..c2f1734f142 100644 --- 
a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -46,7 +46,6 @@ import ( controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/eviction" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/priority" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" metrics_updater "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/updater" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" @@ -64,20 +63,21 @@ type Updater interface { } type updater struct { - vpaLister vpa_lister.VerticalPodAutoscalerLister - podLister v1lister.PodLister - eventRecorder record.EventRecorder - evictionFactory eviction.PodsEvictionRestrictionFactory - recommendationProcessor vpa_api_util.RecommendationProcessor - evictionAdmission priority.PodEvictionAdmission - priorityProcessor priority.PriorityProcessor - evictionRateLimiter *rate.Limiter - selectorFetcher target.VpaTargetSelectorFetcher - useAdmissionControllerStatus bool - statusValidator status.Validator - controllerFetcher controllerfetcher.ControllerFetcher - ignoredNamespaces []string - patchCalculators []patch.Calculator + vpaLister vpa_lister.VerticalPodAutoscalerLister + podLister v1lister.PodLister + eventRecorder record.EventRecorder + evictionFactory eviction.PodsEvictionRestrictionFactory + recommendationProcessor vpa_api_util.RecommendationProcessor + evictionAdmission priority.PodEvictionAdmission + priorityProcessor priority.PriorityProcessor + evictionRateLimiter *rate.Limiter + selectorFetcher target.VpaTargetSelectorFetcher + useAdmissionControllerStatus bool + statusValidator status.Validator + controllerFetcher controllerfetcher.ControllerFetcher + ignoredNamespaces []string + patchCalculators []patch.Calculator + lastInPlaceUpdateAttemptTimeMap map[string]time.Time } // NewUpdater creates Updater with given configuration @@ -121,8 +121,9 @@ func NewUpdater( status.AdmissionControllerStatusName, statusNamespace, ), - ignoredNamespaces: ignoredNamespaces, - patchCalculators: patchCalculators, + ignoredNamespaces: ignoredNamespaces, + patchCalculators: patchCalculators, + lastInPlaceUpdateAttemptTimeMap: make(map[string]time.Time), }, nil } @@ -255,6 +256,8 @@ func (u *updater) RunOnce(ctx context.Context) { if prioritizedPod.IsDisruptionless() { klog.Infof("Not falling back to eviction, %v was supposed to be disruptionless", pod.Name) continue + } else { + klog.V(4).Infof("Falling back to eviction for %s", pod.Name) } } else { klog.Infof("Not falling back to eviction, because we don't have a recommendation yet? %v", pod.Name) @@ -290,18 +293,18 @@ func (u *updater) RunOnce(ctx context.Context) { timer.ObserveStep("EvictPods") } -// VpaReommendationProvided checks the VPA status to see if it has provided a recommendation yet. Used +// VpaRecommendationProvided checks the VPA status to see if it has provided a recommendation yet. 
Used // to make sure we don't get bogus values for in-place scaling // TODO(jkyros): take this out when you find the proper place to gate this -func VpaReommendationProvided(vpa *vpa_types.VerticalPodAutoscaler) bool { - for _, condition := range vpa.Status.Conditions { - - if condition.Type == vpa_types.RecommendationProvided && condition.Status == apiv1.ConditionTrue { - return true - } - } - return false - +func VpaRecommendationProvided(vpa *vpa_types.VerticalPodAutoscaler) bool { + // for _, condition := range vpa.Status.Conditions { + // if condition.Type == vpa_types.RecommendationProvided && condition.Status == apiv1.ConditionTrue { + // return true + // } + // } + // TODO(maxcao13): The above condition doesn't work in tests because sometimes there is no recommender to set this status + // so we should check the recommendation field directly. Or we can set the above condition manually in tests. + return vpa.Status.Recommendation != nil } func getRateLimiter(evictionRateLimit float64, evictionRateLimitBurst int) *rate.Limiter { @@ -387,7 +390,7 @@ func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize i // don't hit it (maybe they take too long?). We end up with 0's for resource recommendations because we // queue this for in-place before the VPA has made a recommendation. - if !VpaReommendationProvided(vpa) { + if !VpaRecommendationProvided(vpa) { klog.V(4).Infof("VPA hasn't made a recommendation yet, we're early on %s for some reason", pod.Name) // TODO(jkyros): so we must have had some erroneous evictions before, but we were passing the test suite? But for now if I want to test // in-place I need it to not evict immediately if I can't in-place (because then it will never in-place) @@ -415,34 +418,37 @@ func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize i } // get lastInPlaceUpdateTime annotation, if doesn't exist, set it to now - lastUpdateTime, updateErr := annotations.ParseVpaLastInPlaceUpdateTimeValue(pod) - if updateErr != nil { + lastUpdateTime, exists := u.lastInPlaceUpdateAttemptTimeMap[eviction.GetPodID(pod)] + if !exists { + klog.V(4).Infof("In-place update in progress for %s/%s, but no lastInPlaceUpdateTime found, setting it to now", pod.Namespace, pod.Name) lastUpdateTime = time.Now() - klog.V(4).ErrorS(updateErr, "Error parsing lastInPlaceUpdateTime annotation, setting it to now", "pod", pod.Name) + u.lastInPlaceUpdateAttemptTimeMap[eviction.GetPodID(pod)] = lastUpdateTime } - // if currently inPlaceUpdating, we should only fallback to eviction if the update has failed, as in one of the following conditions: + // if currently inPlaceUpdating, we should only fallback to eviction if the update has failed. i.e: one of the following conditions: // 1. .status.resize: Infeasible // 2. .status.resize: Deferred + more than 1 minute has elapsed since the lastInPlaceUpdateTime // 3. 
.status.resize: InProgress + more than 1 hour has elapsed since the lastInPlaceUpdateTime switch pod.Status.Resize { case apiv1.PodResizeStatusDeferred: if time.Since(lastUpdateTime) > DeferredResizeUpdateTimeout { - klog.V(4).Infof("In-place update deferred for more than &v for %s, falling back to eviction", DeferredResizeUpdateTimeout, pod.Name) + klog.V(4).InfoS(fmt.Sprintf("In-place update deferred for more than %v, falling back to eviction", DeferredResizeUpdateTimeout), "pod", pod.Name) fallBackToEviction = true + } else { + klog.V(4).Infof("In-place update deferred for %s, NOT falling back to eviction yet", pod.Name) } - klog.V(4).Infof("In-place update deferred for %s, NOT falling back to eviction yet", pod.Name) case apiv1.PodResizeStatusInProgress: if time.Since(lastUpdateTime) > InProgressResizeUpdateTimeout { - klog.V(4).Infof("In-place update in progress for more than &v for %s, falling back to eviction", InProgressResizeUpdateTimeout, pod.Name) + klog.V(4).InfoS(fmt.Sprintf("In-place update in progress for more than %v, falling back to eviction", InProgressResizeUpdateTimeout), "pod", pod.Name) fallBackToEviction = true + } else { + klog.V(4).InfoS("In-place update in progress, NOT falling back to eviction yet", "pod", pod.Name) } - klog.V(4).Infof("In-place update in progress for %s, NOT falling back to eviction yet", pod.Name) case apiv1.PodResizeStatusInfeasible: - klog.V(4).Infof("In-place update infeasible for %s, falling back to eviction", pod.Name) + klog.V(4).InfoS("In-place update infeasible, falling back to eviction", "pod", pod.Name) fallBackToEviction = true default: - klog.V(4).Infof("In-place update status unknown for %s: , falling back to eviction", pod.Status.Resize, pod.Name) + klog.V(4).InfoS("In-place update status unknown, falling back to eviction", "pod", pod.Name) fallBackToEviction = true } return @@ -456,7 +462,8 @@ func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize i return } - klog.V(2).Infof("updating pod %v", pod.Name) + klog.V(2).Infof("attempting to in-place update pod %v", pod.Name) + u.lastInPlaceUpdateAttemptTimeMap[eviction.GetPodID(pod)] = time.Now() evictErr := evictionLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder) if evictErr != nil { klog.Warningf("updating pod %v failed: %v", pod.Name, evictErr) diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go index b5169c573bf..a6299d8f703 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go @@ -34,7 +34,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" - corev1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" target_mock "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/mock" @@ -187,7 +187,7 @@ func testRunOnceBase( WithMinAllowed(containerName, "1", "100M"). WithMaxAllowed(containerName, "3", "1G"). WithTargetRef(targetRef). 
- AppendCondition(vpa_types.RecommendationProvided, corev1.ConditionTrue, "reason", "msg", time.Unix(0, 0)).Get() + AppendCondition(vpa_types.RecommendationProvided, apiv1.ConditionTrue, "reason", "msg", time.Unix(0, 0)).Get() vpaObj.Spec.UpdatePolicy = &vpa_types.PodUpdatePolicy{UpdateMode: &updateMode} vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() @@ -256,7 +256,7 @@ type fakeEvictFactory struct { evict eviction.PodsEvictionRestriction } -func (f fakeEvictFactory) NewPodsEvictionRestriction(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) eviction.PodsEvictionRestriction { +func (f fakeEvictFactory) NewPodsEvictionRestriction(pods []*apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, patchCalculators []patch.Calculator) eviction.PodsEvictionRestriction { return f.evict } @@ -323,6 +323,8 @@ func TestRunOnceIgnoreNamespaceMatchingPods(t *testing.T) { // TOD0(jkyros): I added the recommendationProvided condition here because in-place needs to wait for a // recommendation to scale, causing this test to fail (because in-place checks before eviction, and in-place will // wait to scale -- and not fall back to eviction -- until the VPA has made a recommendation) + // TODO(maxcao13): We can either just add these conditions on every test, or we can change the VpaRecommendationProvided condition check in the code + // which I have already did. Either way should be fine unless vpa.Status.Recommendation != nil does not imply RecommendationProvided condition vpaObj := test.VerticalPodAutoscaler(). WithNamespace("default"). WithContainer(containerName). @@ -330,7 +332,7 @@ func TestRunOnceIgnoreNamespaceMatchingPods(t *testing.T) { WithMinAllowed(containerName, "1", "100M"). WithMaxAllowed(containerName, "3", "1G"). WithTargetRef(targetRef). - AppendCondition(vpa_types.RecommendationProvided, corev1.ConditionTrue, "reason", "msg", time.Unix(0, 0)).Get() + AppendCondition(vpa_types.RecommendationProvided, apiv1.ConditionTrue, "reason", "msg", time.Unix(0, 0)).Get() vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go index 5bade7a647d..675e0ee93ea 100644 --- a/vertical-pod-autoscaler/pkg/updater/main.go +++ b/vertical-pod-autoscaler/pkg/updater/main.go @@ -183,9 +183,13 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { ignoredNamespaces := strings.Split(commonFlag.IgnoredVpaObjectNamespaces, ",") - // in-place recommendation provider inPlaceRecommendationProvider := eviction.NewInPlaceProvider(limitRangeCalculator, vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator)) - // resource calculator which will generate patches + + // TODO(maxcao13): figure out if we need to use NewInPlaceUpdatedCalculator; does it help the user to know if their pod was updated in-place as an annotation? 
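	// For reference, a sketch of what wiring it in would do (not part of this patch): the calculator
	// returns patch.GetAddAnnotationPatch(annotations.VpaInPlaceUpdatedLabel, annotations.GetVpaInPlaceUpdatedValue()),
	// so an in-place resized pod would carry roughly:
	//   metadata.annotations["vpaInPlaceUpdated"] = "vpaInPlaceUpdated"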
+ // TODO(maxcao13): also figure out if we should strip down the resourceUpdatesCalculator just for in-place updates into a new calculator + // The use of resourceUpdatesCalculator adds extra unneccessary annotations since it is duplicated in the admission controller + + // calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider), eviction.NewInPlaceUpdatedCalculator()} calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider)} // TODO: use SharedInformerFactory in updater diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go new file mode 100644 index 00000000000..fde9960dc85 --- /dev/null +++ b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go @@ -0,0 +1,32 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package annotations + +const ( + // VpaInPlaceUpdatedLabel is a label used by the vpa last in place update annotation. + VpaInPlaceUpdatedLabel = "vpaInPlaceUpdated" +) + +// GetVpaInPlaceUpdatedValue creates an annotation value for a given pod. +func GetVpaInPlaceUpdatedValue() string { + return "vpaInPlaceUpdated" +} + +// ParseVpaInPlaceUpdatedValue returns last in place update time +// func ParseVpaInPlaceUpdatedValue() error { +// return nil +// } diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go deleted file mode 100644 index a130bfdf28c..00000000000 --- a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_last_inplace_update.go +++ /dev/null @@ -1,53 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package annotations - -import ( - "fmt" - "time" - - apiv1 "k8s.io/api/core/v1" -) - -const ( - // VpaLastInPlaceUpdateTime is a label used by the vpa last in place update annotation. - VpaLastInPlaceUpdateTime = "vpaLastInPlaceUpdateTime" -) - -// GetVpaLastInPlaceUpdatedTimeValue creates an annotation value for a given pod. -func GetVpaLastInPlaceUpdatedTimeValue(lastUpdated time.Time) string { - // return string version of v1.Time - return lastUpdated.Format(time.RFC3339) -} - -// ParseVpaLastInPlaceUpdateTimeValue returns the last in-place update time from a pod annotation. 
-func ParseVpaLastInPlaceUpdateTimeValue(pod *apiv1.Pod) (time.Time, error) { - if pod == nil { - return time.Time{}, fmt.Errorf("pod is nil") - } - - if pod.Annotations == nil { - return time.Time{}, fmt.Errorf("pod.Annotations is nil") - } - - lastInPlaceUpdateTime, ok := pod.Annotations[VpaLastInPlaceUpdateTime] - if !ok { - return time.Time{}, fmt.Errorf("pod.Annotations[VpaLastInPlaceUpdateTime] is nil") - } - - return time.Parse(time.RFC3339, lastInPlaceUpdateTime) -} diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go index f96612013c5..3d282f5a073 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go @@ -122,7 +122,7 @@ func (m *PodsEvictionRestrictionMock) CanEvict(pod *apiv1.Pod) bool { } // InPlaceUpdate is a mock implementation of PodsEvictionRestriction.InPlaceUpdate -func (m *PodsEvictionRestrictionMock) InPlaceUpdate(pod *apiv1.Pod, eventRecorder record.EventRecorder) error { +func (m *PodsEvictionRestrictionMock) InPlaceUpdate(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error { args := m.Called(pod, eventRecorder) return args.Error(0) } From 40ad8c2f3624997003b7ada626a08ba2c00f370b Mon Sep 17 00:00:00 2001 From: Max Cao Date: Thu, 2 Jan 2025 16:51:11 -0800 Subject: [PATCH 25/26] fixup! VPA: add in-place vertical scaling actuation tests VPA: remove resolved comments and cleanup stale code Signed-off-by: Max Cao --- .../pkg/admission-controller/resource/pod/handler.go | 2 -- .../pkg/updater/eviction/inplace_recommendation_provider.go | 3 ++- .../pkg/updater/eviction/pods_eviction_restriction.go | 1 + vertical-pod-autoscaler/pkg/updater/logic/updater.go | 2 -- vertical-pod-autoscaler/pkg/updater/main.go | 4 ++-- .../pkg/updater/priority/update_priority_calculator.go | 2 +- .../pkg/utils/annotations/vpa_inplace_update.go | 5 ----- 7 files changed, 6 insertions(+), 13 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go index 13c39e40380..7d8e704f743 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/handler.go @@ -77,7 +77,6 @@ func (h *resourceHandler) GetPatches(ctx context.Context, ar *admissionv1.Admiss pod.Name = pod.GenerateName + "%" pod.Namespace = namespace } - klog.V(4).InfoS("Admitting pod", "pod", klog.KObj(&pod)) controllingVpa := h.vpaMatcher.GetMatchingVPA(ctx, &pod) if controllingVpa == nil { @@ -93,7 +92,6 @@ func (h *resourceHandler) GetPatches(ctx context.Context, ar *admissionv1.Admiss if pod.Annotations == nil { patches = append(patches, patch.GetAddEmptyAnnotationsPatch()) } - for _, c := range h.patchCalculators { partialPatches, err := c.CalculatePatches(&pod, controllingVpa) if err != nil { diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go b/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go index dfbb0071b78..f8d62af76ba 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go @@ -33,7 +33,7 @@ type inPlaceRecommendationProvider struct { recommendationProcessor vpa_api_util.RecommendationProcessor } -// NewProvider constructs the recommendation provider that 
can be used to determine recommendations for pods. +// NewInPlaceProvider constructs the recommendation provider that can be used to determine recommendations for pods. func NewInPlaceProvider(calculator limitrange.LimitRangeCalculator, recommendationProcessor vpa_api_util.RecommendationProcessor) recommendation.Provider { return &inPlaceRecommendationProvider{ @@ -43,6 +43,7 @@ func NewInPlaceProvider(calculator limitrange.LimitRangeCalculator, } // TODO(maxcao13): Strip down function to remove unnecessary stuff, or refactor so that it can be used in the admission controller as well + // GetContainersResources returns the recommended resources for each container in the given pod in the same order they are specified in the pod.Spec. // If addAll is set to true, containers w/o a recommendation are also added to the list (and their non-recommended requests and limits will always be preserved if present), // otherwise they're skipped (default behaviour). diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go index 725f0bc45d3..b8d048be6ba 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go @@ -321,6 +321,7 @@ func getPodReplicaCreator(pod *apiv1.Pod) (*podReplicaCreator, error) { return podReplicaCreator, nil } +// GetPodID returns a string that uniquely identifies a pod by namespace and name func GetPodID(pod *apiv1.Pod) string { if pod == nil { return "" diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index c2f1734f142..98950928a09 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -243,8 +243,6 @@ func (u *updater) RunOnce(ctx context.Context) { pod := prioritizedPod.Pod() - // somewhere we should check if inplace pods actually need scaling or not, and if they will be disruptionless - // according to AEP: fallBackToEviction, err := u.AttemptInPlaceScalingIfPossible(ctx, vpaSize, vpa, pod, evictionLimiter, vpasWithInPlaceUpdatablePodsCounter, vpasWithInPlaceUpdatedPodsCounter) if err != nil { klog.Warningf("error attemptng to scale pod %v in-place: %v", pod.Name, err) diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go index 675e0ee93ea..78e3707f53d 100644 --- a/vertical-pod-autoscaler/pkg/updater/main.go +++ b/vertical-pod-autoscaler/pkg/updater/main.go @@ -90,7 +90,7 @@ func main() { componentbaseoptions.BindLeaderElectionFlags(&leaderElection, pflag.CommandLine) kube_flag.InitFlags() - klog.V(1).InfoS("Vertical Pod Autoscaler Updater FOR INPALCE!!!!!!", "version", common.VerticalPodAutoscalerVersion) + klog.V(1).InfoS("Vertical Pod Autoscaler Updater", "version", common.VerticalPodAutoscalerVersion) if len(commonFlags.VpaObjectNamespace) > 0 && len(commonFlags.IgnoredVpaObjectNamespaces) > 0 { klog.Fatalf("--vpa-object-namespace and --ignored-vpa-object-namespaces are mutually exclusive and can't be set together.") @@ -187,7 +187,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { // TODO(maxcao13): figure out if we need to use NewInPlaceUpdatedCalculator; does it help the user to know if their pod was updated in-place as an annotation? 
// TODO(maxcao13): also figure out if we should strip down the resourceUpdatesCalculator just for in-place updates into a new calculator - // The use of resourceUpdatesCalculator adds extra unneccessary annotations since it is duplicated in the admission controller + // The use of resourceUpdatesCalculator adds extra unnecessary annotations since it is duplicated in the admission controller // calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider), eviction.NewInPlaceUpdatedCalculator()} calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider)} diff --git a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go index aa563a5a5e8..aafd33d98cd 100644 --- a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go +++ b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go @@ -129,7 +129,7 @@ func (calc *UpdatePriorityCalculator) AddPod(pod *apiv1.Pod, now time.Time) { return } // TODO(maxcao13): hopefully this doesn't break anything but we switch the order so that significant change is checked first before lifetime - // this way we don't in-place scale it for insignificant change, else we would will mark it disruptionless and still have an in-place update + // this way we don't in-place scale it for insignificant change, else we would mark it disruptionless and still have an in-place update if updatePriority.ResourceDiff < calc.config.MinChangePriority { klog.V(4).InfoS("Not updating pod, resource diff too low", "pod", klog.KObj(pod), "updatePriority", updatePriority) return diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go index fde9960dc85..441de7cb88b 100644 --- a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go +++ b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go @@ -25,8 +25,3 @@ const ( func GetVpaInPlaceUpdatedValue() string { return "vpaInPlaceUpdated" } - -// ParseVpaInPlaceUpdatedValue returns last in place update time -// func ParseVpaInPlaceUpdatedValue() error { -// return nil -// } From 7de6fa3efbba923d915d8a5e71de9a211282832c Mon Sep 17 00:00:00 2001 From: Max Cao Date: Mon, 6 Jan 2025 19:02:45 -0800 Subject: [PATCH 26/26] fixup! 
Allow VPA updater to actuate recommendations in-place Signed-off-by: Max Cao --- vertical-pod-autoscaler/deploy/vpa-rbac.yaml | 1 + vertical-pod-autoscaler/e2e/v1/common.go | 2 + .../resource/pod/patch/resource_updates.go | 12 +- .../inplace_recommendation_provider.go | 138 ------------------ .../eviction/pods_eviction_restriction.go | 100 +++++++------ .../inplace_recommendation_provider.go | 83 +++++++++++ .../inplace_updated.go} | 2 +- .../pkg/updater/inplace/resource_updates.go | 83 +++++++++++ .../pkg/updater/logic/updater.go | 1 - vertical-pod-autoscaler/pkg/updater/main.go | 10 +- .../utils/annotations/vpa_inplace_update.go | 4 +- 11 files changed, 238 insertions(+), 198 deletions(-) delete mode 100644 vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go create mode 100644 vertical-pod-autoscaler/pkg/updater/inplace/inplace_recommendation_provider.go rename vertical-pod-autoscaler/pkg/updater/{eviction/last_inplace_update.go => inplace/inplace_updated.go} (98%) create mode 100644 vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go diff --git a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml index fc1c04154e7..6ca91b7b5c7 100644 --- a/vertical-pod-autoscaler/deploy/vpa-rbac.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-rbac.yaml @@ -129,6 +129,7 @@ rules: - "" resources: - pods/resize + - pods verbs: - patch --- diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go index 3eaf89bd9a8..4ad73d303d3 100644 --- a/vertical-pod-autoscaler/e2e/v1/common.go +++ b/vertical-pod-autoscaler/e2e/v1/common.go @@ -455,6 +455,8 @@ func CheckNoPodsEvicted(f *framework.Framework, initialPodSet PodSet) { gomega.Expect(restarted).To(gomega.Equal(0), "there should be no pod evictions") } +// CheckNoContainersRestarted waits for long enough period for VPA to start +// updating containers in-place and checks that no containers were restarted. func CheckNoContainersRestarted(f *framework.Framework) { var foundContainerRestarts int32 time.Sleep(VpaEvictionTimeout) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go index cddec1aa83f..7310ccc82ac 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go @@ -77,7 +77,7 @@ func getContainerPatch(pod *core.Pod, i int, annotationsPerContainer vpa_api_uti // Add empty resources object if missing. if pod.Spec.Containers[i].Resources.Limits == nil && pod.Spec.Containers[i].Resources.Requests == nil { - patches = append(patches, getPatchInitializingEmptyResources(i)) + patches = append(patches, GetPatchInitializingEmptyResources(i)) } annotations, found := annotationsPerContainer[pod.Spec.Containers[i].Name] @@ -95,23 +95,23 @@ func getContainerPatch(pod *core.Pod, i int, annotationsPerContainer vpa_api_uti func appendPatchesAndAnnotations(patches []resource_admission.PatchRecord, annotations []string, current core.ResourceList, containerIndex int, resources core.ResourceList, fieldName, resourceName string) ([]resource_admission.PatchRecord, []string) { // Add empty object if it's missing and we're about to fill it. 
if current == nil && len(resources) > 0 { - patches = append(patches, getPatchInitializingEmptyResourcesSubfield(containerIndex, fieldName)) + patches = append(patches, GetPatchInitializingEmptyResourcesSubfield(containerIndex, fieldName)) } for resource, request := range resources { - patches = append(patches, getAddResourceRequirementValuePatch(containerIndex, fieldName, resource, request)) + patches = append(patches, GetAddResourceRequirementValuePatch(containerIndex, fieldName, resource, request)) annotations = append(annotations, fmt.Sprintf("%s %s", resource, resourceName)) } return patches, annotations } -func getAddResourceRequirementValuePatch(i int, kind string, resource core.ResourceName, quantity resource.Quantity) resource_admission.PatchRecord { +func GetAddResourceRequirementValuePatch(i int, kind string, resource core.ResourceName, quantity resource.Quantity) resource_admission.PatchRecord { return resource_admission.PatchRecord{ Op: "add", Path: fmt.Sprintf("/spec/containers/%d/resources/%s/%s", i, kind, resource), Value: quantity.String()} } -func getPatchInitializingEmptyResources(i int) resource_admission.PatchRecord { +func GetPatchInitializingEmptyResources(i int) resource_admission.PatchRecord { return resource_admission.PatchRecord{ Op: "add", Path: fmt.Sprintf("/spec/containers/%d/resources", i), @@ -119,7 +119,7 @@ func getPatchInitializingEmptyResources(i int) resource_admission.PatchRecord { } } -func getPatchInitializingEmptyResourcesSubfield(i int, kind string) resource_admission.PatchRecord { +func GetPatchInitializingEmptyResourcesSubfield(i int, kind string) resource_admission.PatchRecord { return resource_admission.PatchRecord{ Op: "add", Path: fmt.Sprintf("/spec/containers/%d/resources/%s", i, kind), diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go b/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go deleted file mode 100644 index f8d62af76ba..00000000000 --- a/vertical-pod-autoscaler/pkg/updater/eviction/inplace_recommendation_provider.go +++ /dev/null @@ -1,138 +0,0 @@ -/* -Copyright 2024 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package eviction - -import ( - "fmt" - - core "k8s.io/api/core/v1" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation" - "k8s.io/klog/v2" - - vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/limitrange" - vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" -) - -type inPlaceRecommendationProvider struct { - limitsRangeCalculator limitrange.LimitRangeCalculator - recommendationProcessor vpa_api_util.RecommendationProcessor -} - -// NewInPlaceProvider constructs the recommendation provider that can be used to determine recommendations for pods. 
-func NewInPlaceProvider(calculator limitrange.LimitRangeCalculator, - recommendationProcessor vpa_api_util.RecommendationProcessor) recommendation.Provider { - return &inPlaceRecommendationProvider{ - limitsRangeCalculator: calculator, - recommendationProcessor: recommendationProcessor, - } -} - -// TODO(maxcao13): Strip down function to remove unnecessary stuff, or refactor so that it can be used in the admission controller as well - -// GetContainersResources returns the recommended resources for each container in the given pod in the same order they are specified in the pod.Spec. -// If addAll is set to true, containers w/o a recommendation are also added to the list (and their non-recommended requests and limits will always be preserved if present), -// otherwise they're skipped (default behaviour). -func GetContainersResources(pod *core.Pod, vpaResourcePolicy *vpa_types.PodResourcePolicy, podRecommendation vpa_types.RecommendedPodResources, limitRange *core.LimitRangeItem, - addAll bool) []vpa_api_util.ContainerResources { - resources := make([]vpa_api_util.ContainerResources, len(pod.Spec.Containers)) - for i, container := range pod.Spec.Containers { - recommendation := vpa_api_util.GetRecommendationForContainer(container.Name, &podRecommendation) - if recommendation == nil { - if !addAll { - klog.V(2).InfoS("No recommendation found for container, skipping", "container", container.Name) - continue - } - klog.V(2).InfoS("No match found for container, using Pod request", "container", container.Name) - resources[i].Requests = container.Resources.Requests - } else { - resources[i].Requests = recommendation.Target - } - defaultLimit := core.ResourceList{} - if limitRange != nil { - defaultLimit = limitRange.Default - } - containerControlledValues := vpa_api_util.GetContainerControlledValues(container.Name, vpaResourcePolicy) - if containerControlledValues == vpa_types.ContainerControlledValuesRequestsAndLimits { - proportionalLimits, _ := vpa_api_util.GetProportionalLimit(container.Resources.Limits, container.Resources.Requests, resources[i].Requests, defaultLimit) - if proportionalLimits != nil { - resources[i].Limits = proportionalLimits - } - } - // If the recommendation only contains CPU or Memory (if the VPA was configured this way), we need to make sure we "backfill" the other. - // Only do this when the addAll flag is true. - if addAll { - cpuRequest, hasCpuRequest := container.Resources.Requests[core.ResourceCPU] - if _, ok := resources[i].Requests[core.ResourceCPU]; !ok && hasCpuRequest { - resources[i].Requests[core.ResourceCPU] = cpuRequest - } - memRequest, hasMemRequest := container.Resources.Requests[core.ResourceMemory] - if _, ok := resources[i].Requests[core.ResourceMemory]; !ok && hasMemRequest { - resources[i].Requests[core.ResourceMemory] = memRequest - } - cpuLimit, hasCpuLimit := container.Resources.Limits[core.ResourceCPU] - if _, ok := resources[i].Limits[core.ResourceCPU]; !ok && hasCpuLimit { - resources[i].Limits[core.ResourceCPU] = cpuLimit - } - memLimit, hasMemLimit := container.Resources.Limits[core.ResourceMemory] - if _, ok := resources[i].Limits[core.ResourceMemory]; !ok && hasMemLimit { - resources[i].Limits[core.ResourceMemory] = memLimit - } - } - } - return resources -} - -// GetContainersResourcesForPod returns recommended request for a given pod and associated annotations. -// The returned slice corresponds 1-1 to containers in the Pod. 
-func (p *inPlaceRecommendationProvider) GetContainersResourcesForPod(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]vpa_api_util.ContainerResources, vpa_api_util.ContainerToAnnotationsMap, error) { - if vpa == nil || pod == nil { - klog.V(2).InfoS("Can't calculate recommendations, one of VPA or Pod is nil", "vpa", vpa, "pod", pod) - return nil, nil, nil - } - klog.V(2).InfoS("Updating requirements for pod", "pod", pod.Name) - - recommendedPodResources := &vpa_types.RecommendedPodResources{} - - if vpa.Status.Recommendation != nil { - var err error - // ignore annotations as they are not used in the in-place update - recommendedPodResources, _, err = p.recommendationProcessor.Apply(vpa, pod) - if err != nil { - klog.V(2).InfoS("Cannot process recommendation for pod", "pod", klog.KObj(pod)) - return nil, nil, err - } - } - containerLimitRange, err := p.limitsRangeCalculator.GetContainerLimitRangeItem(pod.Namespace) - if err != nil { - return nil, nil, fmt.Errorf("error getting containerLimitRange: %s", err) - } - var resourcePolicy *vpa_types.PodResourcePolicy - if vpa.Spec.UpdatePolicy == nil || vpa.Spec.UpdatePolicy.UpdateMode == nil || *vpa.Spec.UpdatePolicy.UpdateMode != vpa_types.UpdateModeOff { - resourcePolicy = vpa.Spec.ResourcePolicy - } - containerResources := GetContainersResources(pod, resourcePolicy, *recommendedPodResources, containerLimitRange, false) - - // Ensure that we are not propagating empty resource key if any. - for _, resource := range containerResources { - if resource.RemoveEmptyResourceKeyIfAny() { - klog.InfoS("An empty resource key was found and purged", "pod", klog.KObj(pod), "vpa", klog.KObj(vpa)) - } - } - - return containerResources, nil, nil -} diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go index b8d048be6ba..e553907b972 100644 --- a/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/eviction/pods_eviction_restriction.go @@ -291,12 +291,9 @@ func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []* } if IsInPlaceUpdating(pod) { singleGroup.inPlaceUpdating = singleGroup.inPlaceUpdating + 1 - } } singleGroup.running = len(replicas) - singleGroup.pending - - // This has to happen last, singlegroup never gets returned, only this does creatorToSingleGroupStatsMap[creator] = singleGroup } @@ -460,8 +457,6 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool { return false } - // TODO(jkyros): is there a pod-level thing we can use? - // Go through each container, and check to see if this is going to cause a disruption or not noRestartPoliciesPopulated := true for _, container := range pod.Spec.Containers { @@ -498,18 +493,31 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool { return false } // This second "present" check is against the creator-to-group-stats map, not the pod-to-replica map + // TODO(maxcao13): Not sure, but do we need disruption tolerance for in-place updates? I guess we do since they are not guaranteed to be disruptionless... 
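	// Worked example with illustrative numbers: for a controller with configured=4 replicas and
	// evictionToleranceFraction=0.5, evictionTolerance=int(4*0.5)=2, so shouldBeAlive=4-2=2 below.
	// With running=4, evicted=0 and inPlaceUpdating=0, actuallyAlive=4-(0+0)=4 > 2 and the in-place
	// resize is allowed; once two pods are already being evicted or resized, actuallyAlive drops to 2
	// and further in-place resizes have to wait.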
+ // TODO(maxcao13): If this is okay, I may have to rename evictionTolerance to disruption tolerance if present { - klog.V(4).Infof("pod %s disruption tolerance run: %d config: %d tolerance: %d evicted: %d updating: %d", pod.Name, singleGroupStats.running, singleGroupStats.configured, singleGroupStats.evictionTolerance, singleGroupStats.evicted, singleGroupStats.inPlaceUpdating) + klog.V(4).InfoS("Checking pod disruption tolerance", + "podName", pod.Name, + "configuredPods", singleGroupStats.configured, + "runningPods", singleGroupStats.running, + "evictedPods", singleGroupStats.evicted, + "inPlaceUpdatingPods", singleGroupStats.inPlaceUpdating, + "evictionTolerance", singleGroupStats.evictionTolerance, + ) + // minimum number of pods that should be running to tolerate disruptions shouldBeAlive := singleGroupStats.configured - singleGroupStats.evictionTolerance - if singleGroupStats.running-(singleGroupStats.evicted+singleGroupStats.inPlaceUpdating) > shouldBeAlive { - klog.V(4).Infof("Should be alive: %d, Actually alive: %d", shouldBeAlive, singleGroupStats.running-(singleGroupStats.evicted+singleGroupStats.inPlaceUpdating)) + // number of pods that are actually running + actuallyAlive := singleGroupStats.running - (singleGroupStats.evicted + singleGroupStats.inPlaceUpdating) + if actuallyAlive > shouldBeAlive { + klog.V(4).InfoS("Pod can be resized in-place; more pods are running than required", "podName", pod.Name, "shouldBeAlive", shouldBeAlive, "actuallyAlive", actuallyAlive) return true } - // If all pods are running and eviction tolerance is small update 1 pod. + + // If all pods are running, no pods are being evicted or updated, and eviction tolerance is small, we can resize in-place if singleGroupStats.running == singleGroupStats.configured && singleGroupStats.evictionTolerance == 0 && singleGroupStats.evicted == 0 && singleGroupStats.inPlaceUpdating == 0 { - klog.V(4).Infof("--> we are in good shape on %s, it is tolerant", pod.Name) + klog.V(4).InfoS("Pod can be resized in-place; all pods are running and eviction tolerance is 0", "podName", pod.Name) return true } } @@ -526,36 +534,40 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa return fmt.Errorf("pod not suitable for eviction %v : not in replicated pods map", podToUpdate.Name) } - if !e.CanInPlaceUpdate(podToUpdate) { - return fmt.Errorf("cannot update pod %v in place : number of in-flight updates exceeded", podToUpdate.Name) - } + // TODO(maxcao13): Not sure if we need to check again here, but commenting it out for now in case we do + // if !e.CanInPlaceUpdate(podToUpdate) { + // return fmt.Errorf("cannot update pod %v in place : number of in-flight updates exceeded", podToUpdate.Name) + // } - // TODO(maxcao13): There's probably a more efficient way to do this, but this is what we have for now - // Don't reinvent the wheel, just use the resource updates patch calculator using by the admission controller - // See the below TODO for refactoring this - patches := []resource_updates.PatchRecord{} + // TODO(maxcao13): There's maybe a more efficient way to do this, but this is what we have for now + + // separate patches since we have to patch resize and spec separately + resourcePatches := []resource_updates.PatchRecord{} + annotationPatches := []resource_updates.PatchRecord{} if podToUpdate.Annotations == nil { - patches = append(patches, patch.GetAddEmptyAnnotationsPatch()) + annotationPatches = append(annotationPatches, patch.GetAddEmptyAnnotationsPatch()) } - for _, calculator := range 
+	for i, calculator := range e.patchCalculators {
 		p, err := calculator.CalculatePatches(podToUpdate, vpa)
 		if err != nil {
 			return fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", podToUpdate.Namespace, podToUpdate.Name, err)
 		}
 		klog.V(4).InfoS("Calculated patches for pod", "pod", klog.KObj(podToUpdate), "patches", p)
-		patches = append(patches, p...)
+		// TODO(maxcao13): change how this works later; it is fragile and depends on the resource calculator being first in the slice
+		// we may not even want the updater to patch pod annotations at all
+		if i == 0 {
+			resourcePatches = append(resourcePatches, p...)
+		} else {
+			annotationPatches = append(annotationPatches, p...)
+		}
 	}
-
-	if len(patches) > 0 {
-		patch, err := json.Marshal(patches)
+	if len(resourcePatches) > 0 {
+		patch, err := json.Marshal(resourcePatches)
 		if err != nil {
-			klog.Errorf("Cannot marshal the patch %v: %v", patches, err)
+			klog.Errorf("Cannot marshal the patch %v: %v", resourcePatches, err)
 			return err
 		}
-		// TODO(maxcao13): for now, this does not allow other patches to be applied since we are patching the resize subresource
-		// If we want other annotations to be patched in the pod using patchCalculators, we need to generalize this and refactor the
-		// NewResourceUpdatesCalculator and create a stripped down calculator just for calculating in-place resize patches
 		res, err := e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{}, "resize")
 		if err != nil {
 			klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
@@ -563,6 +575,20 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 		}
 		klog.V(4).InfoS("In-place patched pod /resize subresource using patches ", "pod", klog.KObj(res), "patches", string(patch))
 
+		// TODO(maxcao13): should applying the annotation patches depend on whether the resource patches succeeded?
+		if len(annotationPatches) > 0 {
+			patch, err := json.Marshal(annotationPatches)
+			if err != nil {
+				klog.Errorf("Cannot marshal the patch %v: %v", annotationPatches, err)
+				return err
+			}
+			res, err = e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{})
+			if err != nil {
+				klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
+				return err
+			}
+			klog.V(4).InfoS("Patched pod annotations", "pod", klog.KObj(res), "patches", string(patch))
+		}
 	} else {
 		err := fmt.Errorf("no patches to apply to %s", podToUpdate.Name)
 		klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
@@ -571,8 +597,8 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 
 	// TODO(maxcao13): If this keeps getting called on the same object with the same reason, it is considered a patch request.
 	// And we fail to have the corresponding rbac for it. So figure out if we need this later.
-	eventRecorder.Event(podToUpdate, apiv1.EventTypeNormal, "InPlaceResizedByVPA",
-		"Pod was resized in place by VPA Updater.")
+	// TODO(maxcao13): Do we even need to emit an event? The API server might reject the resize request. Should we rename this to InPlaceResizeAttempted?
+ // eventRecorder.Event(podToUpdate, apiv1.EventTypeNormal, "InPlaceResizedByVPA", "Pod was resized in place by VPA Updater.") // TODO(jkyros): You need to do this regardless once you update the pod, if it changes phases here as a result, you still // need to catalog what you did @@ -604,21 +630,5 @@ func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) { } return true } - - // If any of the container resources don't match their spec, it's...updating but the lifecycle hasn't kicked in yet? So we - // also need to mark that? - /* - for num, container := range podToCheck.Spec.Containers { - // TODO(jkyros): supported resources only? - // Resources can be nil, especially if the feature gate isn't on - if podToCheck.Status.ContainerStatuses[num].Resources != nil { - - if !reflect.DeepEqual(container.Resources, *podToCheck.Status.ContainerStatuses[num].Resources) { - klog.V(4).Infof("Resize must be in progress for %s, resources for container %s don't match", podToCheck.Name, container.Name) - return true - } - } - }*/ return false - } diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/inplace_recommendation_provider.go b/vertical-pod-autoscaler/pkg/updater/inplace/inplace_recommendation_provider.go new file mode 100644 index 00000000000..f89d2405614 --- /dev/null +++ b/vertical-pod-autoscaler/pkg/updater/inplace/inplace_recommendation_provider.go @@ -0,0 +1,83 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inplace + +import ( + "fmt" + + core "k8s.io/api/core/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation" + "k8s.io/klog/v2" + + vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/limitrange" + vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" +) + +type inPlaceRecommendationProvider struct { + limitsRangeCalculator limitrange.LimitRangeCalculator + recommendationProcessor vpa_api_util.RecommendationProcessor +} + +// NewInPlaceRecommendationProvider constructs the recommendation provider that can be used to determine recommendations for pods. +func NewInPlaceRecommendationProvider(calculator limitrange.LimitRangeCalculator, + recommendationProcessor vpa_api_util.RecommendationProcessor) recommendation.Provider { + return &inPlaceRecommendationProvider{ + limitsRangeCalculator: calculator, + recommendationProcessor: recommendationProcessor, + } +} + +// GetContainersResourcesForPod returns recommended request for a given pod. +// The returned slice corresponds 1-1 to containers in the Pod. 
+func (p *inPlaceRecommendationProvider) GetContainersResourcesForPod(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]vpa_api_util.ContainerResources, vpa_api_util.ContainerToAnnotationsMap, error) {
+	if vpa == nil || pod == nil {
+		klog.V(2).InfoS("Can't calculate recommendations, one of VPA or Pod is nil", "vpa", vpa, "pod", pod)
+		return nil, nil, nil
+	}
+	klog.V(2).InfoS("Updating requirements for pod", "pod", pod.Name)
+
+	recommendedPodResources := &vpa_types.RecommendedPodResources{}
+
+	if vpa.Status.Recommendation != nil {
+		var err error
+		// ignore annotations as they cannot be used when patching the resize subresource
+		recommendedPodResources, _, err = p.recommendationProcessor.Apply(vpa, pod)
+		if err != nil {
+			klog.V(2).InfoS("Cannot process recommendation for pod", "pod", klog.KObj(pod))
+			return nil, nil, err
+		}
+	}
+	containerLimitRange, err := p.limitsRangeCalculator.GetContainerLimitRangeItem(pod.Namespace)
+	if err != nil {
+		return nil, nil, fmt.Errorf("error getting containerLimitRange: %s", err)
+	}
+	var resourcePolicy *vpa_types.PodResourcePolicy
+	if vpa.Spec.UpdatePolicy == nil || vpa.Spec.UpdatePolicy.UpdateMode == nil || *vpa.Spec.UpdatePolicy.UpdateMode != vpa_types.UpdateModeOff {
+		resourcePolicy = vpa.Spec.ResourcePolicy
+	}
+	containerResources := recommendation.GetContainersResources(pod, resourcePolicy, *recommendedPodResources, containerLimitRange, false, nil)
+
+	// Ensure that we are not propagating empty resource key if any.
+	for _, resource := range containerResources {
+		if resource.RemoveEmptyResourceKeyIfAny() {
+			klog.InfoS("An empty resource key was found and purged", "pod", klog.KObj(pod), "vpa", klog.KObj(vpa))
+		}
+	}
+
+	return containerResources, nil, nil
+}
diff --git a/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go b/vertical-pod-autoscaler/pkg/updater/inplace/inplace_updated.go
similarity index 98%
rename from vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go
rename to vertical-pod-autoscaler/pkg/updater/inplace/inplace_updated.go
index 98792d7b2c0..c353325df43 100644
--- a/vertical-pod-autoscaler/pkg/updater/eviction/last_inplace_update.go
+++ b/vertical-pod-autoscaler/pkg/updater/inplace/inplace_updated.go
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package eviction
+package inplace
 
 import (
 	core "k8s.io/api/core/v1"
diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go
new file mode 100644
index 00000000000..9d68e78f67d
--- /dev/null
+++ b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go
@@ -0,0 +1,83 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package inplace
+
+import (
+	"fmt"
+
+	core "k8s.io/api/core/v1"
+	resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation"
+	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
+)
+
+type resourcesInplaceUpdatesPatchCalculator struct {
+	recommendationProvider recommendation.Provider
+}
+
+// NewResourceInPlaceUpdatesCalculator returns a calculator for
+// in-place resource update patches.
+func NewResourceInPlaceUpdatesCalculator(recommendationProvider recommendation.Provider) patch.Calculator {
+	return &resourcesInplaceUpdatesPatchCalculator{
+		recommendationProvider: recommendationProvider,
+	}
+}
+
+// TODO(maxcao13): This calculator's patches should only be marshalled as a JSON patch against the pod's "resize" subresource; it cannot patch pod annotations.
+// If we do need to calculate patches that add annotations, we can create a separate calculator for that.
+func (c *resourcesInplaceUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) {
+	result := []resource_admission.PatchRecord{}
+
+	containersResources, _, err := c.recommendationProvider.GetContainersResourcesForPod(pod, vpa)
+	if err != nil {
+		return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err)
+	}
+
+	for i, containerResources := range containersResources {
+		newPatches := getContainerPatch(pod, i, containerResources)
+		result = append(result, newPatches...)
+	}
+
+	return result, nil
+}
+
+func getContainerPatch(pod *core.Pod, i int, containerResources vpa_api_util.ContainerResources) []resource_admission.PatchRecord {
+	var patches []resource_admission.PatchRecord
+	// Add empty resources object if missing.
+	if pod.Spec.Containers[i].Resources.Limits == nil &&
+		pod.Spec.Containers[i].Resources.Requests == nil {
+		patches = append(patches, patch.GetPatchInitializingEmptyResources(i))
+	}
+
+	patches = appendPatches(patches, pod.Spec.Containers[i].Resources.Requests, i, containerResources.Requests, "requests")
+	patches = appendPatches(patches, pod.Spec.Containers[i].Resources.Limits, i, containerResources.Limits, "limits")
+
+	return patches
+}
+
+func appendPatches(patches []resource_admission.PatchRecord, current core.ResourceList, containerIndex int, resources core.ResourceList, fieldName string) []resource_admission.PatchRecord {
+	// Add empty object if it's missing and we're about to fill it.
+	if current == nil && len(resources) > 0 {
+		patches = append(patches, patch.GetPatchInitializingEmptyResourcesSubfield(containerIndex, fieldName))
+	}
+	for resource, request := range resources {
+		patches = append(patches, patch.GetAddResourceRequirementValuePatch(containerIndex, fieldName, resource, request))
+	}
+	return patches
+}
diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go
index 98950928a09..83257c007ac 100644
--- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go
+++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go
@@ -415,7 +415,6 @@ func (u *updater) AttemptInPlaceScalingIfPossible(ctx context.Context, vpaSize i
 		return
 	}
 
-	// get lastInPlaceUpdateTime annotation, if doesn't exist, set it to now
 	lastUpdateTime, exists := u.lastInPlaceUpdateAttemptTimeMap[eviction.GetPodID(pod)]
 	if !exists {
 		klog.V(4).Infof("In-place update in progress for %s/%s, but no lastInPlaceUpdateTime found, setting it to now", pod.Namespace, pod.Name)
diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go
index 78e3707f53d..4b7f6a805bc 100644
--- a/vertical-pod-autoscaler/pkg/updater/main.go
+++ b/vertical-pod-autoscaler/pkg/updater/main.go
@@ -31,7 +31,7 @@ import (
 	vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target"
 	controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher"
-	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/eviction"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/inplace"
 	updater "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/logic"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/priority"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/limitrange"
@@ -57,6 +57,7 @@ var (
 	minReplicas = flag.Int("min-replicas", 2,
 		`Minimum number of replicas to perform update`)
 
+	// TODO(maxcao13): Should this be combined into disruption tolerance, should we have a separate flag for that, or should we just not rename it?
 	evictionToleranceFraction = flag.Float64("eviction-tolerance", 0.5,
 		`Fraction of replica count that can be evicted for update, if more than one pod can be evicted.`)
 
@@ -183,14 +184,11 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) {
 
 	ignoredNamespaces := strings.Split(commonFlag.IgnoredVpaObjectNamespaces, ",")
 
-	inPlaceRecommendationProvider := eviction.NewInPlaceProvider(limitRangeCalculator, vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator))
+	inPlaceRecommendationProvider := inplace.NewInPlaceRecommendationProvider(limitRangeCalculator, vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator))
 
 	// TODO(maxcao13): figure out if we need to use NewInPlaceUpdatedCalculator; does it help the user to know if their pod was updated in-place as an annotation?
-	// TODO(maxcao13): also figure out if we should strip down the resourceUpdatesCalculator just for in-place updates into a new calculator
-	// The use of resourceUpdatesCalculator adds extra unnecessary annotations since it is duplicated in the admission controller
-	// calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider), eviction.NewInPlaceUpdatedCalculator()}
-	calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(inPlaceRecommendationProvider)}
+	calculators := []patch.Calculator{inplace.NewResourceInPlaceUpdatesCalculator(inPlaceRecommendationProvider), inplace.NewInPlaceUpdatedCalculator()}
 
 	// TODO: use SharedInformerFactory in updater
 	updater, err := updater.NewUpdater(
diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go
index 441de7cb88b..2cb64ed1e64 100644
--- a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go
+++ b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_inplace_update.go
@@ -16,8 +16,10 @@ limitations under the License.
 
 package annotations
 
+// TODO(maxcao13): This annotation currently doesn't do anything. Do we want a purely cosmetic annotation showing that the VPA resized the pod in place?
+
 const (
-	// VpaInPlaceUpdatedLabel is a label used by the vpa last in place update annotation.
+	// VpaInPlaceUpdatedLabel is the label used by the VPA in-place updated annotation.
 	VpaInPlaceUpdatedLabel = "vpaInPlaceUpdated"
 )
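
For context, a minimal runnable Go sketch of the disruption-tolerance arithmetic that CanInPlaceUpdate performs in the pods_eviction_restriction.go hunks above. This is a sketch outside the patch: groupStats and canDisrupt are hypothetical stand-ins whose field names mirror singleGroupStats, and evictionTolerance is assumed to be derived from the eviction-tolerance flag.

package main

import "fmt"

// groupStats is a stand-in for the updater's per-controller singleGroupStats;
// only the fields used by the tolerance check are shown.
type groupStats struct {
	configured        int // replicas the controller wants
	running           int // pods currently running
	evicted           int // pods already evicted in this loop
	inPlaceUpdating   int // pods with an in-place resize in flight
	evictionTolerance int // pods that may be disrupted at once (assumed to come from the eviction-tolerance fraction)
}

// canDisrupt mirrors the two branches above: either enough pods stay alive after
// subtracting evictions and in-flight resizes, or the whole group is healthy with
// zero tolerance, in which case a single disruption is still allowed.
func canDisrupt(s groupStats) bool {
	shouldBeAlive := s.configured - s.evictionTolerance
	actuallyAlive := s.running - (s.evicted + s.inPlaceUpdating)
	if actuallyAlive > shouldBeAlive {
		return true
	}
	return s.running == s.configured && s.evictionTolerance == 0 &&
		s.evicted == 0 && s.inPlaceUpdating == 0
}

func main() {
	// 10 replicas, tolerance 2, one resize already in flight: 9 alive > 8 required.
	fmt.Println(canDisrupt(groupStats{configured: 10, running: 10, inPlaceUpdating: 1, evictionTolerance: 2}))
	// 3 healthy replicas with tolerance 0: the zero-tolerance branch still permits one update.
	fmt.Println(canDisrupt(groupStats{configured: 3, running: 3}))
}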
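
Likewise, a rough sketch of the JSON patch body that InPlaceUpdate marshals and sends to the pod's resize subresource. The patchRecord type mirrors the shape of the admission controller's PatchRecord, while the container index, the /spec/containers/... paths, and the resource values are illustrative assumptions rather than output taken from the patch.

package main

import (
	"encoding/json"
	"fmt"
)

// patchRecord mirrors the shape of a JSON Patch operation; per RFC 6902,
// "add" on an existing object member replaces its value.
type patchRecord struct {
	Op    string      `json:"op"`
	Path  string      `json:"path"`
	Value interface{} `json:"value"`
}

func main() {
	// Hypothetical recommendation for the first container: raise CPU and memory requests.
	resourcePatches := []patchRecord{
		{Op: "add", Path: "/spec/containers/0/resources/requests/cpu", Value: "250m"},
		{Op: "add", Path: "/spec/containers/0/resources/requests/memory", Value: "512Mi"},
	}
	body, err := json.Marshal(resourcePatches)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
	// The updater then sends this body with the same call shape as InPlaceUpdate above:
	//   client.CoreV1().Pods(ns).Patch(ctx, name, types.JSONPatchType, body, metav1.PatchOptions{}, "resize")
}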