@@ -291,12 +291,9 @@ func (f *podsEvictionRestrictionFactoryImpl) NewPodsEvictionRestriction(pods []*
 			}
 			if IsInPlaceUpdating(pod) {
 				singleGroup.inPlaceUpdating = singleGroup.inPlaceUpdating + 1
-
 			}
 		}
 		singleGroup.running = len(replicas) - singleGroup.pending
-
-		// This has to happen last, singlegroup never gets returned, only this does
 		creatorToSingleGroupStatsMap[creator] = singleGroup
 
 	}
@@ -460,8 +457,6 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool {
 		return false
 	}
 
-	// TODO(jkyros): is there a pod-level thing we can use?
-	// Go through each container, and check to see if this is going to cause a disruption or not
 	noRestartPoliciesPopulated := true
 
 	for _, container := range pod.Spec.Containers {
@@ -498,18 +493,31 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool {
 		return false
 	}
 	// This second "present" check is against the creator-to-group-stats map, not the pod-to-replica map
+	// TODO(maxcao13): Not sure, but do we need disruption tolerance for in-place updates? I guess we do since they are not guaranteed to be disruptionless...
+	// TODO(maxcao13): If this is okay, I may have to rename evictionTolerance to disruption tolerance
 	if present {
-		klog.V(4).Infof("pod %s disruption tolerance run: %d config: %d tolerance: %d evicted: %d updating: %d", pod.Name, singleGroupStats.running, singleGroupStats.configured, singleGroupStats.evictionTolerance, singleGroupStats.evicted, singleGroupStats.inPlaceUpdating)
+		klog.V(4).InfoS("Checking pod disruption tolerance",
+			"podName", pod.Name,
+			"configuredPods", singleGroupStats.configured,
+			"runningPods", singleGroupStats.running,
+			"evictedPods", singleGroupStats.evicted,
+			"inPlaceUpdatingPods", singleGroupStats.inPlaceUpdating,
+			"evictionTolerance", singleGroupStats.evictionTolerance,
+		)
+		// minimum number of pods that should be running to tolerate disruptions
 		shouldBeAlive := singleGroupStats.configured - singleGroupStats.evictionTolerance
-		if singleGroupStats.running - (singleGroupStats.evicted + singleGroupStats.inPlaceUpdating) > shouldBeAlive {
-			klog.V(4).Infof("Should be alive: %d, Actually alive: %d", shouldBeAlive, singleGroupStats.running - (singleGroupStats.evicted + singleGroupStats.inPlaceUpdating))
+		// number of pods that are actually running
+		actuallyAlive := singleGroupStats.running - (singleGroupStats.evicted + singleGroupStats.inPlaceUpdating)
+		if actuallyAlive > shouldBeAlive {
+			klog.V(4).InfoS("Pod can be resized in-place; more pods are running than required", "podName", pod.Name, "shouldBeAlive", shouldBeAlive, "actuallyAlive", actuallyAlive)
 			return true
 		}
-		// If all pods are running and eviction tolerance is small update 1 pod.
+
+		// If all pods are running, no pods are being evicted or updated, and eviction tolerance is small, we can resize in-place
 		if singleGroupStats.running == singleGroupStats.configured &&
 			singleGroupStats.evictionTolerance == 0 &&
 			singleGroupStats.evicted == 0 && singleGroupStats.inPlaceUpdating == 0 {
-			klog.V(4).Infof("--> we are in good shape on %s, it is tolerant", pod.Name)
+			klog.V(4).InfoS("Pod can be resized in-place; all pods are running and eviction tolerance is 0", "podName", pod.Name)
 			return true
 		}
 	}
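
To make the tolerance arithmetic in the hunk above concrete, here is a small self-contained sketch with made-up numbers (the real values come from `singleGroupStats`, which this snippet does not reproduce): with 5 configured replicas and an eviction tolerance of 1, one in-flight in-place resize already brings the group down to the minimum, so a second resize would be refused.

```go
package main

import "fmt"

func main() {
	// Hypothetical replica-group numbers; in the updater these come from singleGroupStats.
	configured := 5        // replicas the controller wants
	evictionTolerance := 1 // how many pods the group may lose at once
	running := 5
	evicted := 0
	inPlaceUpdating := 1 // one resize already in flight

	// Mirrors the check in CanInPlaceUpdate above.
	shouldBeAlive := configured - evictionTolerance
	actuallyAlive := running - (evicted + inPlaceUpdating)
	fmt.Printf("shouldBeAlive=%d actuallyAlive=%d allowed=%t\n",
		shouldBeAlive, actuallyAlive, actuallyAlive > shouldBeAlive)
	// Output: shouldBeAlive=4 actuallyAlive=4 allowed=false
}
```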
@@ -526,43 +534,61 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 		return fmt.Errorf("pod not suitable for eviction %v : not in replicated pods map", podToUpdate.Name)
 	}
 
-	if !e.CanInPlaceUpdate(podToUpdate) {
-		return fmt.Errorf("cannot update pod %v in place : number of in-flight updates exceeded", podToUpdate.Name)
-	}
+	// TODO(maxcao13): Not sure if we need to check again here, but commenting it out for now in case we do
+	// if !e.CanInPlaceUpdate(podToUpdate) {
+	// 	return fmt.Errorf("cannot update pod %v in place : number of in-flight updates exceeded", podToUpdate.Name)
+	// }
 
-	// TODO(maxcao13): There's probably a more efficient way to do this, but this is what we have for now
-	// Don't reinvent the wheel, just use the resource updates patch calculator using by the admission controller
-	// See the below TODO for refactoring this
-	patches := []resource_updates.PatchRecord{}
+	// TODO(maxcao13): There's maybe a more efficient way to do this, but this is what we have for now
+
+	// separate patches since we have to patch resize and spec separately
+	resourcePatches := []resource_updates.PatchRecord{}
+	annotationPatches := []resource_updates.PatchRecord{}
 	if podToUpdate.Annotations == nil {
-		patches = append(patches, patch.GetAddEmptyAnnotationsPatch())
+		annotationPatches = append(annotationPatches, patch.GetAddEmptyAnnotationsPatch())
 	}
-	for _, calculator := range e.patchCalculators {
+	for i, calculator := range e.patchCalculators {
 		p, err := calculator.CalculatePatches(podToUpdate, vpa)
 		if err != nil {
 			return fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", podToUpdate.Namespace, podToUpdate.Name, err)
 		}
 		klog.V(4).InfoS("Calculated patches for pod", "pod", klog.KObj(podToUpdate), "patches", p)
-		patches = append(patches, p...)
+		// TODO(maxcao13): change how this works later, this is gross and depends on the resource calculator being first in the slice
+		// we may not even want the updater to patch pod annotations at all
+		if i == 0 {
+			resourcePatches = append(resourcePatches, p...)
+		} else {
+			annotationPatches = append(annotationPatches, p...)
+		}
 	}
-
-	if len(patches) > 0 {
-		patch, err := json.Marshal(patches)
+	if len(resourcePatches) > 0 {
+		patch, err := json.Marshal(resourcePatches)
 		if err != nil {
-			klog.Errorf("Cannot marshal the patch %v: %v", patches, err)
+			klog.Errorf("Cannot marshal the patch %v: %v", resourcePatches, err)
 			return err
 		}
 
-		// TODO(maxcao13): for now, this does not allow other patches to be applied since we are patching the resize subresource
-		// If we want other annotations to be patched in the pod using patchCalculators, we need to generalize this and refactor the
-		// NewResourceUpdatesCalculator and create a stripped down calculator just for calculating in-place resize patches
 		res, err := e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{}, "resize")
 		if err != nil {
 			klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
 			return err
 		}
 		klog.V(4).InfoS("In-place patched pod /resize subresource using patches", "pod", klog.KObj(res), "patches", string(patch))
 
+		// TODO(maxcao13): whether or not we apply annotation patches should depend on resource patches?
+		if len(annotationPatches) > 0 {
+			patch, err := json.Marshal(annotationPatches)
+			if err != nil {
+				klog.Errorf("Cannot marshal the patch %v: %v", annotationPatches, err)
+				return err
+			}
+			res, err = e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{})
+			if err != nil {
+				klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
+				return err
+			}
+			klog.V(4).InfoS("Patched pod annotations", "pod", klog.KObj(res), "patches", string(patch))
+		}
 	} else {
 		err := fmt.Errorf("no patches to apply to %s", podToUpdate.Name)
 		klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
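
The hunk above splits the calculated patches into two API calls: resource changes go to the pod's `resize` subresource, while annotation changes are patched on the pod object itself. Below is a minimal standalone sketch of that two-step flow using client-go, with hand-written example patches standing in for the VPA patch calculators; the namespace, pod name, and annotation key are made up for illustration.

```go
package example

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	k8stypes "k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// applySplitPatches mirrors the two-step patching in the hunk above, but with
// hard-coded example patches instead of the VPA patch calculators.
func applySplitPatches(client kubernetes.Interface) error {
	// Resource changes must target the "resize" subresource.
	resourcePatch := []byte(`[{"op":"replace","path":"/spec/containers/0/resources/requests/cpu","value":"200m"}]`)
	if _, err := client.CoreV1().Pods("default").Patch(context.TODO(), "example-pod",
		k8stypes.JSONPatchType, resourcePatch, metav1.PatchOptions{}, "resize"); err != nil {
		return err
	}

	// Annotations (and any other metadata) are patched on the pod object itself.
	// This assumes the annotations map already exists; the code above prepends
	// patch.GetAddEmptyAnnotationsPatch() when it does not.
	annotationPatch := []byte(`[{"op":"add","path":"/metadata/annotations/example-annotation","value":"true"}]`)
	_, err := client.CoreV1().Pods("default").Patch(context.TODO(), "example-pod",
		k8stypes.JSONPatchType, annotationPatch, metav1.PatchOptions{})
	return err
}
```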
@@ -571,8 +597,8 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 
 	// TODO(maxcao13): If this keeps getting called on the same object with the same reason, it is considered a patch request.
 	// And we fail to have the corresponding rbac for it. So figure out if we need this later.
-	eventRecorder.Event(podToUpdate, apiv1.EventTypeNormal, "InPlaceResizedByVPA",
-		"Pod was resized in place by VPA Updater.")
+	// TODO(maxcao13): Do we even need to emit an event? The api server might reject the resize request. Should we rename this to InPlaceResizeAttempted?
+	// eventRecorder.Event(podToUpdate, apiv1.EventTypeNormal, "InPlaceResizedByVPA", "Pod was resized in place by VPA Updater.")
 
 	// TODO(jkyros): You need to do this regardless once you update the pod, if it changes phases here as a result, you still
 	// need to catalog what you did
@@ -604,21 +630,5 @@ func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) {
 		}
 		return true
 	}
-
-	// If any of the container resources don't match their spec, it's...updating but the lifecycle hasn't kicked in yet? So we
-	// also need to mark that?
-	/*
-		for num, container := range podToCheck.Spec.Containers {
-			// TODO(jkyros): supported resources only?
-			// Resources can be nil, especially if the feature gate isn't on
-			if podToCheck.Status.ContainerStatuses[num].Resources != nil {
-
-				if !reflect.DeepEqual(container.Resources, *podToCheck.Status.ContainerStatuses[num].Resources) {
-					klog.V(4).Infof("Resize must be in progress for %s, resources for container %s don't match", podToCheck.Name, container.Name)
-					return true
-				}
-			}
-		}*/
 	return false
-
 }