@@ -124,20 +124,19 @@ func (e *podsEvictionRestrictionImpl) CanEvict(pod *apiv1.Pod) bool {
 			// they might cause disruption. We assume pods will not be both in-place updated and evicted in the same pass, but
 			// we need eviction to take the numbers into account so we don't violate our disruption tolerances.
 			// If we're already resizing this pod, don't do anything to it, unless we failed to resize it, then we want to evict it.
+			klog.V(4).InfoS("Pod disruption tolerance",
+				"pod", klog.KObj(pod),
+				"running", singleGroupStats.running,
+				"configured", singleGroupStats.configured,
+				"tolerance", singleGroupStats.evictionTolerance,
+				"evicted", singleGroupStats.evicted,
+				"updating", singleGroupStats.inPlaceUpdating)
 			if IsInPlaceUpdating(pod) {
-				klog.V(4).InfoS("Pod disruption tolerance",
-					"pod", pod.Name,
-					"running", singleGroupStats.running,
-					"configured", singleGroupStats.configured,
-					"tolerance", singleGroupStats.evictionTolerance,
-					"evicted", singleGroupStats.evicted,
-					"updating", singleGroupStats.inPlaceUpdating)
-
 				if singleGroupStats.running-(singleGroupStats.evicted+(singleGroupStats.inPlaceUpdating-1)) > shouldBeAlive {
-					klog.V(4).Infof("Would be able to evict, but already resizing %s", pod.Name)
+					klog.V(4).InfoS("Would be able to evict, but already resizing", "pod", klog.KObj(pod))
 
 					if pod.Status.Resize == apiv1.PodResizeStatusInfeasible || pod.Status.Resize == apiv1.PodResizeStatusDeferred {
-						klog.Warningf("Attempted in-place resize of %s impossible, should now evict", pod.Name)
+						klog.InfoS("Attempted in-place resize was impossible, should now evict", "pod", klog.KObj(pod), "resizeStatus", pod.Status.Resize)
 						return true
 					}
 				}
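A minimal, self-contained sketch of the tolerance arithmetic in this hunk, assuming only the singleGroupStats fields it references (running, configured, evicted, inPlaceUpdating, evictionTolerance). The struct and helper names below are illustrative, not the types used in this file.

```go
package main

import "fmt"

// groupStats mirrors the singleGroupStats fields referenced in the hunk above.
type groupStats struct {
	configured        int // replicas the controller wants
	running           int // replicas currently running
	evicted           int // replicas already evicted in this pass
	inPlaceUpdating   int // replicas currently being resized in place
	evictionTolerance int // replicas we are allowed to disrupt at once
}

// canEvictResizingPod reproduces the check above: a pod that is already being
// resized in place counts as disrupted, but because it is the very pod we
// would evict, it is subtracted back out (inPlaceUpdating - 1) before
// comparing against the minimum number of pods that should stay alive.
func canEvictResizingPod(s groupStats) bool {
	shouldBeAlive := s.configured - s.evictionTolerance
	return s.running-(s.evicted+(s.inPlaceUpdating-1)) > shouldBeAlive
}

func main() {
	s := groupStats{configured: 5, running: 5, evicted: 0, inPlaceUpdating: 1, evictionTolerance: 1}
	// shouldBeAlive = 4 and 5-(0+0) = 5 > 4, so the stuck pod could still be evicted.
	fmt.Println(canEvictResizingPod(s))
}
```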
@@ -442,74 +441,67 @@ func setUpInformer(kubeClient kube_client.Interface, kind controllerKind) (cache
 
 // CanInPlaceUpdate performs the same checks
 func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool {
-
 	cr, present := e.podToReplicaCreatorMap[GetPodID(pod)]
 	if present {
-
 		// If our QoS class is guaranteed, we can't change the resources without a restart
-		// TODO(maxcao13): kubelet already prevents a resize of a guaranteed pod, so should we still check this early?
 		if pod.Status.QOSClass == apiv1.PodQOSGuaranteed {
-			klog.Warningf("Can't resize %s in-place, pod QoS is %s", pod.Name, pod.Status.QOSClass)
 			return false
 		}
 
-		// If we're already resizing this pod, don't do it again
 		if IsInPlaceUpdating(pod) {
-			klog.Warningf("Not resizing %s, already resizing it", pod.Name)
 			return false
 		}
 
 		noRestartPoliciesPopulated := true
+		isPodRestartPolicyNever := pod.Spec.RestartPolicy == apiv1.RestartPolicyNever
 
 		for _, container := range pod.Spec.Containers {
 			// If some of these are populated, we know it at least understands resizing
 			if len(container.ResizePolicy) > 0 {
 				noRestartPoliciesPopulated = false
 			}
 
-			// TODO(maxcao13): Do we have to check the policy resource too? i.e. if only memory is getting scaled, then only check the memory resize policy?
 			for _, policy := range container.ResizePolicy {
 				if policy.RestartPolicy != apiv1.NotRequired {
-					klog.Warningf("in-place resize of %s will cause container disruption, container %s restart policy is %v", pod.Name, container.Name, policy.RestartPolicy)
+					klog.V(4).InfoS("in-place resize of pod will cause container disruption because of container resize policy", "pod", klog.KObj(pod), "container", container.Name, "containerResizeRestartPolicy", policy.RestartPolicy)
 					// TODO(jkyros): is there something that prevents this from happening elsewhere in the API?
-					if pod.Spec.RestartPolicy == apiv1.RestartPolicyNever {
-						klog.Warningf("in-place resize of %s not possible, container %s resize policy is %v but pod restartPolicy is %v", pod.Name, container.Name, policy.RestartPolicy, pod.Spec.RestartPolicy)
+					if isPodRestartPolicyNever {
+						klog.InfoS("in-place resize of pod not possible, container resize policy and pod restartPolicy conflict", "pod", klog.KObj(pod), "container", container.Name, "containerResizeRestartPolicy", policy.RestartPolicy, "podRestartPolicy", pod.Spec.RestartPolicy)
 						return false
 					}
-
 				}
 			}
 		}
 
 		// If none of the policies are populated, our feature is probably not enabled, so we can't resize in-place regardless
 		if noRestartPoliciesPopulated {
-			klog.Warningf("impossible to resize %s in-place, container resize policies are not populated", pod.Name)
+			klog.InfoS("impossible to resize pod in-place, container resize policies are not populated", "pod", klog.KObj(pod))
 		}
 
 		// TODO(jkyros): Come back and handle sidecar containers at some point since they're weird?
 		singleGroupStats, present := e.creatorToSingleGroupStatsMap[cr]
 		// If we're pending, we can't in-place resize
 		// TODO(jkyros): are we sure we can't? Should I just set this to "if running"?
 		if pod.Status.Phase == apiv1.PodPending {
-			klog.V(4).Infof("Can't resize pending pod %s", pod.Name)
+			klog.V(4).InfoS("Can't resize pending pod", "pod", klog.KObj(pod))
 			return false
 		}
-		// This second "present" check is against the creator-to-group-stats map, not the pod-to-replica map
-		// TODO(maxcao13): Not sure, but do we need disruption tolerance for in-place updates? I guess we do since they are not guaranteed to be disruptionless...
-		// TODO(maxcao13): If this is okay, I may have to rename evictionTolerance to disruption tolerance
+		// TODO(maxcao13): May need to rename evictionTolerance to disruptionTolerance
 		if present {
+			// minimum number of pods that should be running to tolerate disruptions
+			shouldBeAlive := singleGroupStats.configured - singleGroupStats.evictionTolerance
+			// number of running pods not already being evicted or resized
+			actuallyAlive := singleGroupStats.running - (singleGroupStats.evicted + singleGroupStats.inPlaceUpdating)
 			klog.V(4).InfoS("Checking pod disruption tolerance",
 				"podName", pod.Name,
 				"configuredPods", singleGroupStats.configured,
 				"runningPods", singleGroupStats.running,
 				"evictedPods", singleGroupStats.evicted,
 				"inPlaceUpdatingPods", singleGroupStats.inPlaceUpdating,
 				"evictionTolerance", singleGroupStats.evictionTolerance,
+				"shouldBeAlive", shouldBeAlive,
+				"actuallyAlive", actuallyAlive,
 			)
-			// minimum number of pods that should be running to tolerate disruptions
-			shouldBeAlive := singleGroupStats.configured - singleGroupStats.evictionTolerance
-			// number of pods that are actually running
-			actuallyAlive := singleGroupStats.running - (singleGroupStats.evicted + singleGroupStats.inPlaceUpdating)
 			if actuallyAlive > shouldBeAlive {
 				klog.V(4).InfoS("Pod can be resized in-place; more pods are running than required", "podName", pod.Name, "shouldBeAlive", shouldBeAlive, "actuallyAlive", actuallyAlive)
 				return true
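To make the restart-policy conflict concrete, here is a hedged, standalone sketch built only on the k8s.io/api/core/v1 types used in this diff; the helper name resizeWouldViolateRestartPolicy is invented for the example and is not part of this file.

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

// resizeWouldViolateRestartPolicy reports whether any container needs a
// restart in order to be resized while the pod itself is never allowed to
// restart, mirroring the isPodRestartPolicyNever check introduced above.
func resizeWouldViolateRestartPolicy(pod *apiv1.Pod) bool {
	if pod.Spec.RestartPolicy != apiv1.RestartPolicyNever {
		return false
	}
	for _, container := range pod.Spec.Containers {
		for _, policy := range container.ResizePolicy {
			if policy.RestartPolicy != apiv1.NotRequired {
				return true
			}
		}
	}
	return false
}

func main() {
	pod := &apiv1.Pod{
		Spec: apiv1.PodSpec{
			RestartPolicy: apiv1.RestartPolicyNever,
			Containers: []apiv1.Container{{
				Name: "app",
				ResizePolicy: []apiv1.ContainerResizePolicy{{
					ResourceName:  apiv1.ResourceMemory,
					RestartPolicy: apiv1.RestartContainer,
				}},
			}},
		},
	}
	// true: resizing memory would restart a container in a never-restart pod.
	fmt.Println(resizeWouldViolateRestartPolicy(pod))
}
```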
@@ -533,16 +525,10 @@ func (e *podsEvictionRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) bool {
 func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error {
 	cr, present := e.podToReplicaCreatorMap[GetPodID(podToUpdate)]
 	if !present {
-		return fmt.Errorf("pod not suitable for eviction %v : not in replicated pods map", podToUpdate.Name)
+		return fmt.Errorf("pod not suitable for in-place update %v: not in replicated pods map", podToUpdate.Name)
 	}
 
-	// TODO(maxcao13): Not sure if we need to check again here, but commenting it out for now in case we do
-	// if !e.CanInPlaceUpdate(podToUpdate) {
-	// 	return fmt.Errorf("cannot update pod %v in place : number of in-flight updates exceeded", podToUpdate.Name)
-	// }
-
 	// TODO(maxcao13): There's maybe a more efficient way to do this, but this is what we have for now
-
 	// separate patches since we have to patch resize and spec separately
 	resourcePatches := []resource_updates.PatchRecord{}
 	annotationPatches := []resource_updates.PatchRecord{}
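For context on the two patch lists: the resource patches are marshaled further down as RFC 6902 JSON Patch operations and sent to the pod's resize subresource. A minimal sketch of what such a payload could look like, using a local stand-in struct because the exact shape of resource_updates.PatchRecord is assumed here, not confirmed:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// patchRecord is an illustrative stand-in for resource_updates.PatchRecord.
type patchRecord struct {
	Op    string      `json:"op"`
	Path  string      `json:"path"`
	Value interface{} `json:"value"`
}

func main() {
	resourcePatches := []patchRecord{
		{Op: "replace", Path: "/spec/containers/0/resources/requests/cpu", Value: "500m"},
		{Op: "replace", Path: "/spec/containers/0/resources/requests/memory", Value: "256Mi"},
	}
	body, err := json.Marshal(resourcePatches)
	if err != nil {
		panic(err)
	}
	// A body like this would be sent with JSONPatchType to the pod's "resize"
	// subresource; annotation patches are applied to the pod object itself.
	fmt.Println(string(body))
}
```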
@@ -552,7 +538,7 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 	for i, calculator := range e.patchCalculators {
 		p, err := calculator.CalculatePatches(podToUpdate, vpa)
 		if err != nil {
-			return fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", podToUpdate.Namespace, podToUpdate.Name, err)
+			return err
 		}
 		klog.V(4).InfoS("Calculated patches for pod", "pod", klog.KObj(podToUpdate), "patches", p)
 		// TODO(maxcao13): change how this works later, this is gross and depends on the resource calculator being first in the slice
@@ -566,35 +552,28 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 	if len(resourcePatches) > 0 {
 		patch, err := json.Marshal(resourcePatches)
 		if err != nil {
-			klog.Errorf("Cannot marshal the patch %v: %v", resourcePatches, err)
 			return err
 		}
 
 		res, err := e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{}, "resize")
 		if err != nil {
-			klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
 			return err
 		}
 		klog.V(4).InfoS("In-place patched pod /resize subresource using patches", "pod", klog.KObj(res), "patches", string(patch))
 
-		// TODO(maxcao13): whether or not we apply annotation patches should depend on resource patches?
 		if len(annotationPatches) > 0 {
 			patch, err := json.Marshal(annotationPatches)
 			if err != nil {
-				klog.Errorf("Cannot marshal the patch %v: %v", annotationPatches, err)
 				return err
 			}
 			res, err = e.client.CoreV1().Pods(podToUpdate.Namespace).Patch(context.TODO(), podToUpdate.Name, k8stypes.JSONPatchType, patch, metav1.PatchOptions{})
 			if err != nil {
-				klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
 				return err
 			}
 			klog.V(4).InfoS("Patched pod annotations", "pod", klog.KObj(res), "patches", string(patch))
 		}
 	} else {
-		err := fmt.Errorf("no patches to apply to %s", podToUpdate.Name)
-		klog.ErrorS(err, "Failed to patch pod", "pod", klog.KObj(podToUpdate))
-		return err
+		return fmt.Errorf("no resource patches were calculated to apply")
 	}
 
 	// TODO(maxcao13): If this keeps getting called on the same object with the same reason, it is considered a patch request.
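The annotation leg above is a plain JSON Patch against the pod object (no "resize" subresource). A hedged sketch of such a patch; the annotation key is purely illustrative, "/" in a key must be escaped per RFC 6901, and the "add" op assumes the annotations map already exists:

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// patchRecord is an illustrative stand-in for resource_updates.PatchRecord.
type patchRecord struct {
	Op    string      `json:"op"`
	Path  string      `json:"path"`
	Value interface{} `json:"value"`
}

// escapeJSONPointer escapes a map key for use in a JSON Pointer path
// (RFC 6901): "~" becomes "~0" and "/" becomes "~1", in that order.
func escapeJSONPointer(key string) string {
	key = strings.ReplaceAll(key, "~", "~0")
	return strings.ReplaceAll(key, "/", "~1")
}

func main() {
	key := "example.k8s.io/last-in-place-update" // illustrative key, not one used by this code
	annotationPatch := []patchRecord{{
		Op:    "add",
		Path:  "/metadata/annotations/" + escapeJSONPointer(key),
		Value: "2024-01-01T00:00:00Z",
	}}
	body, _ := json.Marshal(annotationPatch)
	// Applied with JSONPatchType and no subresource, like the second Patch call above.
	fmt.Println(string(body))
}
```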
@@ -607,30 +586,19 @@ func (e *podsEvictionRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa
 	if podToUpdate.Status.Phase == apiv1.PodRunning {
 		singleGroupStats, present := e.creatorToSingleGroupStatsMap[cr]
 		if !present {
-			klog.Errorf("Internal error - cannot find stats for replication group %v", cr)
+			klog.InfoS("Internal error - cannot find stats for replication group", "pod", klog.KObj(podToUpdate), "podReplicaCreator", cr)
 		} else {
 			singleGroupStats.inPlaceUpdating = singleGroupStats.inPlaceUpdating + 1
 			e.creatorToSingleGroupStatsMap[cr] = singleGroupStats
 		}
 	} else {
-		klog.Warningf("I updated, but my pod phase was %s", podToUpdate.Status.Phase)
+		klog.InfoS("Attempted in-place update, but pod was not running", "pod", klog.KObj(podToUpdate), "phase", podToUpdate.Status.Phase)
 	}
 
 	return nil
 }
 
 // IsInPlaceUpdating checks whether or not the given pod is currently in the middle of an in-place update
 func IsInPlaceUpdating(podToCheck *apiv1.Pod) (isUpdating bool) {
-	// If the pod is currently updating we need to tally that
-	if podToCheck.Status.Resize != "" {
-		klog.V(4).InfoS("Pod is currently resizing", "pod", klog.KObj(podToCheck), "status", podToCheck.Status.Resize)
-		// Proposed -> Deferred -> InProgress, but what about Infeasible?
-		if podToCheck.Status.Resize == apiv1.PodResizeStatusInfeasible {
-			klog.V(4).InfoS("Resource proposal for pod is Infeasible, we're probably stuck like this until we evict", "pod", klog.KObj(podToCheck))
-		} else if podToCheck.Status.Resize == apiv1.PodResizeStatusDeferred {
-			klog.V(4).InfoS("Resource proposal for pod is Deferred, we're probably stuck like this until we evict", "pod", klog.KObj(podToCheck))
-		}
-		return true
-	}
-	return false
+	return podToCheck.Status.Resize != ""
 }
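The simplified IsInPlaceUpdating treats any non-empty resize status as "updating". A small hedged sketch of how it behaves across the PodResizeStatus values (Infeasible and Deferred appear in this commit; Proposed and InProgress come from the core/v1 API); the helper is copied locally just to keep the example self-contained:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

// isInPlaceUpdating is a local copy of the simplified helper above.
func isInPlaceUpdating(pod *apiv1.Pod) bool {
	return pod.Status.Resize != ""
}

func main() {
	for _, status := range []apiv1.PodResizeStatus{
		"", // not resizing at all
		apiv1.PodResizeStatusProposed,
		apiv1.PodResizeStatusDeferred,
		apiv1.PodResizeStatusInProgress,
		apiv1.PodResizeStatusInfeasible, // still counts as updating; CanEvict handles evicting this case
	} {
		pod := &apiv1.Pod{Status: apiv1.PodStatus{Resize: status}}
		fmt.Printf("resize=%q -> updating=%v\n", status, isInPlaceUpdating(pod))
	}
}
```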