@@ -90,42 +90,55 @@ func (t *NodeLatencyTracker) Process(autoscalingCtx *ca_context.AutoscalingConte
9090 if t .wrapped != nil {
9191 t .wrapped .Process (autoscalingCtx , status )
9292 }
93+
9394 for _ , unremovableNode := range status .UnremovableNodes {
94- nodeName := unremovableNode .Node .Name
95- if info , exists := t .unneededNodes [nodeName ]; exists {
96- duration := time .Since (info .unneededSince )
97- metrics .UpdateScaleDownNodeRemovalLatency (false , duration )
98- klog .V (4 ).Infof ("Node %q is unremovable, became needed again (unneeded for %s)." , nodeName , duration )
99- delete (t .unneededNodes , nodeName )
100- }
95+ t .recordAndCleanup (unremovableNode .Node .Name , false )
10196 }
102- for _ , scaledDownNode := range status .ScaledDownNodes {
103- nodeName := scaledDownNode .Node .Name
104- if info , exists := t .unneededNodes [nodeName ]; exists {
105- duration := time .Since (info .unneededSince )
106- latency := duration - info .removalThreshold
107- metrics .UpdateScaleDownNodeRemovalLatency (true , latency )
108- if latency > scaleDownLatencyLogThreshold {
109- klog .V (2 ).Infof (
110- "Observing deletion for node %s, unneeded for %s (removal threshold was %s)." ,
111- nodeName , duration , info .removalThreshold ,
112- )
113- } else {
114- klog .V (6 ).Infof (
115- "Observing deletion for node %s, unneeded for %s (removal threshold was %s)." ,
116- nodeName , duration , info .removalThreshold ,
117- )
118- }
119- delete (t .unneededNodes , nodeName )
120- }
97+ for _ , node := range status .ScaledDownNodes {
98+ t .recordAndCleanup (node .Node .Name , true )
12199 }
100+
122101 if klog .V (6 ).Enabled () {
123102 for nodeName := range t .unneededNodes {
124103 klog .Infof ("Node %q remains in unneeded list (not scaled down). Continuing to track latency." , nodeName )
125104 }
126105 }
127106}
128107
108+ // recordAndCleanup calculates the time a node spent in the "unneeded" state, updates
109+ // relevant Prometheus metrics, and removes the node from internal tracking.
110+ func (t * NodeLatencyTracker ) recordAndCleanup (nodeName string , isRemoved bool ) {
111+ info , exists := t .unneededNodes [nodeName ]
112+ if ! exists {
113+ return
114+ }
115+ defer delete (t .unneededNodes , nodeName )
116+
117+ duration := time .Since (info .unneededSince )
118+ latency := duration - info .removalThreshold
119+
120+ if isRemoved || latency > 0 {
121+ metrics .UpdateScaleDownNodeRemovalLatency (isRemoved , latency )
122+ }
123+ if isRemoved {
124+ t .logDeletion (nodeName , duration , info .removalThreshold , latency )
125+ } else {
126+ klog .V (4 ).Infof ("Node %q is unremovable, became needed again (unneeded for %s)." ,
127+ nodeName , duration )
128+ }
129+ }
130+
131+ // logDeletion handles the logging for scaled-down nodes,
132+ // using a higher verbosity (V2) if the latency exceeds the configured threshold.
133+ func (t * NodeLatencyTracker ) logDeletion (nodeName string , duration , threshold , latency time.Duration ) {
134+ level := klog .Level (6 )
135+ if latency > scaleDownLatencyLogThreshold {
136+ level = klog .Level (2 )
137+ }
138+ klog .V (level ).Infof ("Observing deletion for node %s, unneeded for %s (removal threshold was %s)." ,
139+ nodeName , duration , threshold )
140+ }
141+
129142// getTrackedNodes returns the names of all nodes currently tracked as unneeded.
130143func (t * NodeLatencyTracker ) getTrackedNodes () []string {
131144 return slices .Collect (maps .Keys (t .unneededNodes ))
0 commit comments