Skip to content

Commit 1aa019f

Browse files
committed
Report proper value when node became needed again
1 parent 3f86aa6 commit 1aa019f

File tree

1 file changed

+39
-26
lines changed

1 file changed

+39
-26
lines changed

cluster-autoscaler/core/scaledown/latencytracker/node_latency_tracker.go

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -90,42 +90,55 @@ func (t *NodeLatencyTracker) Process(autoscalingCtx *ca_context.AutoscalingConte
9090
if t.wrapped != nil {
9191
t.wrapped.Process(autoscalingCtx, status)
9292
}
93+
9394
for _, unremovableNode := range status.UnremovableNodes {
94-
nodeName := unremovableNode.Node.Name
95-
if info, exists := t.unneededNodes[nodeName]; exists {
96-
duration := time.Since(info.unneededSince)
97-
metrics.UpdateScaleDownNodeRemovalLatency(false, duration)
98-
klog.V(4).Infof("Node %q is unremovable, became needed again (unneeded for %s).", nodeName, duration)
99-
delete(t.unneededNodes, nodeName)
100-
}
95+
t.recordAndCleanup(unremovableNode.Node.Name, false)
10196
}
102-
for _, scaledDownNode := range status.ScaledDownNodes {
103-
nodeName := scaledDownNode.Node.Name
104-
if info, exists := t.unneededNodes[nodeName]; exists {
105-
duration := time.Since(info.unneededSince)
106-
latency := duration - info.removalThreshold
107-
metrics.UpdateScaleDownNodeRemovalLatency(true, latency)
108-
if latency > scaleDownLatencyLogThreshold {
109-
klog.V(2).Infof(
110-
"Observing deletion for node %s, unneeded for %s (removal threshold was %s).",
111-
nodeName, duration, info.removalThreshold,
112-
)
113-
} else {
114-
klog.V(6).Infof(
115-
"Observing deletion for node %s, unneeded for %s (removal threshold was %s).",
116-
nodeName, duration, info.removalThreshold,
117-
)
118-
}
119-
delete(t.unneededNodes, nodeName)
120-
}
97+
for _, node := range status.ScaledDownNodes {
98+
t.recordAndCleanup(node.Node.Name, true)
12199
}
100+
122101
if klog.V(6).Enabled() {
123102
for nodeName := range t.unneededNodes {
124103
klog.Infof("Node %q remains in unneeded list (not scaled down). Continuing to track latency.", nodeName)
125104
}
126105
}
127106
}
128107

108+
// recordAndCleanup calculates the time a node spent in the "unneeded" state, updates
109+
// relevant Prometheus metrics, and removes the node from internal tracking.
110+
func (t *NodeLatencyTracker) recordAndCleanup(nodeName string, isRemoved bool) {
111+
info, exists := t.unneededNodes[nodeName]
112+
if !exists {
113+
return
114+
}
115+
defer delete(t.unneededNodes, nodeName)
116+
117+
duration := time.Since(info.unneededSince)
118+
latency := duration - info.removalThreshold
119+
120+
if isRemoved || latency > 0 {
121+
metrics.UpdateScaleDownNodeRemovalLatency(isRemoved, latency)
122+
}
123+
if isRemoved {
124+
t.logDeletion(nodeName, duration, info.removalThreshold, latency)
125+
} else {
126+
klog.V(4).Infof("Node %q is unremovable, became needed again (unneeded for %s).",
127+
nodeName, duration)
128+
}
129+
}
130+
131+
// logDeletion handles the logging for scaled-down nodes,
132+
// using a higher verbosity (V2) if the latency exceeds the configured threshold.
133+
func (t *NodeLatencyTracker) logDeletion(nodeName string, duration, threshold, latency time.Duration) {
134+
level := klog.Level(6)
135+
if latency > scaleDownLatencyLogThreshold {
136+
level = klog.Level(2)
137+
}
138+
klog.V(level).Infof("Observing deletion for node %s, unneeded for %s (removal threshold was %s).",
139+
nodeName, duration, threshold)
140+
}
141+
129142
// getTrackedNodes returns the names of all nodes currently tracked as unneeded.
130143
func (t *NodeLatencyTracker) getTrackedNodes() []string {
131144
return slices.Collect(maps.Keys(t.unneededNodes))

0 commit comments

Comments
 (0)