Skip to content

Commit 1874b7c

Browse files
committed
Expose classification metrics from LowNodeUtilization plugin
Enhance the plugin interface to support metrics export in a backward-compatible way. All plugins now have access to the export setting via handle.ShouldExportMetrics(), without requiring code changes to existing plugins.

Expose two new metrics from the LowNodeUtilization plugin:
- low_node_utilization_thresholds
- low_node_utilization_classification

These metrics allow users to properly monitor plugin behavior, which was previously only visible in descheduler logs.

Signed-off-by: Simone Tiraboschi <[email protected]>
1 parent f8c8d9a commit 1874b7c

File tree

6 files changed

+62
-2
lines changed

6 files changed

+62
-2
lines changed

metrics/metrics.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,25 @@ var (
9292
Buckets: []float64{0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 25, 50, 100},
9393
}, []string{"strategy", "profile"})
9494

95+
LowNodeUtilizationThresholds = metrics.NewGaugeVec(
96+
&metrics.GaugeOpts{
97+
Subsystem: DeschedulerSubsystem,
98+
Name: "low_node_utilization_thresholds",
99+
Help: "Thresholds used by the LowNodeUtilization to classify nodes by node, by class, by resource",
100+
StabilityLevel: metrics.ALPHA,
101+
},
102+
[]string{"node", "class", "resource"},
103+
)
104+
LowNodeUtilizationClassification = metrics.NewGaugeVec(
105+
&metrics.GaugeOpts{
106+
Subsystem: DeschedulerSubsystem,
107+
Name: "low_node_utilization_classification",
108+
Help: "Number of nodes by class",
109+
StabilityLevel: metrics.ALPHA,
110+
},
111+
[]string{"class"},
112+
)
113+
95114
metricsList = []metrics.Registerable{
96115
PodsEvicted,
97116
PodsEvictedTotal,
@@ -100,6 +119,8 @@ var (
100119
DeschedulerStrategyDuration,
101120
LoopDuration,
102121
StrategyDuration,
122+
LowNodeUtilizationThresholds,
123+
LowNodeUtilizationClassification,
103124
}
104125
)
105126

pkg/descheduler/descheduler.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
425425
frameworkprofile.WithGetPodsAssignedToNodeFnc(d.getPodsAssignedToNode),
426426
frameworkprofile.WithMetricsCollector(d.metricsCollector),
427427
frameworkprofile.WithPrometheusClient(d.prometheusClient),
428+
frameworkprofile.WithShouldExportMetrics(!d.rs.DisableMetrics),
428429
)
429430
if err != nil {
430431
klog.ErrorS(err, "unable to create a profile", "profile", profile.Name)

pkg/framework/fake/fake.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ type HandleImpl struct {
2323
PodEvictorImpl *evictions.PodEvictor
2424
MetricsCollectorImpl *metricscollector.MetricsCollector
2525
PrometheusClientImpl promapi.Client
26+
ShouldExportMetricsImpl bool
2627
}
2728

2829
var _ frameworktypes.Handle = &HandleImpl{}
@@ -62,3 +63,7 @@ func (hi *HandleImpl) PreEvictionFilter(pod *v1.Pod) bool {
6263
// Evict evicts pod by delegating to PodEvictorImpl.EvictPod with the given context and options, returning that call's error.
func (hi *HandleImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
6364
return hi.PodEvictorImpl.EvictPod(ctx, pod, opts)
6465
}
66+
67+
// ShouldExportMetrics returns the value of the ShouldExportMetricsImpl field.
func (hi *HandleImpl) ShouldExportMetrics() bool {
68+
return hi.ShouldExportMetricsImpl
69+
}

pkg/framework/plugins/nodeutilization/lownodeutilization.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@ package nodeutilization
1919
import (
2020
"context"
2121
"fmt"
22+
"strconv"
2223

2324
v1 "k8s.io/api/core/v1"
2425
"k8s.io/apimachinery/pkg/runtime"
2526
"k8s.io/klog/v2"
2627

28+
"sigs.k8s.io/descheduler/metrics"
2729
"sigs.k8s.io/descheduler/pkg/api"
2830
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
2931
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
@@ -179,6 +181,16 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
179181
)
180182
}
181183

184+
if l.handle.ShouldExportMetrics() {
185+
for key, rt := range thresholds {
186+
for i, t := range rt {
187+
for resource, value := range t {
188+
metrics.LowNodeUtilizationThresholds.WithLabelValues(key, strconv.Itoa(i), resource.String()).Set(float64(value))
189+
}
190+
}
191+
}
192+
}
193+
182194
// classify nodes in under and over utilized. we will later try to move
183195
// pods from the overutilized nodes to the underutilized ones.
184196
nodeGroups := classifier.Classify(
@@ -255,6 +267,11 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
255267
logger.V(1).Info("Criteria for a node above target utilization", l.overCriteria...)
256268
logger.V(1).Info("Number of overutilized nodes", "totalNumber", len(highNodes))
257269

270+
if l.handle.ShouldExportMetrics() {
271+
metrics.LowNodeUtilizationClassification.WithLabelValues("underutilized").Set(float64(len(lowNodes)))
272+
metrics.LowNodeUtilizationClassification.WithLabelValues("overutilized").Set(float64(len(highNodes)))
273+
}
274+
258275
if len(lowNodes) == 0 {
259276
logger.V(1).Info(
260277
"No node is underutilized, nothing to do here, you might tune your thresholds further",

pkg/framework/profile/profile.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ type handleImpl struct {
7474
getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc
7575
sharedInformerFactory informers.SharedInformerFactory
7676
evictor *evictorImpl
77+
shouldExportMetrics bool
7778
}
7879

7980
var _ frameworktypes.Handle = &handleImpl{}
@@ -106,6 +107,11 @@ func (hi *handleImpl) Evictor() frameworktypes.Evictor {
106107
return hi.evictor
107108
}
108109

110+
// ShouldExportMetrics reports whether plugins should export metrics, as stored in the handle's shouldExportMetrics field.
111+
func (hi *handleImpl) ShouldExportMetrics() bool {
112+
return hi.shouldExportMetrics
113+
}
114+
109115
type filterPlugin interface {
110116
frameworktypes.Plugin
111117
Filter(pod *v1.Pod) bool
@@ -142,6 +148,7 @@ type handleImplOpts struct {
142148
getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc
143149
podEvictor *evictions.PodEvictor
144150
metricsCollector *metricscollector.MetricsCollector
151+
shouldExportMetrics bool
145152
}
146153

147154
// WithClientSet sets clientSet for the scheduling frameworkImpl.
@@ -182,6 +189,12 @@ func WithMetricsCollector(metricsCollector *metricscollector.MetricsCollector) O
182189
}
183190
}
184191

192+
// WithShouldExportMetrics returns an Option that stores shouldExportMetrics in the handleImplOpts it is applied to.
func WithShouldExportMetrics(shouldExportMetrics bool) Option {
193+
return func(o *handleImplOpts) {
194+
o.shouldExportMetrics = shouldExportMetrics
195+
}
196+
}
197+
185198
func getPluginConfig(pluginName string, pluginConfigs []api.PluginConfig) (*api.PluginConfig, int) {
186199
for idx, pluginConfig := range pluginConfigs {
187200
if pluginConfig.Name == pluginName {
@@ -280,8 +293,9 @@ func NewProfile(ctx context.Context, config api.DeschedulerProfile, reg pluginre
280293
profileName: config.Name,
281294
podEvictor: hOpts.podEvictor,
282295
},
283-
metricsCollector: hOpts.metricsCollector,
284-
prometheusClient: hOpts.prometheusClient,
296+
metricsCollector: hOpts.metricsCollector,
297+
prometheusClient: hOpts.prometheusClient,
298+
shouldExportMetrics: hOpts.shouldExportMetrics,
285299
}
286300

287301
pluginNames := append(config.Plugins.Deschedule.Enabled, config.Plugins.Balance.Enabled...)

pkg/framework/types/types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ type Handle interface {
4141
GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc
4242
SharedInformerFactory() informers.SharedInformerFactory
4343
MetricsCollector() *metricscollector.MetricsCollector
44+
// ShouldExportMetrics returns whether plugins should export metrics
45+
ShouldExportMetrics() bool
4446
}
4547

4648
// Evictor defines an interface for filtering and evicting pods

0 commit comments

Comments
 (0)