Skip to content

Commit b9f6bbc

Browse files
committed
Keep track of exactly which collectors are registered so that warnings can be logged for unsupported volumes.
Change-Id: Ie7ce9cf87e47410afb2493f0a0de8ab6d0c3e34b
1 parent 273ebc0 commit b9f6bbc

1 file changed

Lines changed: 25 additions & 10 deletions

File tree

pkg/metrics/metrics.go

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ import (
2929
"sync"
3030
"time"
3131

32+
"k8s.io/apimachinery/pkg/util/sets"
33+
3234
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/cloud_provider/clientset"
3335
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/util"
3436
"github.com/prometheus/client_golang/prometheus"
@@ -59,7 +61,7 @@ type manager struct {
5961
clientset clientset.Interface
6062

6163
maximumNumberOfCollectors int
62-
registeredCollectorsCount int
64+
volumesRegistered sets.Set[string]
6365
mutex sync.Mutex
6466
}
6567

@@ -69,6 +71,7 @@ func NewMetricsManager(metricsEndpoint, fuseSocketDir string, maximumNumberOfCol
6971
metricsEndpoint: metricsEndpoint,
7072
fuseSocketDir: fuseSocketDir,
7173
clientset: clientset,
74+
volumesRegistered: sets.Set[string]{},
7275
maximumNumberOfCollectors: maximumNumberOfCollectors,
7376
mutex: sync.Mutex{},
7477
}
@@ -123,24 +126,36 @@ func (mm *manager) RegisterMetricsCollector(targetPath, podNamespace, podName, b
123126
"pod_uid": podUID,
124127
}, mm.clientset)
125128

126-
// Lock the number of registered collectors while we attemtp to register a new collector.
129+
// Lock the number of registered collectors while we attempt to register a new collector.
127130
mm.mutex.Lock()
128131
defer mm.mutex.Unlock()
129132

130-
// We skip registration when we already registered all supported metrics collectors. If a limit is
131-
// unset (maximumNumberOfCollectors less than zero), we continue to registration.
132-
if mm.maximumNumberOfCollectors >= 0 && mm.registeredCollectorsCount >= mm.maximumNumberOfCollectors {
133-
return
133+
// Check whether we need to register a collector. When a limit is set (maximumNumberOfCollectors >= 0), we register a collector only when both of the following are met:
134+
// 1. There is space on the metrics pipeline for the collector to be registered.
135+
// 2. The metrics collector has not previously been registered.
136+
if mm.maximumNumberOfCollectors >= 0 {
137+
// If volume is already registered, do not register again. This flow can get triggered
138+
// since CSI driver has republishVolume capability.
139+
if mm.volumesRegistered.Has(targetPath) {
140+
return
141+
}
142+
// If the collector hasn't been registered and there's no space left, log an info message and skip registration.
143+
if mm.volumesRegistered.Len() >= mm.maximumNumberOfCollectors {
144+
klog.Infof("could not register a metrics collector: podUID: %s, volume: %s. there's already %d collectors registered.", podUID, bucketName, mm.volumesRegistered.Len())
145+
146+
return
147+
}
134148
}
135149

150+
// Attempt to register new metrics collector and record success.
136151
err = mm.registry.Register(c)
137152
if err != nil {
138153
if !strings.Contains(err.Error(), prometheus.AlreadyRegisteredError{}.Error()) {
139154
klog.Errorf("failed to register metrics collector for pod %v/%v, volume %q, bucket %q: %v", podNamespace, podName, volumeName, bucketName, err)
140155
}
141156
} else {
142-
mm.registeredCollectorsCount += 1
143-
klog.Infof("successfully registered a new metrics collector: podUID: %s, volume: %s. there's %d collectors registerd.", podUID, bucketName, mm.registeredCollectorsCount)
157+
mm.volumesRegistered.Insert(targetPath)
158+
klog.Infof("successfully registered a new metrics collector: podUID: %s, volume: %s. there's %d collectors registered.", podUID, bucketName, mm.volumesRegistered.Len())
144159
}
145160
}
146161

@@ -158,8 +173,8 @@ func (mm *manager) UnregisterMetricsCollector(targetPath string) {
158173
if ok := mm.registry.Unregister(c); !ok {
159174
klog.Infof("Unregister metrics collector for targetPath %q is not needed since the collector is not registered", targetPath)
160175
} else {
161-
mm.registeredCollectorsCount -= 1
162-
klog.Infof("successfully unregistered a metrics collector: podUID: %s, volume: %s. there's %d collectors registerd.", podUID, volumeName, mm.registeredCollectorsCount)
176+
mm.volumesRegistered.Delete(targetPath)
177+
klog.Infof("successfully unregistered a metrics collector: podUID: %s, volume: %s. there's %d collectors registered.", podUID, volumeName, mm.volumesRegistered.Len())
163178
}
164179
}
165180

0 commit comments

Comments
 (0)