Skip to content

Commit 983fc87

Browse files
authored
Merge pull request #586 from GoogleCloudPlatform/revert-570-automated-cherry-pick-of-#547-upstream-release-1.14
Revert "Automated cherry pick of #547: Limit the maximum number of registered metric collectors."
2 parents b2b1551 + 7ef4796 commit 983fc87

5 files changed

Lines changed: 10 additions & 137 deletions

File tree

cmd/csi_driver/main.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ var (
4747
informerResyncDurationSec = flag.Int("informer-resync-duration-sec", 1800, "informer resync duration in seconds")
4848
fuseSocketDir = flag.String("fuse-socket-dir", "/sockets", "FUSE socket directory")
4949
metricsEndpoint = flag.String("metrics-endpoint", "", "The TCP network address where the Prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means that the metrics endpoint is disabled.")
50-
maximumNumberOfCollectors = flag.Int("max-metric-collectors", -1, "Maximum number of prometheus metric collectors exporting metrics at a time, less than 0 (e.g -1) means no limit.")
5150

5251
// These are set at compile time.
5352
version = "unknown"
@@ -110,7 +109,7 @@ func main() {
110109
}
111110

112111
if *metricsEndpoint != "" {
113-
mm = metrics.NewMetricsManager(*metricsEndpoint, *fuseSocketDir, *maximumNumberOfCollectors, clientset)
112+
mm = metrics.NewMetricsManager(*metricsEndpoint, *fuseSocketDir, clientset)
114113
mm.InitializeHTTPHandler()
115114
}
116115
}

deploy/base/node/node.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ spec:
5353
- --node=true
5454
- --identity-provider=$(IDENTITY_PROVIDER)
5555
- --metrics-endpoint=:9920
56-
- --max-metric-collectors=10
5756
ports:
5857
- containerPort: 9920
5958
name: metrics

pkg/csi_driver/node.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,7 @@ func (s *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublish
114114
s.volumeStateStore.Store(targetPath, &util.VolumeState{})
115115
vs, _ = s.volumeStateStore.Load(targetPath)
116116
}
117-
// volumeState is safe to access for remaining of function since volumeLock prevents
118-
// Node Publish/Unpublish Volume calls from running more than once at a time per volume.
117+
119118
if !vs.BucketAccessCheckPassed {
120119
storageService, err := s.prepareStorageService(ctx, vc)
121120
if err != nil {
@@ -166,7 +165,7 @@ func (s *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublish
166165
return nil, status.Error(codes.FailedPrecondition, "failed to find the sidecar container in Pod spec")
167166
}
168167

169-
// Register metrics collector.
168+
// Register metrics collecter.
170169
// It is idempotent to register the same collector in node republish calls.
171170
if s.driver.config.MetricsManager != nil && !disableMetricsCollection {
172171
klog.V(6).Infof("NodePublishVolume enabling metrics collector for target path %q", targetPath)

pkg/metrics/metrics.go

Lines changed: 7 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,8 @@ import (
2626
"os"
2727
"path/filepath"
2828
"strings"
29-
"sync"
3029
"time"
3130

32-
"k8s.io/apimachinery/pkg/util/sets"
33-
3431
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/cloud_provider/clientset"
3532
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/util"
3633
"github.com/prometheus/client_golang/prometheus"
@@ -59,21 +56,14 @@ type manager struct {
5956
metricsEndpoint string
6057
fuseSocketDir string
6158
clientset clientset.Interface
62-
63-
maximumNumberOfCollectors int
64-
volumePublishPathRegistered sets.Set[string]
65-
mutex sync.Mutex
6659
}
6760

68-
func NewMetricsManager(metricsEndpoint, fuseSocketDir string, maximumNumberOfCollectors int, clientset clientset.Interface) Manager {
61+
func NewMetricsManager(metricsEndpoint, fuseSocketDir string, clientset clientset.Interface) Manager {
6962
mm := &manager{
70-
registry: prometheus.NewRegistry(),
71-
metricsEndpoint: metricsEndpoint,
72-
fuseSocketDir: fuseSocketDir,
73-
clientset: clientset,
74-
volumePublishPathRegistered: sets.Set[string]{},
75-
maximumNumberOfCollectors: maximumNumberOfCollectors,
76-
mutex: sync.Mutex{},
63+
registry: prometheus.NewRegistry(),
64+
metricsEndpoint: metricsEndpoint,
65+
fuseSocketDir: fuseSocketDir,
66+
clientset: clientset,
7767
}
7868

7969
return mm
@@ -125,43 +115,8 @@ func (mm *manager) RegisterMetricsCollector(targetPath, podNamespace, podName, b
125115
"bucket_name": bucketName,
126116
"pod_uid": podUID,
127117
}, mm.clientset)
128-
129-
// Lock the number of registered collectors while we attempt to register a new collector.
130-
mm.mutex.Lock()
131-
defer mm.mutex.Unlock()
132-
133-
if mm.maximumNumberOfCollectors == 0 {
134-
klog.Infof("could not register metrics collector: podUID: %s, volume: %s. metrics collector limit is set to zero.", podUID, bucketName)
135-
136-
return
137-
}
138-
139-
// Check if we need to register collector. We register a collector when the following are met:
140-
// 1. There is space on the metrics pipeline for the collector to be registered.
141-
// 2. The metrics collector has not previously been registered.
142-
if mm.maximumNumberOfCollectors > 0 {
143-
// If volume is already registered, do not register again. This flow can get triggered
144-
// since CSI driver has republishVolume capability.
145-
if mm.volumePublishPathRegistered.Has(targetPath) {
146-
return
147-
}
148-
// If collector hasn't been registered and there's no space left, log a warning.
149-
if mm.volumePublishPathRegistered.Len() >= mm.maximumNumberOfCollectors {
150-
klog.V(6).Infof("could not register a metrics collector: podUID: %s, volume: %s. there's already %d collectors registered.", podUID, bucketName, mm.volumePublishPathRegistered.Len())
151-
152-
return
153-
}
154-
}
155-
156-
// Attempt to register new metrics collector and record success.
157-
err = mm.registry.Register(c)
158-
if err != nil {
159-
if !strings.Contains(err.Error(), prometheus.AlreadyRegisteredError{}.Error()) {
160-
klog.Errorf("failed to register metrics collector for pod %v/%v, volume %q, bucket %q: %v", podNamespace, podName, volumeName, bucketName, err)
161-
}
162-
} else {
163-
mm.volumePublishPathRegistered.Insert(targetPath)
164-
klog.Infof("successfully registered a new metrics collector: podUID: %s, volume: %s. there's %d collectors registered.", podUID, bucketName, mm.volumePublishPathRegistered.Len())
118+
if err := mm.registry.Register(c); err != nil && !strings.Contains(err.Error(), prometheus.AlreadyRegisteredError{}.Error()) {
119+
klog.Errorf("failed to register metrics collector for pod %v/%v, volume %q, bucket %q: %v", podNamespace, podName, volumeName, bucketName, err)
165120
}
166121
}
167122

@@ -171,16 +126,8 @@ func (mm *manager) UnregisterMetricsCollector(targetPath string) {
171126

172127
// metricsCollector uses a hash of pod UID and volume name as an identifier.
173128
c := NewMetricsCollector("", "", "", "", podUID, volumeName, nil, nil)
174-
175-
// Lock the number of registered collectors while we attempt to unregister a collector.
176-
mm.mutex.Lock()
177-
defer mm.mutex.Unlock()
178-
179129
if ok := mm.registry.Unregister(c); !ok {
180130
klog.Infof("Unregister metrics collector for targetPath %q is not needed since the collector is not registered", targetPath)
181-
} else {
182-
mm.volumePublishPathRegistered.Delete(targetPath)
183-
klog.Infof("successfully unregistered a metrics collector: podUID: %s, volume: %s. there's %d collectors registered.", podUID, volumeName, mm.volumePublishPathRegistered.Len())
184131
}
185132
}
186133

pkg/metrics/metrics_test.go

Lines changed: 0 additions & 71 deletions
This file was deleted.

0 commit comments

Comments
 (0)