Skip to content

Commit b6c4695

Browse files
authored
fix and update: multicluster metrics (#50)
1 parent 66f7b8b commit b6c4695

File tree

4 files changed

+42
-7
lines changed

4 files changed

+42
-7
lines changed

multicluster/manager.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package multicluster
1919
import (
2020
"context"
2121
"errors"
22-
"fmt"
2322
"os"
2423
"strings"
2524
"sync"
@@ -254,8 +253,6 @@ func getClusterFilter(cfg *ManagerConfig) (func(string) bool, error) {
254253
blockSet = sets.NewString(strings.Split(blockList, ",")...)
255254
}
256255

257-
fmt.Printf("allowList: %v, blockList: %v, allowSet: %v, blockSet: %v\n", allowList, blockList, allowSet, blockSet)
258-
259256
if allowSet != nil && blockSet != nil {
260257
return nil, errors.New("both cluster allow and block lists are set")
261258
}

multicluster/metrics/metrics.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,32 +29,40 @@ const (
2929
CacheCount = "cache_count"
3030
ClientCount = "client_count"
3131
ClusterEventCount = "cluster_event_count"
32+
InvalidClusterCount = "invalid_cluster_count"
3233
)
3334

3435
var (
3536
cacheCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
3637
Subsystem: MultiClusterSubSystem,
3738
Name: CacheCount,
38-
Help: "count the number of cache call",
39+
Help: "Number of Cache methods invoked",
3940
}, []string{"cluster", "method", "code"})
4041

4142
clientCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
4243
Subsystem: MultiClusterSubSystem,
4344
Name: ClientCount,
44-
Help: "count the number of client call",
45+
Help: "Number of Client methods invoked",
4546
}, []string{"cluster", "method", "code"})
4647

4748
clusterEventCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
4849
Subsystem: MultiClusterSubSystem,
4950
Name: ClusterEventCount,
50-
Help: "count the number of cluster event",
51+
Help: "Number of cluster events",
5152
}, []string{"cluster", "event", "success"})
53+
54+
invalidClusterCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
55+
Subsystem: MultiClusterSubSystem,
56+
Name: InvalidClusterCount,
57+
Help: "Number of invalid clusters for Client and Cache",
58+
}, []string{"method", "cluster"})
5259
)
5360

5461
func init() {
5562
metrics.Registry.MustRegister(cacheCounter)
5663
metrics.Registry.MustRegister(clientCounter)
5764
metrics.Registry.MustRegister(clusterEventCounter)
65+
metrics.Registry.MustRegister(invalidClusterCounter)
5866
}
5967

6068
func NewCacheCountMetrics(cluster, method string, err error) prometheus.Counter {
@@ -69,6 +77,10 @@ func NewClusterEventCountMetrics(cluster, event, success string) prometheus.Coun
6977
return clusterEventCounter.WithLabelValues(cluster, event, success)
7078
}
7179

80+
func NewInvalidClusterCounterMetrics(method, cluster string) prometheus.Counter {
81+
return invalidClusterCounter.WithLabelValues(method, cluster)
82+
}
83+
7284
func CodeForError(err error) string {
7385
if err == nil {
7486
return "200"

multicluster/multi_cluster_cache.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"context"
2020
"fmt"
2121
"reflect"
22+
"strings"
2223
"sync"
2324
"time"
2425

@@ -299,6 +300,7 @@ func (mcc *multiClusterCache) WaitForCacheSync(ctx context.Context) bool {
299300
if len(clusters) == 0 {
300301
clusters = []string{clusterinfo.Fed}
301302
} else if err != nil {
303+
metrics.NewInvalidClusterCounterMetrics("WaitForCacheSync", strings.Join(clusters, ","))
302304
mcc.log.Error(err, "failed to get clusters")
303305
return false
304306
}
@@ -319,6 +321,7 @@ func (mcc *multiClusterCache) WaitForCacheSync(ctx context.Context) bool {
319321
} else {
320322
c, ok := clusterToCache[cluster]
321323
if !ok {
324+
metrics.NewInvalidClusterCounterMetrics("WaitForCacheSync", cluster)
322325
mcc.log.Info("invalid cluster", "cluster", cluster)
323326
continue
324327
}
@@ -379,6 +382,7 @@ func (mcc *multiClusterCache) Get(ctx context.Context, key types.NamespacedName,
379382

380383
cluster, err = getCluster(ctx, obj.GetLabels())
381384
if err != nil {
385+
metrics.NewInvalidClusterCounterMetrics("Get", cluster)
382386
mcc.log.Error(err, "failed to get cluster")
383387
return err
384388
}
@@ -394,6 +398,7 @@ func (mcc *multiClusterCache) Get(ctx context.Context, key types.NamespacedName,
394398

395399
clusterCache, ok := clusterToCache[cluster]
396400
if !ok {
401+
metrics.NewInvalidClusterCounterMetrics("Get", cluster)
397402
return fmt.Errorf("unable to get: %v because of unknown cluster: %s for the cache", key, cluster)
398403
}
399404
return clusterCache.Get(ctx, key, obj)
@@ -406,6 +411,7 @@ func (mcc *multiClusterCache) List(ctx context.Context, list client.ObjectList,
406411

407412
clusters, _, err := mcc.getClusters(ctx)
408413
if err != nil {
414+
metrics.NewInvalidClusterCounterMetrics("Get", strings.Join(clusters, ","))
409415
mcc.log.Error(err, "failed to get clusters")
410416
return err
411417
}
@@ -427,13 +433,14 @@ func (mcc *multiClusterCache) List(ctx context.Context, list client.ObjectList,
427433
var ok bool
428434
c, ok = clusterToCache[cluster]
429435
if !ok {
436+
metrics.NewInvalidClusterCounterMetrics("List", cluster)
430437
return fmt.Errorf("unable to list because of unknown cluster: %s for the cache", cluster)
431438
}
432439
}
433440

434441
listObj := list.DeepCopyObject().(client.ObjectList)
435442
err = c.List(ctx, listObj, opts...)
436-
metrics.NewClientCountMetrics(cluster, "List", err).Inc()
443+
metrics.NewCacheCountMetrics(cluster, "List", err).Inc()
437444
if err != nil {
438445
return err
439446
}

multicluster/multi_cluster_client.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package multicluster
1919
import (
2020
"context"
2121
"fmt"
22+
"strings"
2223
"sync"
2324

2425
"github.com/go-logr/logr"
@@ -142,6 +143,7 @@ func (mcc *multiClusterClient) Create(ctx context.Context, obj client.Object, op
142143
// Get cluster info from context or labels, and delete it from labels because we should not write it into apiserver
143144
cluster, err = getThenDeleteCluster(ctx, obj.GetLabels())
144145
if err != nil {
146+
metrics.NewInvalidClusterCounterMetrics("Create", cluster)
145147
mcc.log.Error(err, "failed to get cluster")
146148
return err
147149
}
@@ -155,6 +157,7 @@ func (mcc *multiClusterClient) Create(ctx context.Context, obj client.Object, op
155157

156158
clusterClient, ok := mcc.clusterToClient[cluster]
157159
if !ok {
160+
metrics.NewInvalidClusterCounterMetrics("Create", cluster)
158161
return fmt.Errorf("unable to create: %v because of unknown cluster: %s for the client", obj, cluster)
159162
}
160163
return clusterClient.Create(ctx, obj, opts...)
@@ -169,6 +172,7 @@ func (mcc *multiClusterClient) Delete(ctx context.Context, obj client.Object, op
169172

170173
cluster, err = getCluster(ctx, obj.GetLabels())
171174
if err != nil {
175+
metrics.NewInvalidClusterCounterMetrics("Delete", cluster)
172176
mcc.log.Error(err, "failed to get cluster")
173177
return err
174178
}
@@ -182,6 +186,7 @@ func (mcc *multiClusterClient) Delete(ctx context.Context, obj client.Object, op
182186

183187
clusterClient, ok := mcc.clusterToClient[cluster]
184188
if !ok {
189+
metrics.NewInvalidClusterCounterMetrics("Delete", cluster)
185190
return fmt.Errorf("unable to delete: %v because of unknown cluster: %s for the client", obj, cluster)
186191
}
187192
return clusterClient.Delete(ctx, obj, opts...)
@@ -195,6 +200,7 @@ func (mcc *multiClusterClient) DeleteAllOf(ctx context.Context, obj client.Objec
195200

196201
cluster, err = getCluster(ctx, obj.GetLabels())
197202
if err != nil {
203+
metrics.NewInvalidClusterCounterMetrics("DeleteAllOf", cluster)
198204
mcc.log.Error(err, "failed to get cluster")
199205
return err
200206
}
@@ -208,6 +214,7 @@ func (mcc *multiClusterClient) DeleteAllOf(ctx context.Context, obj client.Objec
208214

209215
clusterClient, ok := mcc.clusterToClient[cluster]
210216
if !ok {
217+
metrics.NewInvalidClusterCounterMetrics("DeleteAllOf", cluster)
211218
err = fmt.Errorf("unable to deleteAllOf: %v because of unknown cluster: %s for the client", obj, cluster)
212219
return
213220
}
@@ -226,6 +233,7 @@ func (mcc *multiClusterClient) Get(ctx context.Context, key types.NamespacedName
226233

227234
cluster, err = getCluster(ctx, obj.GetLabels())
228235
if err != nil {
236+
metrics.NewInvalidClusterCounterMetrics("Get", cluster)
229237
mcc.log.Error(err, "failed to get cluster")
230238
return err
231239
}
@@ -239,6 +247,7 @@ func (mcc *multiClusterClient) Get(ctx context.Context, key types.NamespacedName
239247

240248
clusterClient, ok := mcc.clusterToClient[cluster]
241249
if !ok {
250+
metrics.NewInvalidClusterCounterMetrics("Get", cluster)
242251
return fmt.Errorf("unable to get: %v because of unknown cluster: %s for the client", obj, cluster)
243252
}
244253
return clusterClient.Get(ctx, key, obj)
@@ -251,6 +260,7 @@ func (mcc *multiClusterClient) List(ctx context.Context, list client.ObjectList,
251260

252261
clusters, err := mcc.getClusterNames(ctx)
253262
if err != nil {
263+
metrics.NewInvalidClusterCounterMetrics("List", strings.Join(clusters, ","))
254264
mcc.log.Error(err, "failed to get clusters")
255265
return err
256266
}
@@ -273,6 +283,7 @@ func (mcc *multiClusterClient) List(ctx context.Context, list client.ObjectList,
273283
var ok bool
274284
c, ok = mcc.clusterToClient[cluster]
275285
if !ok {
286+
metrics.NewInvalidClusterCounterMetrics("List", cluster)
276287
return fmt.Errorf("unable to list because of unknown cluster: %s for the client", cluster)
277288
}
278289
}
@@ -314,6 +325,7 @@ func (mcc *multiClusterClient) Patch(ctx context.Context, obj client.Object, pat
314325
// Get cluster info from context or labels, and delete it from labels because we should not write it into apiserver
315326
cluster, err = getThenDeleteCluster(ctx, obj.GetLabels())
316327
if err != nil {
328+
metrics.NewInvalidClusterCounterMetrics("Patch", cluster)
317329
mcc.log.Error(err, "failed to get cluster")
318330
return err
319331
}
@@ -327,6 +339,7 @@ func (mcc *multiClusterClient) Patch(ctx context.Context, obj client.Object, pat
327339

328340
clusterClient, ok := mcc.clusterToClient[cluster]
329341
if !ok {
342+
metrics.NewInvalidClusterCounterMetrics("Patch", cluster)
330343
return fmt.Errorf("unable to patch: %v because of unknown cluster: %v for the client", obj, cluster)
331344
}
332345
return clusterClient.Patch(ctx, obj, patch, opts...)
@@ -342,6 +355,7 @@ func (mcc *multiClusterClient) Update(ctx context.Context, obj client.Object, op
342355
// Get cluster info from context or labels, and delete it from labels because we should not write it into apiserver
343356
cluster, err = getThenDeleteCluster(ctx, obj.GetLabels())
344357
if err != nil {
358+
metrics.NewInvalidClusterCounterMetrics("Update", cluster)
345359
mcc.log.Error(err, "failed to get cluster")
346360
return err
347361
}
@@ -355,6 +369,7 @@ func (mcc *multiClusterClient) Update(ctx context.Context, obj client.Object, op
355369

356370
clusterClient, ok := mcc.clusterToClient[cluster]
357371
if !ok {
372+
metrics.NewInvalidClusterCounterMetrics("Update", cluster)
358373
err = fmt.Errorf("unable to update: %v because of unknown cluster: %s for the client", obj, cluster)
359374
return
360375
}
@@ -393,6 +408,7 @@ func (sw *statusWriter) Update(ctx context.Context, obj client.Object, opts ...c
393408
// Get cluster info from context or labels, and delete it from labels because we should not write it into apiserver
394409
cluster, err = getThenDeleteCluster(ctx, obj.GetLabels())
395410
if err != nil {
411+
metrics.NewInvalidClusterCounterMetrics("StatusUpdate", cluster)
396412
sw.log.Error(err, "failed to get cluster")
397413
return err
398414
}
@@ -403,6 +419,7 @@ func (sw *statusWriter) Update(ctx context.Context, obj client.Object, opts ...c
403419

404420
clusterClient, ok := sw.clusterToClient[cluster]
405421
if !ok {
422+
metrics.NewInvalidClusterCounterMetrics("StatusUpdate", cluster)
406423
return fmt.Errorf("unable to update: %v because of unknown cluster: %s for the client", obj, cluster)
407424
}
408425
return clusterClient.Status().Update(ctx, obj, opts...)
@@ -418,6 +435,7 @@ func (sw *statusWriter) Patch(ctx context.Context, obj client.Object, patch clie
418435
// Get cluster info from context or labels, and delete it from labels because we should not write it into apiserver
419436
cluster, err = getThenDeleteCluster(ctx, obj.GetLabels())
420437
if err != nil {
438+
metrics.NewInvalidClusterCounterMetrics("StatusPatch", cluster)
421439
sw.log.Error(err, "failed to get cluster")
422440
return err
423441
}
@@ -428,6 +446,7 @@ func (sw *statusWriter) Patch(ctx context.Context, obj client.Object, patch clie
428446

429447
clusterClient, ok := sw.clusterToClient[cluster]
430448
if !ok {
449+
metrics.NewInvalidClusterCounterMetrics("StatusPatch", cluster)
431450
return fmt.Errorf("unable to update: %v because of unknown cluster: %s for the client", obj, cluster)
432451
}
433452
return clusterClient.Status().Patch(ctx, obj, patch, opts...)

0 commit comments

Comments
 (0)