Skip to content

Commit b92e3c8

Browse files
Add L4NetLB metrics
1 parent 8fefb94 commit b92e3c8

File tree

4 files changed

+223
-0
lines changed

4 files changed

+223
-0
lines changed

providers/gce/gce_loadbalancer_external.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,18 @@ func (g *Cloud) ensureExternalLoadBalancer(clusterName string, clusterID string,
6767
return nil, cloudprovider.ImplementedElsewhere
6868
}
6969

70+
nm := types.NamespacedName{Namespace: apiService.Namespace, Name: apiService.Name}
71+
metricsState := L4NetLBServiceState{
72+
Status: StatusError,
73+
DenyFirewall: DenyFirewallStatusNone,
74+
}
75+
if !g.enableL4DenyFirewallRule && g.enableL4DenyFirewallRollbackCleanup {
76+
metricsState.DenyFirewall = DenyFirewallStatusDisabled
77+
}
78+
defer func() {
79+
g.metricsCollector.SetL4NetLBService(nm.String(), metricsState)
80+
}()
81+
7082
if hasLoadBalancerClass(apiService, LegacyRegionalExternalLoadBalancerClass) {
7183
if apiService.Annotations[ServiceAnnotationLoadBalancerType] == string(LBTypeInternal) {
7284
g.eventRecorder.Event(apiService, v1.EventTypeWarning, "ConflictingConfiguration", fmt.Sprintf("loadBalancerClass conflicts with %s: %s annotation. External LoadBalancer Service provisioned.", ServiceAnnotationLoadBalancerType, string(LBTypeInternal)))
@@ -292,6 +304,11 @@ func (g *Cloud) ensureExternalLoadBalancer(clusterName string, clusterID string,
292304
status := &v1.LoadBalancerStatus{}
293305
status.Ingress = []v1.LoadBalancerIngress{{IP: ipAddressToUse}}
294306

307+
metricsState.Status = StatusSuccess
308+
if g.enableL4DenyFirewallRule {
309+
metricsState.DenyFirewall = DenyFirewallStatusIPv4
310+
}
311+
295312
syncResult.status = status
296313
return syncResult, nil
297314
}
@@ -409,6 +426,7 @@ func (g *Cloud) ensureExternalLoadBalancerDeleted(clusterName, clusterID string,
409426
klog.Errorf("Failed to remove finalizer '%s' from service %s - %v", NetLBFinalizerV1, service.Name, err)
410427
return err
411428
}
429+
g.metricsCollector.DeleteL4NetLBService(serviceName.String())
412430
return nil
413431
}
414432

providers/gce/gce_loadbalancer_external_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2530,3 +2530,70 @@ func TestFirewallsEqual(t *testing.T) {
25302530
})
25312531
}
25322532
}
2533+
2534+
// TestEnsureExternalLoadBalancerMetrics drives ensureExternalLoadBalancer and
// ensureExternalLoadBalancerDeleted for a single service and checks the
// exported L4 NetLB gauge after each step. The cases mutate the same fake
// cloud and the package-level gauge, so they must run in the order written.
func TestEnsureExternalLoadBalancerMetrics(t *testing.T) {
2535+
// t.Parallel() // Disable parallel to avoid race with global metrics registry
2536+
2537+
vals := DefaultTestClusterValues()
2538+
gce, err := fakeGCECloud(vals)
2539+
require.NoError(t, err)
2540+
2541+
// The concrete collector is needed to call the unexported exportNetLBMetrics.
lm, ok := gce.metricsCollector.(*LoadBalancerMetrics)
2542+
require.True(t, ok)
2543+
2544+
svc := fakeLoadbalancerService("")
2545+
svc, err = gce.client.CoreV1().Services(svc.Namespace).Create(context.TODO(), svc, metav1.CreateOptions{})
2546+
require.NoError(t, err)
2547+
2548+
nodes, err := createAndInsertNodes(gce, []string{"test-node-1"}, vals.ZoneName)
2549+
require.NoError(t, err)
2550+
2551+
// Case 1: Success
2552+
_, err = gce.ensureExternalLoadBalancer(vals.ClusterName, vals.ClusterID, svc, nil, nodes)
2553+
assert.NoError(t, err)
2554+
2555+
// We expect 1 success, and deny firewall None (default)
2556+
lm.exportNetLBMetrics()
2557+
verifyL4NetLBMetric(t, 1, StatusSuccess, DenyFirewallStatusNone)
2558+
2559+
// Case 2: Enable deny firewall cleanup
2560+
gce.enableL4DenyFirewallRollbackCleanup = true
2561+
_, err = gce.ensureExternalLoadBalancer(vals.ClusterName, vals.ClusterID, svc, nil, nodes)
2562+
assert.NoError(t, err)
2563+
2564+
// We expect 1 success, and deny firewall Disabled
2565+
lm.exportNetLBMetrics()
2566+
verifyL4NetLBMetric(t, 1, StatusSuccess, DenyFirewallStatusDisabled)
2567+
2568+
// Case 3: Enable deny firewall
2569+
gce.enableL4DenyFirewallRule = true
2570+
_, err = gce.ensureExternalLoadBalancer(vals.ClusterName, vals.ClusterID, svc, nil, nodes)
2571+
assert.NoError(t, err)
2572+
2573+
// We expect 1 success, and deny firewall IPv4
2574+
lm.exportNetLBMetrics()
2575+
verifyL4NetLBMetric(t, 1, StatusSuccess, DenyFirewallStatusIPv4)
2576+
2577+
// Case 4: Error on fetch
2578+
mockGCE := gce.Compute().(*cloud.MockGCE)
2579+
mockGCE.MockFirewalls.GetHook = func(ctx context.Context, key *meta.Key, m *cloud.MockFirewalls, options ...cloud.Option) (bool, *compute.Firewall, error) {
2580+
return true, nil, fmt.Errorf("error on fetch")
2581+
}
2582+
_, err = gce.ensureExternalLoadBalancer(vals.ClusterName, vals.ClusterID, svc, nil, nodes)
2583+
assert.Error(t, err)
2584+
2585+
// We expect 1 error, and deny firewall None: the state is only switched to
// IPv4 after a successful sync, which this failing call never reaches.
2586+
lm.exportNetLBMetrics()
2587+
verifyL4NetLBMetric(t, 1, StatusError, DenyFirewallStatusNone)
2588+
2589+
// Clear mock
2590+
mockGCE.MockFirewalls.GetHook = nil
2591+
2592+
// Case 5: Delete
2593+
err = gce.ensureExternalLoadBalancerDeleted(vals.ClusterName, vals.ClusterID, svc)
2594+
assert.NoError(t, err)
2595+
2596+
// Now verify the error count is 0: the service was last recorded as
// (Error, None) and its entry is removed on delete.
2597+
lm.exportNetLBMetrics()
2598+
verifyL4NetLBMetric(t, 0, StatusError, DenyFirewallStatusNone)
2599+
}

providers/gce/gce_loadbalancer_metrics.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,29 @@ var (
4343
},
4444
[]string{label},
4545
)
46+
l4NetLBCount = metrics.NewGaugeVec(
47+
&metrics.GaugeOpts{
48+
Name: "number_of_l4_netlbs",
49+
Help: "Metric containing the number of NetLBs that can be filtered by feature labels and status",
50+
},
51+
[]string{"status", "deny_firewall"},
52+
)
4653
)
4754

4855
// init registers L4 internal loadbalancer usage metrics.
4956
func init() {
5057
klog.V(3).Infof("Registering Service Controller loadbalancer usage metrics %v", l4ILBCount)
5158
legacyregistry.MustRegister(l4ILBCount)
59+
klog.V(3).Infof("Registering Service Controller loadbalancer usage metrics %v", l4NetLBCount)
60+
legacyregistry.MustRegister(l4NetLBCount)
5261
}
5362

5463
// LoadBalancerMetrics is a cache that contains loadbalancer service resource
5564
// states for computing usage metrics.
5665
type LoadBalancerMetrics struct {
5766
// l4ILBServiceMap is a map of service key and L4 ILB service state.
5867
l4ILBServiceMap map[string]L4ILBServiceState
68+
// l4NetLBMap is a map of service key and L4 NetLB service state.
l4NetLBMap map[string]L4NetLBServiceState
5969

6070
// Mutex is taken by the L4 NetLB setters and exporter before they touch
// l4NetLBMap.
sync.Mutex
6171
}
@@ -97,13 +107,18 @@ type loadbalancerMetricsCollector interface {
97107
SetL4ILBService(svcKey string, state L4ILBServiceState)
98108
// DeleteL4ILBService removes the given L4 ILB service key.
99109
DeleteL4ILBService(svcKey string)
110+
// SetL4NetLBService adds/updates L4 NetLB service state for given service key.
111+
SetL4NetLBService(svcKey string, state L4NetLBServiceState)
112+
// DeleteL4NetLBService removes the given L4 NetLB service key.
113+
DeleteL4NetLBService(svcKey string)
100114
}
101115

102116
// newLoadBalancerMetrics initializes LoadBalancerMetrics and starts a goroutine
103117
// to compute and export metrics periodically.
104118
func newLoadBalancerMetrics() loadbalancerMetricsCollector {
105119
return &LoadBalancerMetrics{
106120
l4ILBServiceMap: make(map[string]L4ILBServiceState),
121+
l4NetLBMap: make(map[string]L4NetLBServiceState),
107122
}
108123
}
109124

@@ -140,6 +155,11 @@ func (lm *LoadBalancerMetrics) DeleteL4ILBService(svcKey string) {
140155

141156
// export computes and exports loadbalancer usage metrics: first the L4 ILB
// metrics, then the L4 NetLB metrics.
141156
func (lm *LoadBalancerMetrics) export() {
158+
lm.exportILBMetrics()
159+
lm.exportNetLBMetrics()
160+
}
161+
162+
func (lm *LoadBalancerMetrics) exportILBMetrics() {
143163
ilbCount := lm.computeL4ILBMetrics()
144164
klog.V(5).Infof("Exporting L4 ILB usage metrics: %#v", ilbCount)
145165
for feature, count := range ilbCount {
@@ -180,3 +200,60 @@ func (lm *LoadBalancerMetrics) computeL4ILBMetrics() map[feature]int {
180200
klog.V(4).Info("L4 ILB usage metrics computed.")
181201
return counts
182202
}
203+
204+
// L4ServiceStatus denotes the status of the service. Its string value is
// exported as the "status" label of the L4 NetLB gauge.
205+
type L4ServiceStatus string
206+
207+
// Values of L4ServiceStatus.
208+
const (
209+
// StatusSuccess is recorded once ensureExternalLoadBalancer reaches its successful return.
StatusSuccess = L4ServiceStatus("Success")
210+
StatusUserError = L4ServiceStatus("UserError")
211+
// StatusError is the state ensureExternalLoadBalancer starts from and keeps unless the sync succeeds.
StatusError = L4ServiceStatus("Error")
212+
StatusPersistentError = L4ServiceStatus("PersistentError")
213+
)
214+
215+
// DenyFirewallStatus represents IP stack used when the deny firewalls are provisioned.
// Its string value is exported as the "deny_firewall" label of the L4 NetLB gauge.
216+
type DenyFirewallStatus string
217+
218+
// Values of DenyFirewallStatus.
219+
const (
220+
DenyFirewallStatusUnknown = DenyFirewallStatus("UNKNOWN") // Shouldn't happen, but if it does something is wrong.
221+
DenyFirewallStatusNone = DenyFirewallStatus("") // Case when no firewalls have been provisioned yet or when the feature has not been enabled explicitly
222+
DenyFirewallStatusDisabled = DenyFirewallStatus("DISABLED") // Case to mark when the feature has been enabled then explicitly disabled - for example when the feature is rolled back
223+
// DenyFirewallStatusIPv4 is recorded after a successful sync when the deny firewall rule is enabled.
DenyFirewallStatusIPv4 = DenyFirewallStatus("IPv4")
224+
)
225+
226+
// L4NetLBServiceState is the per-service state cached for the L4 NetLB usage
// gauge; each field becomes one label value when the gauge is exported.
type L4NetLBServiceState struct {
227+
// Status is exported as the "status" label.
Status L4ServiceStatus
228+
// DenyFirewall is exported as the "deny_firewall" label.
DenyFirewall DenyFirewallStatus
229+
}
230+
231+
// SetL4NetLBService adds or overwrites the cached L4 NetLB state stored under
// svcKey. The whole state value is replaced; nothing is merged with a previous
// entry. The collector lock is held while the map is written.
232+
func (lm *LoadBalancerMetrics) SetL4NetLBService(svcKey string, state L4NetLBServiceState) {
233+
lm.Lock()
234+
defer lm.Unlock()
235+
236+
lm.l4NetLBMap[svcKey] = state
237+
}
238+
239+
// DeleteL4NetLBService removes the given L4 NetLB service key from the cache.
// Deleting a key that is not present is a no-op. The collector lock is held
// while the map is modified.
240+
func (lm *LoadBalancerMetrics) DeleteL4NetLBService(svcKey string) {
241+
lm.Lock()
242+
defer lm.Unlock()
243+
244+
delete(lm.l4NetLBMap, svcKey)
245+
}
246+
247+
// exportNetLBMetrics computes and exports loadbalancer usage metrics.
248+
func (lm *LoadBalancerMetrics) exportNetLBMetrics() {
249+
lm.Lock()
250+
defer lm.Unlock()
251+
252+
klog.Info("Exporting L4 NetLB usage metrics for services", "serviceCount", len(lm.l4NetLBMap))
253+
254+
l4NetLBCount.Reset()
255+
for _, svcState := range lm.l4NetLBMap {
256+
l4NetLBCount.WithLabelValues(string(svcState.Status), string(svcState.DenyFirewall)).Inc()
257+
}
258+
klog.Info("L4 NetLB usage metrics exported")
259+
}

providers/gce/gce_loadbalancer_metrics_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"testing"
2525

2626
"github.com/google/go-cmp/cmp"
27+
"k8s.io/component-base/metrics/testutil"
2728
)
2829

2930
func TestComputeL4ILBMetrics(t *testing.T) {
@@ -168,3 +169,63 @@ func newL4ILBServiceState(globalAccess, customSubnet, inSuccess bool) L4ILBServi
168169
InSuccess: inSuccess,
169170
}
170171
}
172+
173+
func TestL4NetLBMetrics(t *testing.T) {
174+
metrics := newLoadBalancerMetrics()
175+
// Cast to *LoadBalancerMetrics to access methods
176+
lbMetrics, ok := metrics.(*LoadBalancerMetrics)
177+
if !ok {
178+
t.Fatalf("Failed to cast loadbalancerMetricsCollector to *LoadBalancerMetrics")
179+
}
180+
181+
lbMetrics.SetL4NetLBService("svc-success-ipv4", L4NetLBServiceState{
182+
Status: StatusSuccess,
183+
DenyFirewall: DenyFirewallStatusIPv4,
184+
})
185+
lbMetrics.SetL4NetLBService("svc-success-ipv4-2", L4NetLBServiceState{
186+
Status: StatusSuccess,
187+
DenyFirewall: DenyFirewallStatusIPv4,
188+
})
189+
lbMetrics.SetL4NetLBService("svc-success-disabled", L4NetLBServiceState{
190+
Status: StatusSuccess,
191+
DenyFirewall: DenyFirewallStatusDisabled,
192+
})
193+
lbMetrics.SetL4NetLBService("svc-error-none", L4NetLBServiceState{
194+
Status: StatusError,
195+
DenyFirewall: DenyFirewallStatusNone,
196+
})
197+
lbMetrics.SetL4NetLBService("svc-user-error-none", L4NetLBServiceState{
198+
Status: StatusUserError,
199+
DenyFirewall: DenyFirewallStatusNone,
200+
})
201+
lbMetrics.SetL4NetLBService("svc-persistent-error-none", L4NetLBServiceState{
202+
Status: StatusPersistentError,
203+
DenyFirewall: DenyFirewallStatusNone,
204+
})
205+
206+
// Add keys to be checked for deletion
207+
lbMetrics.SetL4NetLBService("svc-to-delete", L4NetLBServiceState{
208+
Status: StatusSuccess,
209+
DenyFirewall: DenyFirewallStatusNone,
210+
})
211+
lbMetrics.DeleteL4NetLBService("svc-to-delete")
212+
213+
lbMetrics.exportNetLBMetrics()
214+
215+
verifyL4NetLBMetric(t, 2, StatusSuccess, DenyFirewallStatusIPv4)
216+
verifyL4NetLBMetric(t, 1, StatusSuccess, DenyFirewallStatusDisabled)
217+
verifyL4NetLBMetric(t, 1, StatusError, DenyFirewallStatusNone)
218+
verifyL4NetLBMetric(t, 1, StatusUserError, DenyFirewallStatusNone)
219+
verifyL4NetLBMetric(t, 1, StatusPersistentError, DenyFirewallStatusNone)
220+
}
221+
222+
func verifyL4NetLBMetric(t *testing.T, expectedCount int, status L4ServiceStatus, denyFirewall DenyFirewallStatus) {
223+
t.Helper()
224+
val, err := testutil.GetGaugeMetricValue(l4NetLBCount.WithLabelValues(string(status), string(denyFirewall)))
225+
if err != nil {
226+
t.Errorf("Failed to get metric value: %v", err)
227+
}
228+
if int(val) != expectedCount {
229+
t.Errorf("Expected count %d but got %d for status %s, denyFirewall %s", expectedCount, int(val), status, denyFirewall)
230+
}
231+
}

0 commit comments

Comments
 (0)