Skip to content

Commit d01674b

Browse files
authored
metrics: introduce metrics based on the NNS (#1414)
This PR introduces a small set of metrics derived from the NodeNetworkState (NNS): `kubernetes_nmstate_network_interfaces` and `kubernetes_nmstate_routes`. They track the number of network interfaces and the number of routes on every node in the cluster where nmstate handler pods are running. This provides a skeleton for k-nmstate to build metrics from the effective state of the network on the system rather than only from the content of NNCPs. In the future it may be used to gather statistics about the network topologies of clusters. Signed-off-by: Mat Kowalski <[email protected]>
1 parent d3202c8 commit d01674b

File tree

7 files changed

+694
-1
lines changed

7 files changed

+694
-1
lines changed

cmd/handler/main.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ func init() {
8383
// +kubebuilder:scaffold:scheme
8484

8585
metrics.Registry.MustRegister(monitoring.AppliedFeatures)
86+
metrics.Registry.MustRegister(monitoring.NetworkInterfaces)
87+
metrics.Registry.MustRegister(monitoring.NetworkRoutes)
8688
}
8789

8890
func main() {
@@ -390,6 +392,17 @@ func setupMetricsManager(mgr manager.Manager) error {
390392
setupLog.Error(err, "unable to create NodeNetworkConfigurationEnactment metrics controller", "metrics", "NMState")
391393
return err
392394
}
395+
396+
setupLog.Info("Creating Metrics NodeNetworkState controller")
397+
if err := (&controllersmetrics.NodeNetworkStateReconciler{
398+
Client: mgr.GetClient(),
399+
Log: ctrl.Log.WithName("metrics").WithName("NodeNetworkState"),
400+
Scheme: mgr.GetScheme(),
401+
}).SetupWithManager(mgr); err != nil {
402+
setupLog.Error(err, "unable to create NodeNetworkState metrics controller", "metrics", "NMState")
403+
return err
404+
}
405+
393406
return nil
394407
}
395408

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
Copyright The Kubernetes NMState Authors.
3+
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package metrics
19+
20+
import (
21+
"context"
22+
23+
"github.com/go-logr/logr"
24+
"github.com/pkg/errors"
25+
"github.com/prometheus/client_golang/prometheus"
26+
27+
apierrors "k8s.io/apimachinery/pkg/api/errors"
28+
"k8s.io/apimachinery/pkg/runtime"
29+
ctrl "sigs.k8s.io/controller-runtime"
30+
"sigs.k8s.io/controller-runtime/pkg/client"
31+
"sigs.k8s.io/controller-runtime/pkg/event"
32+
"sigs.k8s.io/controller-runtime/pkg/predicate"
33+
34+
nmstatev1beta1 "github.com/nmstate/kubernetes-nmstate/api/v1beta1"
35+
"github.com/nmstate/kubernetes-nmstate/pkg/monitoring"
36+
"github.com/nmstate/kubernetes-nmstate/pkg/state"
37+
)
38+
39+
// NodeNetworkStateReconciler reconciles NodeNetworkState objects in order to
// publish per-node metrics about the reported network state.
//
// NOTE(review): the two tracking maps below are read and written from the
// reconcile path without any locking visible in this file; this presumably
// relies on reconciles being serialized — confirm before raising concurrency.
40+
type NodeNetworkStateReconciler struct {
41+
client.Client
42+
Log logr.Logger
43+
Scheme *runtime.Scheme
44+
// oldInterfaceTypes remembers, per node, the interface types for which a
// gauge series was last set, so that series for types that disappear can be
// deleted. It is allocated in SetupWithManager.
45+
oldInterfaceTypes map[string]map[string]struct{} // node name -> set of interface types
46+
// oldRouteKeys remembers, per node, the route keys for which a gauge series
// was last set, so that series for keys that disappear can be deleted. It is
// allocated in SetupWithManager.
47+
oldRouteKeys map[string]map[state.RouteKey]struct{} // node name -> set of route keys
48+
}
49+
50+
// Reconcile reads the state of the cluster for a NodeNetworkState object and calculates
51+
// metrics for network interface counts by type and node.
52+
func (r *NodeNetworkStateReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
53+
log := r.Log.WithValues("metrics.nodenetworkstate", request.NamespacedName)
54+
log.Info("Reconcile")
55+
56+
nodeName := request.Name
57+
58+
nnsInstance := &nmstatev1beta1.NodeNetworkState{}
59+
err := r.Client.Get(ctx, request.NamespacedName, nnsInstance)
60+
if err != nil {
61+
if apierrors.IsNotFound(err) {
62+
// NNS has been deleted, clean up metrics for this node
63+
r.deleteNodeMetrics(nodeName)
64+
return ctrl.Result{}, nil
65+
}
66+
log.Error(err, "Error retrieving NodeNetworkState")
67+
return ctrl.Result{}, err
68+
}
69+
70+
// Count interfaces by type for this node
71+
counts, err := state.CountInterfacesByType(nnsInstance.Status.CurrentState)
72+
if err != nil {
73+
log.Error(err, "Failed to count interfaces by type")
74+
return ctrl.Result{}, err
75+
}
76+
77+
// Update interface metrics for this node
78+
r.updateNodeInterfaceMetrics(nodeName, counts)
79+
80+
// Count routes by IP stack and type for this node
81+
routeCounts, err := state.CountRoutes(nnsInstance.Status.CurrentState)
82+
if err != nil {
83+
log.Error(err, "Failed to count routes")
84+
return ctrl.Result{}, err
85+
}
86+
87+
// Update route metrics for this node
88+
r.updateNodeRouteMetrics(nodeName, routeCounts)
89+
90+
return ctrl.Result{}, nil
91+
}
92+
93+
func (r *NodeNetworkStateReconciler) SetupWithManager(mgr ctrl.Manager) error {
94+
r.oldInterfaceTypes = make(map[string]map[string]struct{})
95+
r.oldRouteKeys = make(map[string]map[state.RouteKey]struct{})
96+
97+
onCreationOrUpdateForThisNNS := predicate.Funcs{
98+
CreateFunc: func(createEvent event.CreateEvent) bool {
99+
return true
100+
},
101+
DeleteFunc: func(e event.DeleteEvent) bool {
102+
return true
103+
},
104+
UpdateFunc: func(e event.UpdateEvent) bool {
105+
oldNNS, ok := e.ObjectOld.(*nmstatev1beta1.NodeNetworkState)
106+
if !ok {
107+
return false
108+
}
109+
newNNS, ok := e.ObjectNew.(*nmstatev1beta1.NodeNetworkState)
110+
if !ok {
111+
return false
112+
}
113+
114+
// Reconcile if the current state has changed
115+
return oldNNS.Status.CurrentState.String() != newNNS.Status.CurrentState.String()
116+
},
117+
GenericFunc: func(event.GenericEvent) bool {
118+
return false
119+
},
120+
}
121+
122+
err := ctrl.NewControllerManagedBy(mgr).
123+
For(&nmstatev1beta1.NodeNetworkState{}).
124+
WithEventFilter(onCreationOrUpdateForThisNNS).
125+
Complete(r)
126+
if err != nil {
127+
return errors.Wrap(err, "failed to add controller to NNS metrics Reconciler")
128+
}
129+
130+
return nil
131+
}
132+
133+
// updateNodeInterfaceMetrics sets the interface count metrics for a specific node
134+
func (r *NodeNetworkStateReconciler) updateNodeInterfaceMetrics(nodeName string, counts map[string]int) {
135+
// Get the old interface types for this node to detect removed types
136+
oldTypes := r.oldInterfaceTypes[nodeName]
137+
newTypes := make(map[string]struct{})
138+
139+
// Set metrics for current interface types
140+
for ifaceType, count := range counts {
141+
monitoring.NetworkInterfaces.With(prometheus.Labels{
142+
"type": ifaceType,
143+
"node": nodeName,
144+
}).Set(float64(count))
145+
newTypes[ifaceType] = struct{}{}
146+
}
147+
148+
// Delete metrics for interface types that no longer exist on this node
149+
for oldType := range oldTypes {
150+
if _, exists := newTypes[oldType]; !exists {
151+
monitoring.NetworkInterfaces.Delete(prometheus.Labels{
152+
"type": oldType,
153+
"node": nodeName,
154+
})
155+
}
156+
}
157+
158+
// Store current types for next reconcile
159+
r.oldInterfaceTypes[nodeName] = newTypes
160+
}
161+
162+
// updateNodeRouteMetrics sets the route count metrics for a specific node
163+
func (r *NodeNetworkStateReconciler) updateNodeRouteMetrics(nodeName string, counts map[state.RouteKey]int) {
164+
// Get the old route keys for this node to detect removed keys
165+
oldKeys := r.oldRouteKeys[nodeName]
166+
newKeys := make(map[state.RouteKey]struct{})
167+
168+
// Set metrics for current route keys
169+
for key, count := range counts {
170+
monitoring.NetworkRoutes.With(prometheus.Labels{
171+
"node": nodeName,
172+
"ip_stack": key.IPStack,
173+
"type": key.Type,
174+
}).Set(float64(count))
175+
newKeys[key] = struct{}{}
176+
}
177+
178+
// Delete metrics for route keys that no longer exist on this node
179+
for oldKey := range oldKeys {
180+
if _, exists := newKeys[oldKey]; !exists {
181+
monitoring.NetworkRoutes.Delete(prometheus.Labels{
182+
"node": nodeName,
183+
"ip_stack": oldKey.IPStack,
184+
"type": oldKey.Type,
185+
})
186+
}
187+
}
188+
189+
// Store current keys for next reconcile
190+
r.oldRouteKeys[nodeName] = newKeys
191+
}
192+
193+
// deleteNodeMetrics removes all interface and route count metrics for a specific node
194+
func (r *NodeNetworkStateReconciler) deleteNodeMetrics(nodeName string) {
195+
// Delete interface metrics
196+
if oldTypes, ok := r.oldInterfaceTypes[nodeName]; ok {
197+
for ifaceType := range oldTypes {
198+
monitoring.NetworkInterfaces.Delete(prometheus.Labels{
199+
"type": ifaceType,
200+
"node": nodeName,
201+
})
202+
}
203+
delete(r.oldInterfaceTypes, nodeName)
204+
}
205+
206+
// Delete route metrics
207+
if oldKeys, ok := r.oldRouteKeys[nodeName]; ok {
208+
for key := range oldKeys {
209+
monitoring.NetworkRoutes.Delete(prometheus.Labels{
210+
"node": nodeName,
211+
"ip_stack": key.IPStack,
212+
"type": key.Type,
213+
})
214+
}
215+
delete(r.oldRouteKeys, nodeName)
216+
}
217+
}

pkg/monitoring/metrics.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,35 @@ var (
2929
Help: "Number of nmstate features applied labeled by its name",
3030
}
3131

32+
NetworkInterfacesOpts = prometheus.GaugeOpts{
33+
Name: "kubernetes_nmstate_network_interfaces",
34+
Help: "Number of network interfaces labeled by its type",
35+
}
36+
37+
NetworkRoutesOpts = prometheus.GaugeOpts{
38+
Name: "kubernetes_nmstate_routes",
39+
Help: "Number of network routes labeled by node, IP stack and type (static/dynamic)",
40+
}
41+
3242
AppliedFeatures = prometheus.NewGaugeVec(
3343
AppliedFeaturesOpts,
3444
[]string{"name"},
3545
)
46+
47+
NetworkInterfaces = prometheus.NewGaugeVec(
48+
NetworkInterfacesOpts,
49+
[]string{"type", "node"},
50+
)
51+
52+
NetworkRoutes = prometheus.NewGaugeVec(
53+
NetworkRoutesOpts,
54+
[]string{"node", "ip_stack", "type"},
55+
)
56+
3657
gaugeOpts = []prometheus.GaugeOpts{
3758
AppliedFeaturesOpts,
59+
NetworkInterfacesOpts,
60+
NetworkRoutesOpts,
3861
}
3962
)
4063

pkg/state/filter.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ limitations under the License.
1818
package state
1919

2020
import (
21+
"strings"
22+
2123
"github.com/nmstate/kubernetes-nmstate/api/shared"
2224
"github.com/nmstate/kubernetes-nmstate/pkg/environment"
2325

@@ -38,6 +40,76 @@ func FilterOut(currentState shared.State) (shared.State, error) {
3840
return filterOut(currentState)
3941
}
4042

43+
// CountInterfacesByType parses the state and returns a map of interface type to count.
44+
func CountInterfacesByType(currentState shared.State) (map[string]int, error) {
45+
var state rootState
46+
if err := yaml.Unmarshal(currentState.Raw, &state); err != nil {
47+
return nil, err
48+
}
49+
50+
counts := make(map[string]int)
51+
for _, iface := range state.Interfaces {
52+
if iface.Type != "" {
53+
counts[iface.Type]++
54+
}
55+
}
56+
return counts, nil
57+
}
58+
59+
// RouteKey is the grouping key under which routes are counted for metrics.
// CountRoutes returns its counts in a map keyed by this type.
60+
type RouteKey struct {
61+
IPStack string // IP family of the route destination: "ipv4" or "ipv6"
62+
Type string // "static" (destination also present in routes.config) or "dynamic"
63+
}
64+
65+
// CountRoutes parses the state and returns a map of RouteKey to count.
66+
// Routes are categorized by:
67+
// - IP stack: determined by presence of ":" in destination (ipv6) or not (ipv4)
68+
// - Type: "static" if route exists in routes.config, "dynamic" if only in routes.running
69+
func CountRoutes(currentState shared.State) (map[RouteKey]int, error) {
70+
var state rootState
71+
if err := yaml.Unmarshal(currentState.Raw, &state); err != nil {
72+
return nil, err
73+
}
74+
75+
counts := make(map[RouteKey]int)
76+
if state.Routes == nil {
77+
return counts, nil
78+
}
79+
80+
// Build a set of static route destinations for quick lookup
81+
staticRoutes := make(map[string]struct{})
82+
for _, route := range state.Routes.Config {
83+
staticRoutes[route.Destination] = struct{}{}
84+
}
85+
86+
// Count running routes by IP stack and type
87+
for _, route := range state.Routes.Running {
88+
ipStack := getIPStack(route.Destination)
89+
routeType := "dynamic"
90+
if _, isStatic := staticRoutes[route.Destination]; isStatic {
91+
routeType = "static"
92+
}
93+
94+
key := RouteKey{
95+
IPStack: ipStack,
96+
Type: routeType,
97+
}
98+
counts[key]++
99+
}
100+
101+
return counts, nil
102+
}
103+
104+
// getIPStack classifies a route destination by IP family.
// A destination containing a ":" is reported as "ipv6"; every other value,
// including the empty string, is reported as "ipv4".
func getIPStack(destination string) string {
	if strings.IndexByte(destination, ':') < 0 {
		return "ipv4"
	}
	return "ipv6"
}
112+
41113
func filterOutRoutes(routes []routeState, filteredInterfaces []interfaceState) []routeState {
42114
filteredRoutes := []routeState{}
43115
for _, route := range routes {

0 commit comments

Comments
 (0)