@@ -5,57 +5,24 @@ import (
 	"log"
 	"net/http"
 	"os"
+	"sync"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
-	"k8s.io/apimachinery/pkg/watch"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
 )
 
 var (
-	namespace    = "kube_node_metrics"
-	commonLabels = []string{"node", "ip"}
-
-	cpuRequests = prometheus.NewGaugeVec(
-		prometheus.GaugeOpts{
-			Namespace: namespace,
-			Name:      "cpu_reqeust",
-			Help:      "Total CPU requests of all pods running on the node",
-		},
-		commonLabels,
-	)
-
-	cpuLimits = prometheus.NewGaugeVec(
-		prometheus.GaugeOpts{
-			Namespace: namespace,
-			Name:      "cpu_limit",
-			Help:      "Total CPU limits of all pods running on the node",
-		},
-		commonLabels,
-	)
-
-	memRequests = prometheus.NewGaugeVec(
-		prometheus.GaugeOpts{
-			Namespace: namespace,
-			Name:      "memory_request_bytes",
-			Help:      "Total memory requests of all pods running on the node",
-		},
-		commonLabels,
-	)
-
-	memLimits = prometheus.NewGaugeVec(
-		prometheus.GaugeOpts{
-			Namespace: namespace,
-			Name:      "memory_limit_bytes",
-			Help:      "Total memory limits of all pods running on the node",
-		},
-		commonLabels,
-	)
+	namespace             = "kube_node_metrics"
+	metricNameCpuRequests = "cpu_requests"
+	metricNameCpuLimits   = "cpu_limits"
+	metricNameMemRequests = "mem_requests"
+	metricNameMemLimits   = "mem_limits"
 
 	lastFullSyncOkTimeSeconds = prometheus.NewGauge(
 		prometheus.GaugeOpts{
@@ -93,13 +60,8 @@
 type nodeMeta struct {
 	ip   string
 	name string
-}
 
-type nodeMetrics struct {
-	cpuRequests prometheus.Gauge
-	cpuLimits   prometheus.Gauge
-	memRequests prometheus.Gauge
-	memLimits   prometheus.Gauge
+	labels map[string]string
 }
 
 type nodeResources struct {
@@ -108,10 +70,77 @@ type nodeResources struct {
 	memRequests float64
 	memLimits   float64
 
-	meta    *nodeMeta
-	metrics *nodeMetrics
+	meta *nodeMeta
+}
+
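+// nodeResourceCollector implements prometheus.Collector over the most recent
+// full-sync snapshot: metrics are produced from the snapshot at scrape time
+// instead of being held in long-lived GaugeVecs.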
+type nodeResourceCollector struct {
+	name2nodeResources map[string]*nodeResources
+
+	rwMut sync.RWMutex
+}
+
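+// lockedUpdate atomically swaps in a fresh snapshot. Nodes removed from the
+// cluster are simply absent from the next map, so stale series vanish without
+// the explicit Delete calls the previous watch-based approach required.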
+func (n *nodeResourceCollector) lockedUpdate(name2nodeResources map[string]*nodeResources) {
+	n.rwMut.Lock()
+	defer n.rwMut.Unlock()
+
+	n.name2nodeResources = name2nodeResources
+}
+
+// Collect implements prometheus.Collector.
+func (n *nodeResourceCollector) Collect(ch chan<- prometheus.Metric) {
+	n.rwMut.RLock()
+	defer n.rwMut.RUnlock()
+
+	for _, node := range n.name2nodeResources {
+		node.collect(ch)
+	}
+}
+
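+// collect rebuilds the four per-node gauges on every scrape, attaching the
+// node's identity as ConstLabels, so no per-node metric state has to be
+// tracked between scrapes.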
+func (nr *nodeResources) collect(ch chan<- prometheus.Metric) {
+	labels := nr.meta.labels
+
+	cpuRequest := prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace:   namespace,
+		Name:        metricNameCpuRequests,
+		Help:        "Total CPU requests of all pods running on the node",
+		ConstLabels: labels,
+	})
+	cpuLimit := prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace:   namespace,
+		Name:        metricNameCpuLimits,
+		Help:        "Total CPU limits of all pods running on the node",
+		ConstLabels: labels,
+	})
+	memRequest := prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace:   namespace,
+		Name:        metricNameMemRequests,
+		Help:        "Total memory requests of all pods running on the node",
+		ConstLabels: labels,
+	})
+	memLimit := prometheus.NewGauge(prometheus.GaugeOpts{
+		Namespace:   namespace,
+		Name:        metricNameMemLimits,
+		Help:        "Total memory limits of all pods running on the node",
+		ConstLabels: labels,
+	})
+
+	cpuRequest.Set(nr.cpuRequests)
+	cpuLimit.Set(nr.cpuLimits)
+	memRequest.Set(nr.memRequests)
+	memLimit.Set(nr.memLimits)
+
+	ch <- cpuRequest
+	ch <- cpuLimit
+	ch <- memRequest
+	ch <- memLimit
+}
+
+// Describe implements prometheus.Collector.
+func (n *nodeResourceCollector) Describe(chan<- *prometheus.Desc) {
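+	// Intentionally empty: descriptors are created on the fly in Collect,
+	// making this an "unchecked" collector, which the registry accepts.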
 }
 
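+// Compile-time check that nodeResourceCollector satisfies prometheus.Collector.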
+var _ prometheus.Collector = &nodeResourceCollector{}
+
 var (
 	fullSyncInterval = 2 * time.Minute
@@ -120,10 +149,6 @@ var (
 )
 
 func init() {
-	prometheus.MustRegister(cpuRequests)
-	prometheus.MustRegister(cpuLimits)
-	prometheus.MustRegister(memRequests)
-	prometheus.MustRegister(memLimits)
 	prometheus.MustRegister(lastFullSyncOkTimeSeconds)
 	prometheus.MustRegister(fullSyncDurationSeconds)
 	prometheus.MustRegister(k8sApiLatencySeconds)
@@ -152,14 +177,16 @@ func main() {
 
 	log.Printf("Full sync interval: %v", fullSyncInterval)
 
+	collector := newNodeResourceCollector()
+	// Register the collector with the default registry; otherwise the
+	// promhttp.Handler below would never expose its metrics.
+	prometheus.MustRegister(collector)
+
 	go func() {
 		// TODO: set timeout
 		ctx := context.Background()
 
 		lastFullSyncOk := time.Now()
 
 		for {
-			err := syncFullMetrics(ctx, clientset)
+			err := collector.syncFullMetrics(ctx, clientset)
 			if err != nil {
 				log.Printf("Error collecting metrics: %v", err)
 			}
@@ -174,26 +201,24 @@ func main() {
 		}
 	}()
 
-	go watchNodes(clientset)
-
 	http.Handle("/metrics", promhttp.Handler())
 	log.Fatal(http.ListenAndServe(":19191", nil))
 }
 
+func newNodeResourceCollector() *nodeResourceCollector {
+	return &nodeResourceCollector{
+		name2nodeResources: make(map[string]*nodeResources),
+	}
+}
+
 func newNodeMeta(node *v1.Node) *nodeMeta {
 	return &nodeMeta{
 		ip:   getNodeIp(node),
 		name: node.Name,
-	}
-}
-
-func newNodeMetrics(meta *nodeMeta) *nodeMetrics {
-	labels := getNodeLabelValues(meta)
-	return &nodeMetrics{
-		cpuRequests: cpuRequests.WithLabelValues(labels...),
-		cpuLimits:   cpuLimits.WithLabelValues(labels...),
-		memRequests: memRequests.WithLabelValues(labels...),
-		memLimits:   memLimits.WithLabelValues(labels...),
+		labels: map[string]string{
+			"node": node.Name,
+			"ip":   getNodeIp(node),
+		},
 	}
 }
 
@@ -221,8 +246,7 @@ func getName2NodeResources(nodes []v1.Node) map[string]*nodeResources {
 func newNodeResource(node *v1.Node) *nodeResources {
 	meta := newNodeMeta(node)
 	return &nodeResources{
-		meta:    meta,
-		metrics: newNodeMetrics(meta),
+		meta: meta,
 	}
 }
 
@@ -290,7 +314,7 @@ func listPods(ctx context.Context, clientset *kubernetes.Clientset) ([]v1.Pod, e
 	return allPods, nil
 }
 
-func syncFullMetrics(ctx context.Context, clientset *kubernetes.Clientset) error {
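+// syncFullMetrics rebuilds the per-node resource totals from a fresh node and
+// pod listing, then swaps the result into the collector.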
+func (collector *nodeResourceCollector) syncFullMetrics(ctx context.Context, clientset *kubernetes.Clientset) error {
 	beg := time.Now()
 	defer func(beg time.Time) {
 		end := time.Now()
@@ -312,8 +336,8 @@ func syncFullMetrics(ctx context.Context, clientset *kubernetes.Clientset) error
 	}
 
 	updateResourcesByNode(allPods, name2nodeResources)
-	updateNodeMetrics(name2nodeResources)
 
+	collector.lockedUpdate(name2nodeResources)
 	return nil
 }
 
@@ -352,47 +376,3 @@ func updateResourcesByNode(pods []v1.Pod, n2r map[string]*nodeResources) {
 		}
 	}
 }
-
-func updateNodeMetrics(name2node map[string]*nodeResources) {
-	for _, node := range name2node {
-		node.metrics.cpuRequests.Set(node.cpuRequests)
-		node.metrics.cpuLimits.Set(node.cpuLimits)
-		node.metrics.memRequests.Set(node.memRequests)
-		node.metrics.memLimits.Set(node.memLimits)
-	}
-}
-
-func watchNodes(clientset *kubernetes.Clientset) {
-	watcher, err := clientset.CoreV1().Nodes().Watch(context.TODO(), metav1.ListOptions{})
-	if err != nil {
-		log.Fatalf("Error watching nodes: %v", err)
-	}
-
-	for event := range watcher.ResultChan() {
-		node, ok := event.Object.(*v1.Node)
-		if !ok {
-			continue
-		}
-
-		switch event.Type {
-		case watch.Deleted:
-			deleteNodeMetrics(node)
-		}
-	}
-}
-
-func deleteNodeMetrics(node *v1.Node) {
-	labelValues := getNodeLabelValues(newNodeMeta(node))
-
-	cpuRequests.DeleteLabelValues(labelValues...)
-	cpuLimits.DeleteLabelValues(labelValues...)
-	memRequests.DeleteLabelValues(labelValues...)
-	memLimits.DeleteLabelValues(labelValues...)
-}
-
-func getNodeLabelValues(meta *nodeMeta) []string {
-	return []string{
-		meta.name,
-		meta.ip,
-	}
-}
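
With the collector registered, a scrape of :19191/metrics should then emit series like the following (node name, IP, and values here are illustrative, not taken from a real cluster):

# HELP kube_node_metrics_cpu_requests Total CPU requests of all pods running on the node
# TYPE kube_node_metrics_cpu_requests gauge
kube_node_metrics_cpu_requests{ip="10.0.0.12",node="worker-1"} 1.5
# HELP kube_node_metrics_mem_requests Total memory requests of all pods running on the node
# TYPE kube_node_metrics_mem_requests gauge
kube_node_metrics_mem_requests{ip="10.0.0.12",node="worker-1"} 2.147483648e+09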