Skip to content

Commit 027b541

Browse files
RomainMouallamiro
authored and committed
Add needrestart_info.py to monitor needrestart. (prometheus-community#195)
Add needrestart_info.py to monitor needrestart Signed-off-by: RomainMou <[email protected]>
1 parent 6f76a9e commit 027b541

File tree

1 file changed

+172
-0
lines changed

1 file changed

+172
-0
lines changed

needrestart_info.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Description: Expose metrics from needrestart.
5+
6+
This script runs needrestart in batch mode. It will never ask for input
7+
and will never restart or upgrade anything.
8+
9+
Dependencies: python >= 3.7, python3-prometheus-client, needrestart
10+
11+
Authors: RomainMou
12+
"""
13+
14+
import sys
15+
import time
16+
import subprocess
17+
from collections import Counter
18+
from enum import Enum
19+
20+
from prometheus_client import (
21+
CollectorRegistry,
22+
Gauge,
23+
generate_latest,
24+
)
25+
26+
27+
class KernelStatus(Enum):
    """Kernel state as carried by the ``NEEDRESTART-KSTA`` output line.

    The integer printed by needrestart is converted to a member with
    ``KernelStatus(int(value))``, so the numeric values below must stay in
    sync with what needrestart emits.
    """

    UNKNOWN = 0
    CURRENT = 1
    ABI_UPGRADE = 2
    VERSION_UPGRADE = 3
32+
33+
34+
class MicroCodeStatus(Enum):
    """Microcode state as carried by the ``NEEDRESTART-UCSTA`` output line.

    The integer printed by needrestart is converted to a member with
    ``MicroCodeStatus(int(value))``, so the numeric values below must stay
    in sync with what needrestart emits.
    """

    UNKNOWN = 0
    CURRENT = 1
    OBSOLETE = 2
38+
39+
40+
class NeedRestartData:
    """Parsed view of the text printed by ``needrestart -b`` (batch mode).

    Attributes:
        timestamp: Unix time, in whole seconds, at which the object was built.
        version: Value of the ``NEEDRESTART-VER`` line, ``None`` if absent.
        kernel_status: ``KernelStatus`` built from ``NEEDRESTART-KSTA``,
            ``None`` if that line is absent.
        microcode_status: ``MicroCodeStatus`` built from
            ``NEEDRESTART-UCSTA``, ``None`` if that line is absent.
        kernel_current_version: Value of ``NEEDRESTART-KCUR`` (default ``""``).
        kernel_expected_version: Value of ``NEEDRESTART-KEXP`` (default ``""``).
        microcode_current_version: Value of ``NEEDRESTART-UCCUR``
            (default ``""``).
        microcode_expected_version: Value of ``NEEDRESTART-UCEXP``
            (default ``""``).
        services_count: Number of ``NEEDRESTART-SVC`` lines.
        containers_count: Number of ``NEEDRESTART-CONT`` lines.
        sessions_count: Number of ``NEEDRESTART-SESS`` lines.
    """

    def __init__(self, needrestart_output):
        """Parse *needrestart_output*, a string of ``KEY: value`` lines.

        Lines that do not contain the ``": "`` separator (blank lines, stray
        messages) are ignored instead of aborting the whole parse.

        Raises:
            ValueError: If a status line holds a value that is not an integer
                or does not correspond to a member of the matching enum.
        """
        # Defaults used when the corresponding line is missing from the output.
        self.timestamp = int(time.time())
        self.version = None
        self.kernel_status = None
        self.microcode_status = None
        self.kernel_current_version = ""
        self.kernel_expected_version = ""
        self.microcode_current_version = ""
        self.microcode_expected_version = ""
        needrestart_counter = Counter()

        for line in needrestart_output.splitlines():
            # Split on the first separator only: values may contain ": ".
            key, separator, value = line.partition(": ")
            if not separator:
                # Not a "KEY: value" line; unpacking a plain split() here
                # would raise ValueError and discard every other metric.
                continue

            if key == "NEEDRESTART-VER":
                self.version = value
            # Kernel information
            elif key == "NEEDRESTART-KCUR":
                self.kernel_current_version = value
            elif key == "NEEDRESTART-KEXP":
                self.kernel_expected_version = value
            elif key == "NEEDRESTART-KSTA":
                self.kernel_status = KernelStatus(int(value))
            # Microcode information
            elif key == "NEEDRESTART-UCCUR":
                self.microcode_current_version = value
            elif key == "NEEDRESTART-UCEXP":
                self.microcode_expected_version = value
            elif key == "NEEDRESTART-UCSTA":
                self.microcode_status = MicroCodeStatus(int(value))
            # Every other key is only counted (one line per affected item).
            else:
                needrestart_counter[key] += 1

        # Counter returns 0 for keys that never appeared.
        self.services_count = needrestart_counter["NEEDRESTART-SVC"]
        self.containers_count = needrestart_counter["NEEDRESTART-CONT"]
        self.sessions_count = needrestart_counter["NEEDRESTART-SESS"]
79+
80+
81+
def write_timestamp(registry, needrestart_data):
    """Register the ``needrestart_timestamp_seconds`` gauge on *registry*.

    The single sample is labelled with ``needrestart_data.version`` and its
    value is ``needrestart_data.timestamp``.
    """
    timestamp_gauge = Gauge(
        "needrestart_timestamp_seconds",
        "information about the version and when it was last run",
        labelnames=["version"],
        registry=registry,
    )
    labelled_sample = timestamp_gauge.labels(version=needrestart_data.version)
    labelled_sample.set(needrestart_data.timestamp)
89+
90+
91+
def write_kernel(registry, needrestart_data):
    """Register the ``needrestart_kernel_status_info`` gauge on *registry*.

    Nothing is registered when ``needrestart_data.kernel_status`` is unset.
    Otherwise the sample is labelled with the current and expected kernel
    versions and its value is the numeric value of the status enum member.
    """
    status = needrestart_data.kernel_status
    if not status:
        return

    kernel_gauge = Gauge(
        "needrestart_kernel_status_info",
        "information about the kernel status",
        labelnames=["current", "expected"],
        registry=registry,
    )
    labelled_sample = kernel_gauge.labels(
        current=needrestart_data.kernel_current_version,
        expected=needrestart_data.kernel_expected_version,
    )
    labelled_sample.set(status.value)
103+
104+
105+
def write_microcode(registry, needrestart_data):
    """Register the ``needrestart_microcode_status_info`` gauge on *registry*.

    Nothing is registered when ``needrestart_data.microcode_status`` is
    unset. Otherwise the sample is labelled with the current and expected
    microcode versions and its value is the numeric value of the status enum
    member.
    """
    status = needrestart_data.microcode_status
    if not status:
        return

    microcode_gauge = Gauge(
        "needrestart_microcode_status_info",
        "information about the microcode status",
        labelnames=["current", "expected"],
        registry=registry,
    )
    labelled_sample = microcode_gauge.labels(
        current=needrestart_data.microcode_current_version,
        expected=needrestart_data.microcode_expected_version,
    )
    labelled_sample.set(status.value)
117+
118+
119+
def write_services(registry, needrestart_data):
    """Register ``needrestart_services_total`` on *registry*.

    The gauge is set to ``needrestart_data.services_count``.
    """
    services_gauge = Gauge(
        "needrestart_services_total",
        "number of services requiring a restart",
        registry=registry,
    )
    count = needrestart_data.services_count
    services_gauge.set(count)
126+
127+
128+
def write_containers(registry, needrestart_data):
    """Register ``needrestart_containers_total`` on *registry*.

    The gauge is set to ``needrestart_data.containers_count``.
    """
    containers_gauge = Gauge(
        "needrestart_containers_total",
        "number of containers requiring a restart",
        registry=registry,
    )
    count = needrestart_data.containers_count
    containers_gauge.set(count)
135+
136+
137+
def write_sessions(registry, needrestart_data):
    """Register ``needrestart_sessions_total`` on *registry*.

    The gauge is set to ``needrestart_data.sessions_count``.
    """
    sessions_gauge = Gauge(
        "needrestart_sessions_total",
        "number of sessions requiring a restart",
        registry=registry,
    )
    count = needrestart_data.sessions_count
    sessions_gauge.set(count)
144+
145+
146+
def main():
    """Run ``needrestart -b`` and print the collected metrics to stdout.

    The command output is parsed into a ``NeedRestartData``, every
    ``write_*`` helper registers its gauge on a fresh ``CollectorRegistry``,
    and the registry is rendered with ``generate_latest``.

    On any failure an error message is written to stderr and the process
    exits with status 1 without printing metrics.
    """
    registry = CollectorRegistry()

    try:
        needrestart_output = subprocess.run(
            ["needrestart", "-b"], capture_output=True, text=True, check=True
        ).stdout
        needrestart_data = NeedRestartData(needrestart_output)
    except subprocess.CalledProcessError as e:
        print(f"Error executing needrestart:\n{e}", file=sys.stderr)
        # stderr was captured by capture_output=True; without forwarding it,
        # the reason needrestart failed would be lost.
        if e.stderr:
            print(e.stderr.rstrip(), file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: covers a missing needrestart binary and
        # unparsable output alike.
        print(f"An unexpected error occurred:\n{e}", file=sys.stderr)
        sys.exit(1)

    write_timestamp(registry, needrestart_data)
    write_kernel(registry, needrestart_data)
    write_microcode(registry, needrestart_data)
    write_services(registry, needrestart_data)
    write_containers(registry, needrestart_data)
    write_sessions(registry, needrestart_data)

    # generate_latest returns bytes that already end with a newline.
    print(generate_latest(registry).decode(), end="")
169+
170+
171+
# Run the collector only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)