From bf06317e1c1024d208ed14d1a6f9933a1b98298e Mon Sep 17 00:00:00 2001 From: RomainMou <58464216+RomainMou@users.noreply.github.com> Date: Fri, 15 Dec 2023 16:47:49 +0100 Subject: [PATCH 1/7] Add needrestart_info.py to monitor needrestart. Signed-off-by: RomainMou <58464216+RomainMou@users.noreply.github.com> --- needrestart_info.py | 160 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 needrestart_info.py diff --git a/needrestart_info.py b/needrestart_info.py new file mode 100644 index 00000000..a4d56b50 --- /dev/null +++ b/needrestart_info.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +# +# +# Description: Expose metrics from needrestart. +# +# This script runs needrestart in batch mode. It will never ask for input +# and will never restart or upgrade anything. +# +# Dependencies: python >= 3.5, python3-prometheus-client, needrestart +# +# Authors: RomainMou + +import time +import subprocess +from collections import Counter +from enum import Enum +from prometheus_client import ( + CollectorRegistry, + Gauge, + generate_latest, +) + + +class KernelStatus(Enum): + UNKNOWN = 0 + CURRENT = 1 + ABI_UPGRADE = 2 + VERSION_UPGRADE = 3 + + +class MicroCodeStatus(Enum): + UNKNOWN = 0 + CURRENT = 1 + OBSELETE = 2 + + +class NeedrestartParser: + def __init__(self, needrestart_output): + # Some default value + self.timestamp = int(time.time()) + self.version = None + self.kernel_status = None + self.microcode_status = None + self.kernel_current_version = "" + self.kernel_expected_version = "" + self.microcode_current_version = "" + self.microcode_expected_version = "" + needrestart_counter = Counter() + + # Parse the cmd output + for line in needrestart_output.stdout.decode().splitlines(): + key, value = line.split(": ") + if key == "NEEDRESTART-VER": + self.version = value + # Kernel informations + elif key == "NEEDRESTART-KCUR": + self.kernel_current_version = value + elif key == "NEEDRESTART-KEXP": +
self.kernel_expected_version = value + elif key == "NEEDRESTART-KSTA": + self.kernel_status = KernelStatus(int(value)) + # Microcode informations + elif key == "NEEDRESTART-UCCUR": + self.microcode_current_version = value + elif key == "NEEDRESTART-UCEXP": + self.microcode_expected_version = value + elif key == "NEEDRESTART-UCSTA": + self.microcode_status = MicroCodeStatus(int(value)) + # Count the others + else: + needrestart_counter.update({key}) + + self.services_count = needrestart_counter["NEEDRESTART-SVC"] + self.containers_count = needrestart_counter["NEEDRESTART-CONT"] + self.sessions_count = needrestart_counter["NEEDRESTART-SESS"] + + +def _write_timestamp(registry, needrestart_data): + g = Gauge( + "needrestart_timestamp", + "information about the version and when it was last run", + labelnames=["version"], + registry=registry, + ) + g.labels(needrestart_data.version).set(needrestart_data.timestamp) + + +def _write_kernel(registry, needrestart_data): + if needrestart_data.kernel_status: + e = Gauge( + "needrestart_kernel_status", + "information about the kernel status", + labelnames=["current", "expected"], + registry=registry, + ) + e.labels( + needrestart_data.kernel_current_version, + needrestart_data.kernel_expected_version, + ).set(needrestart_data.kernel_status.value) + + +def _write_microcode(registry, needrestart_data): + if needrestart_data.microcode_status: + e = Gauge( + "needrestart_microcode_status", + "information about the microcode status", + labelnames=["current", "expected"], + registry=registry, + ) + e.labels( + needrestart_data.microcode_current_version, + needrestart_data.microcode_expected_version, + ).set(needrestart_data.microcode_status.value) + + +def _write_services(registry, needrestart_data): + g = Gauge( + "needrestart_services_count", + "number of services requiring a restart", + registry=registry, + ) + g.set(needrestart_data.services_count) + + +def _write_containers(registry, needrestart_data): + g = Gauge( + 
"needrestart_containers_count", + "number of containers requiring a restart", + registry=registry, + ) + g.set(needrestart_data.containers_count) + + +def _write_sessions(registry, needrestart_data): + g = Gauge( + "needrestart_sessions_count", + "number of sessions requiring a restart", + registry=registry, + ) + g.set(needrestart_data.sessions_count) + + +def _main(): + registry = CollectorRegistry() + needrestart_data = NeedrestartParser( + subprocess.run(["needrestart", "-b"], stdout=subprocess.PIPE) + ) + + _write_timestamp(registry, needrestart_data) + _write_kernel(registry, needrestart_data) + _write_microcode(registry, needrestart_data) + _write_services(registry, needrestart_data) + _write_containers(registry, needrestart_data) + _write_sessions(registry, needrestart_data) + + print(generate_latest(registry).decode(), end="") + + +if __name__ == "__main__": + _main() From d80564725f21378a35ee11c753c73db8cdd77c6d Mon Sep 17 00:00:00 2001 From: RomainMou <58464216+RomainMou@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:15:10 +0200 Subject: [PATCH 2/7] Some fixes and improvments Signed-off-by: RomainMou <58464216+RomainMou@users.noreply.github.com> --- needrestart_info.py | 64 ++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/needrestart_info.py b/needrestart_info.py index a4d56b50..15242af6 100644 --- a/needrestart_info.py +++ b/needrestart_info.py @@ -1,19 +1,21 @@ #!/usr/bin/env python3 -# -# -# Description: Expose metrics from needrestart. -# -# This script runs needrestart in batch mode. It will never ask for input -# and will never restart or upgrade anything. -# -# Dependencies: python >= 3.5, python3-prometheus-client, needrestart -# -# Authors: RomainMou + +""" +Description: Expose metrics from needrestart. + +This script runs needrestart in batch mode. It will never ask for input +and will never restart or upgrade anything. 
+ +Dependencies: python >= 3.7, python3-prometheus-client, needrestart + +Authors: RomainMou +""" import time import subprocess from collections import Counter from enum import Enum + from prometheus_client import ( CollectorRegistry, Gauge, @@ -34,7 +36,7 @@ class MicroCodeStatus(Enum): OBSELETE = 2 -class NeedrestartParser: +class NeedRestartData: def __init__(self, needrestart_output): # Some default value self.timestamp = int(time.time()) @@ -48,8 +50,8 @@ def __init__(self, needrestart_output): needrestart_counter = Counter() # Parse the cmd output - for line in needrestart_output.stdout.decode().splitlines(): - key, value = line.split(": ") + for line in needrestart_output.splitlines(): + key, value = line.split(": ", maxsplit=1) if key == "NEEDRESTART-VER": self.version = value # Kernel informations @@ -75,7 +77,7 @@ def __init__(self, needrestart_output): self.sessions_count = needrestart_counter["NEEDRESTART-SESS"] -def _write_timestamp(registry, needrestart_data): +def write_timestamp(registry, needrestart_data): g = Gauge( "needrestart_timestamp", "information about the version and when it was last run", @@ -85,7 +87,7 @@ def _write_timestamp(registry, needrestart_data): g.labels(needrestart_data.version).set(needrestart_data.timestamp) -def _write_kernel(registry, needrestart_data): +def write_kernel(registry, needrestart_data): if needrestart_data.kernel_status: e = Gauge( "needrestart_kernel_status", @@ -99,7 +101,7 @@ def _write_kernel(registry, needrestart_data): ).set(needrestart_data.kernel_status.value) -def _write_microcode(registry, needrestart_data): +def write_microcode(registry, needrestart_data): if needrestart_data.microcode_status: e = Gauge( "needrestart_microcode_status", @@ -113,7 +115,7 @@ def _write_microcode(registry, needrestart_data): ).set(needrestart_data.microcode_status.value) -def _write_services(registry, needrestart_data): +def write_services(registry, needrestart_data): g = Gauge( "needrestart_services_count", "number of
services requiring a restart", @@ -122,7 +124,7 @@ def _write_services(registry, needrestart_data): g.set(needrestart_data.services_count) -def _write_containers(registry, needrestart_data): +def write_containers(registry, needrestart_data): g = Gauge( "needrestart_containers_count", "number of containers requiring a restart", @@ -131,7 +133,7 @@ def _write_containers(registry, needrestart_data): g.set(needrestart_data.containers_count) -def _write_sessions(registry, needrestart_data): +def write_sessions(registry, needrestart_data): g = Gauge( "needrestart_sessions_count", "number of sessions requiring a restart", @@ -140,21 +142,23 @@ def _write_sessions(registry, needrestart_data): g.set(needrestart_data.sessions_count) -def _main(): +def main(): registry = CollectorRegistry() - needrestart_data = NeedrestartParser( - subprocess.run(["needrestart", "-b"], stdout=subprocess.PIPE) - ) - _write_timestamp(registry, needrestart_data) - _write_kernel(registry, needrestart_data) - _write_microcode(registry, needrestart_data) - _write_services(registry, needrestart_data) - _write_containers(registry, needrestart_data) - _write_sessions(registry, needrestart_data) + needrestart_output = subprocess.run( + ["needrestart", "-b"], capture_output=True, text=True + ).stdout + needrestart_data = NeedRestartData(needrestart_output) + + write_timestamp(registry, needrestart_data) + write_kernel(registry, needrestart_data) + write_microcode(registry, needrestart_data) + write_services(registry, needrestart_data) + write_containers(registry, needrestart_data) + write_sessions(registry, needrestart_data) print(generate_latest(registry).decode(), end="") if __name__ == "__main__": - _main() + main() From 8689e68860edaac664e7a93cd9b3989beb810376 Mon Sep 17 00:00:00 2001 From: RomainMou <58464216+RomainMou@users.noreply.github.com> Date: Fri, 18 Oct 2024 17:50:07 +0200 Subject: [PATCH 3/7] Fix typo. 
Signed-off-by: RomainMou <58464216+RomainMou@users.noreply.github.com> --- needrestart_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/needrestart_info.py b/needrestart_info.py index 15242af6..05598a23 100644 --- a/needrestart_info.py +++ b/needrestart_info.py @@ -33,7 +33,7 @@ class KernelStatus(Enum): class MicroCodeStatus(Enum): UNKNOWN = 0 CURRENT = 1 - OBSELETE = 2 + OBSOLETE = 2 class NeedRestartData: From 2e1fae3e0c3ffc4c3ee56899b1d8b1c3234a757e Mon Sep 17 00:00:00 2001 From: RomainMou <58464216+RomainMou@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:00:31 +0200 Subject: [PATCH 4/7] Rename metrics to follow best practices. Signed-off-by: RomainMou <58464216+RomainMou@users.noreply.github.com> --- needrestart_info.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/needrestart_info.py b/needrestart_info.py index 05598a23..3f17ec90 100644 --- a/needrestart_info.py +++ b/needrestart_info.py @@ -79,7 +79,7 @@ def __init__(self, needrestart_output): def write_timestamp(registry, needrestart_data): g = Gauge( - "needrestart_timestamp", + "needrestart_timestamp_seconds", "information about the version and when it was last run", labelnames=["version"], registry=registry, @@ -90,7 +90,7 @@ def write_timestamp(registry, needrestart_data): def write_kernel(registry, needrestart_data): if needrestart_data.kernel_status: e = Gauge( - "needrestart_kernel_status", + "needrestart_kernel_status_info", "information about the kernel status", labelnames=["current", "expected"], registry=registry, @@ -104,7 +104,7 @@ def write_kernel(registry, needrestart_data): def write_microcode(registry, needrestart_data): if needrestart_data.microcode_status: e = Gauge( - "needrestart_microcode_status", + "needrestart_microcode_status_info", "information about the microcode status", labelnames=["current", "expected"], registry=registry, @@ -117,7 +117,7 @@ def write_microcode(registry, needrestart_data): def 
write_services(registry, needrestart_data): g = Gauge( - "needrestart_services_count", + "needrestart_services_total", "number of services requiring a restart", registry=registry, ) @@ -126,7 +126,7 @@ def write_services(registry, needrestart_data): def write_containers(registry, needrestart_data): g = Gauge( - "needrestart_containers_count", + "needrestart_containers_total", "number of containers requiring a restart", registry=registry, ) @@ -135,7 +135,7 @@ def write_containers(registry, needrestart_data): def write_sessions(registry, needrestart_data): g = Gauge( - "needrestart_sessions_count", + "needrestart_sessions_total", "number of sessions requiring a restart", registry=registry, ) From ce87ff3fe5bccea14b879f1dfbbbd4e1fdb0b53a Mon Sep 17 00:00:00 2001 From: RomainMou <58464216+RomainMou@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:13:34 +0200 Subject: [PATCH 5/7] Manage error when running needrestart. Signed-off-by: RomainMou <58464216+RomainMou@users.noreply.github.com> --- needrestart_info.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/needrestart_info.py b/needrestart_info.py index 3f17ec90..13808b64 100644 --- a/needrestart_info.py +++ b/needrestart_info.py @@ -145,10 +145,17 @@ def write_sessions(registry, needrestart_data): def main(): registry = CollectorRegistry() - needrestart_output = subprocess.run( - ["needrestart", "-b"], capture_output=True, text=True - ).stdout - needrestart_data = NeedRestartData(needrestart_output) + try: + needrestart_output = subprocess.run( + ["needrestart", "-b"], capture_output=True, text=True, check=True + ).stdout + needrestart_data = NeedRestartData(needrestart_output) + except subprocess.CalledProcessError as e: + print(f"Error executing needrestart:\n{e}") + sys.exit(1) + except Exception as e: + print(f"An unexpected error occurred:\n{e}") + sys.exit(1) write_timestamp(registry, needrestart_data) write_kernel(registry, needrestart_data) From 
2c87495b4bf31fca1b535c548ec86fe65ab92dbe Mon Sep 17 00:00:00 2001 From: RomainMou <58464216+RomainMou@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:20:40 +0200 Subject: [PATCH 6/7] Fix missing import. Signed-off-by: RomainMou <58464216+RomainMou@users.noreply.github.com> --- needrestart_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/needrestart_info.py b/needrestart_info.py index 13808b64..10c4d489 100644 --- a/needrestart_info.py +++ b/needrestart_info.py @@ -11,6 +11,7 @@ Authors: RomainMou """ +import sys import time import subprocess from collections import Counter From 3aa2b23b349fbbc6440a6e98924e5a109dbf3f2a Mon Sep 17 00:00:00 2001 From: RomainMou <58464216+RomainMou@users.noreply.github.com> Date: Mon, 21 Oct 2024 10:48:18 +0200 Subject: [PATCH 7/7] Output errors on stderr. Signed-off-by: RomainMou <58464216+RomainMou@users.noreply.github.com> --- needrestart_info.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/needrestart_info.py b/needrestart_info.py index 10c4d489..67970552 100644 --- a/needrestart_info.py +++ b/needrestart_info.py @@ -152,10 +152,10 @@ def main(): ).stdout needrestart_data = NeedRestartData(needrestart_output) except subprocess.CalledProcessError as e: - print(f"Error executing needrestart:\n{e}") + print(f"Error executing needrestart:\n{e}", file=sys.stderr) sys.exit(1) except Exception as e: - print(f"An unexpected error occurred:\n{e}") + print(f"An unexpected error occurred:\n{e}", file=sys.stderr) sys.exit(1) write_timestamp(registry, needrestart_data)