Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
import shlex
from simple_logger.logger import get_logger
Comment thread
fege marked this conversation as resolved.

import pytest
from ocp_utilities.monitoring import Prometheus
from pyhelper_utils.shell import run_command

LOGGER = get_logger(name=__name__)


def get_prometheus_k8s_token(duration: str = "1800s") -> str:
token_command = f"oc create token prometheus-k8s -n openshift-monitoring --duration={duration}"
Expand All @@ -21,16 +17,3 @@ def prometheus_for_monitoring() -> Prometheus:
verify_ssl=False,
bearer_token=get_prometheus_k8s_token(duration="86400s"),
)


@pytest.mark.order("last")
def test_mr_operator_not_oomkilled(prometheus_for_monitoring: Prometheus):
    """Fail when the model registry operator pod shows up as OOMKilled.

    Runs the ``kube_pod_container_status_last_terminated_reason`` query
    filtered on ``reason="OOMKilled"`` through the ``prometheus_for_monitoring``
    fixture, logs every returned entry, and fails on the first entry whose pod
    name starts with ``model-registry-operator-controller-manager``.
    A falsy (e.g. empty) query result lets the test pass.
    """
    oomkilled_query = 'kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}'
    samples = prometheus_for_monitoring.query_sampler(query=oomkilled_query)
    if not samples:
        # Nothing reported as OOMKilled: nothing to inspect.
        return

    for sample in samples:
        LOGGER.info(sample)
        pod = sample["metric"]["pod"]
        if not pod.startswith("model-registry-operator-controller-manager"):
            # Only the model registry operator pod is of interest here.
            continue
        pytest.fail(f"Pod {pod} was oomkilled: {sample}")
19 changes: 19 additions & 0 deletions tests/model_registry/cluster_health/test_mr_operator_health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from simple_logger.logger import get_logger

import pytest
from ocp_utilities.monitoring import Prometheus

LOGGER = get_logger(name=__name__)


@pytest.mark.order("last")
def test_mr_operator_not_oomkilled(prometheus_for_monitoring: Prometheus):
    """Fail when the model registry operator pod shows up as OOMKilled.

    Runs the ``kube_pod_container_status_last_terminated_reason`` query
    filtered on ``reason="OOMKilled"`` through the ``prometheus_for_monitoring``
    fixture, logs every returned entry, and fails on the first entry whose pod
    name starts with ``model-registry-operator-controller-manager``.
    A falsy (e.g. empty) query result lets the test pass.
    """
    oomkilled_query = 'kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}'
    samples = prometheus_for_monitoring.query_sampler(query=oomkilled_query)
    if not samples:
        # Nothing reported as OOMKilled: nothing to inspect.
        return

    for sample in samples:
        LOGGER.info(sample)
        pod = sample["metric"]["pod"]
        if not pod.startswith("model-registry-operator-controller-manager"):
            # Only the model registry operator pod is of interest here.
            continue
        pytest.fail(f"Pod {pod} was oomkilled: {sample}")
Loading