Skip to content

Commit 4ab46d5

Browse files
committed
Add test to check whether the model-registry (MR) operator pod was OOMKilled
1 parent ac161b0 commit 4ab46d5

1 file changed

Lines changed: 36 additions & 0 deletions

File tree

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import shlex
2+
from simple_logger.logger import get_logger
3+
4+
import pytest
5+
from ocp_utilities.monitoring import Prometheus
6+
from pyhelper_utils.shell import run_command
7+
8+
# Module-level logger, named after this module via __name__.
LOGGER = get_logger(name=__name__)
9+
10+
11+
def get_prometheus_k8s_token(duration: str = "1800s") -> str:
    """Request a token for ``prometheus-k8s`` in ``openshift-monitoring``.

    Runs ``oc create token`` through ``run_command`` and hands back whatever
    the command wrote to stdout, unmodified.

    Args:
        duration: Value passed to ``--duration`` (for example ``"1800s"``).

    Returns:
        The stdout of the ``oc create token`` invocation.

    Raises:
        AssertionError: If ``run_command`` reports the command as unsuccessful.
    """
    cmd = f"oc create token prometheus-k8s -n openshift-monitoring --duration={duration}"
    argv = shlex.split(cmd)
    # Only the success flag and stdout are used; the third element is ignored.
    ok, token, _ = run_command(command=argv, verify_stderr=False)
    assert ok, f"Command {cmd} failed to execute"
    return token
16+
17+
18+
@pytest.fixture(scope="session")
def prometheus_for_monitoring() -> Prometheus:
    """Session-scoped ``Prometheus`` client used by the monitoring tests.

    The bearer token is requested with a duration of ``86400s`` and the
    client is created with SSL verification turned off.
    """
    session_token = get_prometheus_k8s_token(duration="86400s")
    return Prometheus(bearer_token=session_token, verify_ssl=False)
24+
25+
26+
@pytest.mark.order("last")
def test_mr_operator_not_oomkilled(prometheus_for_monitoring: Prometheus):
    """Fail if the model-registry operator pod shows an OOMKilled termination.

    Queries Prometheus for every series of
    ``kube_pod_container_status_last_terminated_reason`` whose ``reason`` is
    ``OOMKilled``, logs each returned sample, and fails on the first sample
    whose pod name starts with ``model-registry-operator-controller-manager``.

    NOTE(review): the ``order("last")`` marker presumably schedules this test
    after the rest of the suite so earlier tests can trigger the condition;
    confirm against the project's pytest ordering plugin configuration.
    """
    operator_pod_prefix = "model-registry-operator-controller-manager"
    samples = prometheus_for_monitoring.query_sampler(
        query='kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}'
    )
    # A falsy result (nothing returned) means there is nothing to inspect.
    for sample in samples or []:
        LOGGER.info(sample)
        pod = sample["metric"]["pod"]
        if not pod.startswith(operator_pod_prefix):
            continue
        pytest.fail(f"Pod {pod} was oomkilled: {sample}")

0 commit comments

Comments
 (0)