Skip to content

Commit 1bf3bc5

Browse files
[2.19] Add must-gather support (#548)
* [2.19] Add must-gather support * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 5a2aba5 commit 1bf3bc5

File tree

10 files changed

+546
-11
lines changed

10 files changed

+546
-11
lines changed

.flake8

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ fcn_exclude_functions =
1919
re,
2020
logging,
2121
LOGGER,
22+
BASIC_LOGGER,
2223
os,
2324
json,
2425
pytest,

conftest.py

Lines changed: 96 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,35 @@
44
import os
55
import pathlib
66
import shutil
7+
import datetime
8+
import traceback
79

810
import shortuuid
11+
from _pytest.runner import CallInfo
12+
from _pytest.reports import TestReport
913
from pytest import (
1014
Parser,
1115
Session,
1216
FixtureRequest,
1317
FixtureDef,
1418
Item,
19+
Collector,
1520
Config,
1621
CollectReport,
1722
)
1823
from _pytest.terminal import TerminalReporter
1924
from typing import Optional, Any
2025
from pytest_testconfig import config as py_config
21-
26+
from utilities.database import Database
2227
from utilities.constants import KServeDeploymentType
2328
from utilities.logger import separator, setup_logging
24-
29+
from utilities.must_gather_collector import (
30+
set_must_gather_collector_directory,
31+
set_must_gather_collector_values,
32+
get_must_gather_collector_dir,
33+
collect_rhoai_must_gather,
34+
get_base_dir,
35+
)
2536

2637
LOGGER = logging.getLogger(name=__name__)
2738
BASIC_LOGGER = logging.getLogger(name="basic")
@@ -194,14 +205,26 @@ def _add_upgrade_test(_item: Item, _upgrade_deployment_modes: list[str]) -> bool
194205

195206

196207
def pytest_sessionstart(session: Session) -> None:
197-
tests_log_file = session.config.getoption("log_file") or "pytest-tests.log"
208+
log_file = session.config.getoption("log_file") or "pytest-tests.log"
209+
tests_log_file = os.path.join(get_base_dir(), log_file)
210+
LOGGER.info(f"Writing tests log to {tests_log_file}")
198211
if os.path.exists(tests_log_file):
199212
pathlib.Path(tests_log_file).unlink()
200-
213+
if session.config.getoption("--collect-must-gather"):
214+
session.config.option.must_gather_db = Database()
201215
session.config.option.log_listener = setup_logging(
202216
log_file=tests_log_file,
203217
log_level=session.config.getoption("log_cli_level") or logging.INFO,
204218
)
219+
must_gather_dict = set_must_gather_collector_values()
220+
shutil.rmtree(
221+
path=must_gather_dict["must_gather_base_directory"],
222+
ignore_errors=True,
223+
)
224+
config = session.config
225+
if config.getoption("--collect-only") or config.getoption("--setup-plan"):
226+
LOGGER.info("Skipping global config update for collect-only or setup-plan")
227+
return
205228

206229
if py_config.get("distribution") == "upstream":
207230
py_config["applications_namespace"] = "opendatahub"
@@ -220,6 +243,21 @@ def pytest_runtest_setup(item: Item) -> None:
220243

221244
BASIC_LOGGER.info(f"\n{separator(symbol_='-', val=item.name)}")
222245
BASIC_LOGGER.info(f"{separator(symbol_='-', val='SETUP')}")
246+
if item.config.getoption("--collect-must-gather"):
247+
# set must-gather collection directory:
248+
set_must_gather_collector_directory(item=item, directory_path=get_must_gather_collector_dir())
249+
250+
# At the beginning of setup work, insert current epoch time into the database to indicate test
251+
# start time
252+
253+
try:
254+
db = item.config.option.must_gather_db
255+
db.insert_test_start_time(
256+
test_name=f"{item.fspath}::{item.name}",
257+
start_time=int(datetime.datetime.now().timestamp()),
258+
)
259+
except Exception as db_exception:
260+
LOGGER.error(f"Database error: {db_exception}. Must-gather collection may not be accurate")
223261

224262
if KServeDeploymentType.SERVERLESS.lower() in item.keywords:
225263
item.fixturenames.insert(0, "skip_if_no_deployed_redhat_authorino_operator")
@@ -240,6 +278,10 @@ def pytest_runtest_call(item: Item) -> None:
240278

241279
def pytest_runtest_teardown(item: Item) -> None:
    """Log the TEARDOWN separator and point the must-gather collector back at its base directory.

    Args:
        item: The test item being torn down (not read by this hook).
    """
    BASIC_LOGGER.info(f"{separator(symbol_='-', val='TEARDOWN')}")
    # Reset the must-gather collector directory after each test.
    collector_settings = py_config["must_gather_collector"]
    collector_settings["collector_directory"] = collector_settings["must_gather_base_directory"]
243285

244286

245287
def pytest_report_teststatus(report: CollectReport, config: Config) -> None:
@@ -261,13 +303,59 @@ def pytest_report_teststatus(report: CollectReport, config: Config) -> None:
261303

262304

263305
def _remove_must_gather_database(db: Database) -> None:
    """Dispose of the must-gather database engine and delete its SQLite file."""
    file_path = db.database_file_path
    LOGGER.info(f"Removing database file path {file_path}")
    # Release the engine's pooled connections before unlinking the file they point at.
    db.engine.dispose()
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # The file is already gone, which is the state we want.
        pass


def _remove_empty_directories(base_directory: str) -> None:
    """Delete every empty directory below ``base_directory``, deepest first."""
    # Bottom-up walk: a directory that only held empty directories is itself empty by the time it is visited.
    # os.walk yields nothing for a missing directory, so no existence check is needed.
    for root, dirs, _files in os.walk(base_directory, topdown=False):
        for _dir in dirs:
            try:
                os.rmdir(os.path.join(root, _dir))
            except OSError:
                # Not empty or not removable: leave it in place (best-effort cleanup).
                continue


def pytest_sessionfinish(session: Session, exitstatus: int) -> None:
    """Clean up session artifacts, stop the log listener and print the summary statistics.

    The must-gather database file is removed whenever ``--collect-must-gather`` is set, including
    ``--collect-only`` / ``--setup-plan`` runs: the session-start hook creates the database before it
    returns early for those modes, so skipping the removal here would leave the file behind.

    Args:
        session: The pytest session that is finishing.
        exitstatus: Exit status reported by pytest (not read by this hook).
    """
    config = session.config
    plan_only = config.option.setupplan or config.option.collectonly
    try:
        must_gather_enabled = config.getoption("--collect-must-gather")
        if must_gather_enabled:
            _remove_must_gather_database(db=config.option.must_gather_db)

        if not plan_only:
            if must_gather_enabled:
                # clean up the empty folders
                _remove_empty_directories(
                    base_directory=py_config["must_gather_collector"]["must_gather_base_directory"]
                )
            LOGGER.info(f"Deleting pytest base dir {config.option.basetemp}")
            shutil.rmtree(path=config.option.basetemp, ignore_errors=True)
    finally:
        # Stop the listener only after the cleanup messages above were logged, and always stop it.
        # NOTE(review): assumes log_listener drains a logging queue (QueueListener-style), so records
        # logged after stop() would never reach the handlers - confirm against setup_logging.
        config.option.log_listener.stop()

    if plan_only:
        return

    reporter: Optional[TerminalReporter] = config.pluginmanager.get_plugin("terminalreporter")
    if reporter:
        reporter.summary_stats()
329+
330+
331+
def calculate_must_gather_timer(test_start_time: int) -> int:
    """Return how many seconds of history the must-gather collection should cover.

    Args:
        test_start_time: Test start as epoch seconds; a value of 0 or less means "unknown".

    Returns:
        The seconds elapsed since ``test_start_time`` when that is more than 60, otherwise 300.
        300 is also returned (with a warning) when the start time is unknown.
    """
    default_duration = 300
    if test_start_time <= 0:
        LOGGER.warning(f"Could not get start time of test. Collecting must-gather for last {default_duration}s")
        return default_duration

    elapsed_seconds = int(datetime.datetime.now().timestamp()) - test_start_time
    if elapsed_seconds > 60:
        return elapsed_seconds
    # Short (or negative) windows fall back to the default.
    return default_duration
339+
340+
341+
def pytest_exception_interact(node: Item | Collector, call: CallInfo[Any], report: TestReport | CollectReport) -> None:
    """Log the failure text and, when ``--collect-must-gather`` is set, collect a must-gather for it.

    Database and collection errors are logged as warnings and never propagate out of this hook.

    Args:
        node: The item or collector on which the exception was raised.
        call: Call information for the failing phase (not read by this hook).
        report: The report whose ``longreprtext`` is logged.
    """
    LOGGER.error(report.longreprtext)
    if not node.config.getoption("--collect-must-gather"):
        return

    test_name = f"{node.fspath}::{node.name}"
    LOGGER.info(f"Must-gather collection is enabled for {test_name}.")

    try:
        test_start_time = node.config.option.must_gather_db.get_test_start_time(test_name=test_name)
    except Exception as db_exception:
        # A start time of 0 makes the timer fall back to its default window.
        test_start_time = 0
        LOGGER.warning(f"Error: {db_exception} in accessing database.")

    try:
        collection_window = calculate_must_gather_timer(test_start_time=test_start_time)
        collection_target = os.path.join(get_must_gather_collector_dir(), "pytest_exception_interact")
        collect_rhoai_must_gather(since=collection_window, target_dir=collection_target)
    except Exception as current_exception:
        LOGGER.warning(f"Failed to collect logs: {test_name}: {current_exception} {traceback.format_exc()}")

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ dependencies = [
6666
"openshift-python-wrapper>=11.0.26",
6767
"semver>=3.0.4",
6868
"pytest-html>=4.1.1",
69+
"sqlalchemy>=2.0.43",
6970
]
7071

7172
[project.urls]

tests/model_registry/test_model_registry_creation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,7 @@ def test_registering_model(
4444
get_and_validate_registered_model(
4545
model_registry_client=model_registry_client, model_name=MODEL_NAME, registered_model=model
4646
)
47+
48+
# NOTE(review): this test calls pytest.fail() unconditionally, so it fails on every run that selects it,
# and it carries the smoke marker. It looks like a temporary aid for exercising failure handling -
# confirm whether it is meant to stay.
@pytest.mark.smoke
49+
def test_registering_modelfail(self):
50+
pytest.fail("test failure")

utilities/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,4 @@ class Timeout:
155155
MODELMESH_SERVING: str = "modelmesh-serving"
156156
ISTIO_CA_BUNDLE_FILENAME: str = "istio_knative.crt"
157157
OPENSHIFT_CA_BUNDLE_FILENAME: str = "openshift_ca.crt"
158+
RHOAI_OPERATOR_NAMESPACE = "redhat-ods-operator"

utilities/database.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import logging
2+
import os
3+
4+
from sqlalchemy import Integer, String, create_engine
5+
from sqlalchemy.orm import Mapped, Session, mapped_column
6+
from sqlalchemy.orm import DeclarativeBase
7+
from utilities.must_gather_collector import get_base_dir
8+
9+
LOGGER = logging.getLogger(__name__)
10+
11+
TEST_DB = "opendatahub-tests.db"
12+
13+
14+
class Base(DeclarativeBase):
    """Declarative base class for the ORM models defined in this module."""
16+
17+
18+
class OpenDataHubTestTable(Base):
    """ORM model holding one row per recorded test start time."""

    __tablename__ = "OpenDataHubTestTable"

    # Auto-incrementing primary key.
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
        nullable=False,
    )
    # Name identifying the test, declared as a string of up to 500 characters.
    test_name: Mapped[str] = mapped_column(String(500))
    # Start time of the test as an integer; required.
    start_time: Mapped[int] = mapped_column(Integer, nullable=False)
24+
25+
26+
class Database:
    """SQLite-backed store of test start times.

    The database file lives inside the directory returned by ``get_base_dir()`` and the table is
    created on construction if it does not exist yet.
    """

    def __init__(self, database_file_name: str = TEST_DB, verbose: bool = True) -> None:
        """Create the engine for the SQLite file and make sure the table exists.

        Args:
            database_file_name: File name of the database, joined onto ``get_base_dir()``.
            verbose: Passed to ``create_engine`` as ``echo``.
        """
        self.database_file_path = os.path.join(get_base_dir(), database_file_name)
        self.connection_string = f"sqlite:///{self.database_file_path}"
        self.verbose = verbose
        self.engine = create_engine(url=self.connection_string, echo=self.verbose)
        Base.metadata.create_all(bind=self.engine)

    def insert_test_start_time(self, test_name: str, start_time: int) -> None:
        """Insert a new row recording ``start_time`` for ``test_name``.

        A new row is added on every call; existing rows for the same test name are left untouched.

        Args:
            test_name: Name identifying the test.
            start_time: Start time of the test as an integer.
        """
        with Session(bind=self.engine) as db_session:
            new_table_entry = OpenDataHubTestTable(test_name=test_name, start_time=start_time)
            db_session.add(new_table_entry)
            db_session.commit()

    def get_test_start_time(self, test_name: str) -> int:
        """Return the most recently recorded start time for ``test_name``.

        Args:
            test_name: Name identifying the test.

        Returns:
            The ``start_time`` of the newest row for ``test_name``, or 0 (after logging a warning)
            when no row exists.
        """
        with Session(bind=self.engine) as db_session:
            result_row = (
                db_session.query(OpenDataHubTestTable)
                .with_entities(OpenDataHubTestTable.start_time)
                .filter_by(test_name=test_name)
                # Several rows can share a test name because every insert adds a row. Take the
                # newest one (highest auto-increment id) instead of whichever row comes back first.
                .order_by(OpenDataHubTestTable.id.desc())
                .first()
            )

        if result_row is not None:
            return result_row[0]

        LOGGER.warning(f"No test found with name: {test_name}")
        return 0

utilities/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,9 @@ def __init__(self, type: str):
6868

6969
def __str__(self) -> str:
7070
return f"The {self.type} is not supported"
71+
72+
73+
class InvalidArgumentsError(Exception):
    """Raised when mutually exclusive or otherwise invalid argument combinations are passed."""

utilities/infra.py

Lines changed: 89 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
from __future__ import annotations
2-
2+
import base64
33
import json
4+
import os
5+
46
import re
57
import shlex
8+
import tempfile
9+
610
from contextlib import contextmanager
711
from functools import cache
812
from typing import Any, Generator, Optional, Set
13+
from json import JSONDecodeError
914

1015
import kubernetes
1116
from kubernetes.dynamic import DynamicClient
@@ -21,7 +26,7 @@
2126
from ocp_resources.pod import Pod
2227
from ocp_resources.project_project_openshift_io import Project
2328
from ocp_resources.project_request import ProjectRequest
24-
from ocp_resources.resource import ResourceEditor
29+
from ocp_resources.resource import ResourceEditor, get_client
2530
from ocp_resources.role import Role
2631
from ocp_resources.route import Route
2732
from ocp_resources.secret import Secret
@@ -33,7 +38,8 @@
3338
from semver import Version
3439
from simple_logger.logger import get_logger
3540

36-
from utilities.constants import Timeout
41+
from ocp_resources.subscription import Subscription
42+
from utilities.constants import Timeout, RHOAI_OPERATOR_NAMESPACE
3743
from utilities.exceptions import FailedPodsError
3844
from timeout_sampler import TimeoutExpiredError, TimeoutSampler
3945
from utilities.general import create_isvc_label_selector_str, get_s3_secret_dict
@@ -604,3 +610,83 @@ def get_product_version(admin_client: DynamicClient) -> Version:
604610
raise MissingResourceError("Operator ClusterServiceVersion not found")
605611

606612
return Version.parse(operator_version)
613+
614+
615+
def get_rhods_subscription() -> Subscription | None:
    """Return the first subscription in the RHOAI operator namespace whose name marks it as the operator's.

    Returns:
        The first subscription whose name starts with ``rhods-operator`` or ``rhoai-operator``,
        or ``None`` (after logging a warning) when there is no such subscription.
    """
    operator_name_prefixes = ("rhods-operator", "rhoai-operator")
    namespace_subscriptions = Subscription.get(dyn_client=get_client(), namespace=RHOAI_OPERATOR_NAMESPACE)
    for candidate in namespace_subscriptions or ():
        LOGGER.info(f"Checking subscription {candidate.name}")
        if candidate.name.startswith(operator_name_prefixes):
            return candidate

    LOGGER.warning("No RHOAI subscription found. Potentially ODH cluster")
    return None
625+
626+
627+
def get_rhods_operator_installed_csv() -> ClusterServiceVersion | None:
    """Return the ClusterServiceVersion named by the RHOAI subscription's ``status.installedCSV``.

    Returns:
        The ClusterServiceVersion object (constructed with ``ensure_exists=True``) in the RHOAI
        operator namespace, or ``None`` when no RHOAI subscription is found.
    """
    rhods_subscription = get_rhods_subscription()
    if not rhods_subscription:
        return None

    installed_csv_name = rhods_subscription.instance.status.installedCSV
    LOGGER.info(f"Expected CSV: {installed_csv_name}")
    return ClusterServiceVersion(
        name=installed_csv_name,
        namespace=RHOAI_OPERATOR_NAMESPACE,
        ensure_exists=True,
    )
634+
635+
636+
def get_rhods_csv_version() -> Version | None:
    """Return the version of the installed RHOAI operator CSV.

    Returns:
        The parsed ``spec.version`` of the installed CSV, or ``None`` (after logging a warning)
        when no RHOAI CSV is found.
    """
    rhoai_csv = get_rhods_operator_installed_csv()
    if not rhoai_csv:
        LOGGER.warning("No RHOAI CSV found. Potentially ODH cluster")
        return None

    # Read the version once so the value that is logged is exactly the value that is parsed.
    # NOTE(review): presumably each `.instance` access queries the cluster again - confirm.
    csv_version = rhoai_csv.instance.spec.version
    LOGGER.info(f"RHOAI CSV version: {csv_version}")
    return Version.parse(version=csv_version)
643+
644+
645+
def get_openshift_pull_secret(client: DynamicClient = None) -> Secret:
    """Return the ``pull-secret`` Secret from the ``openshift-config`` namespace.

    Args:
        client: Client used to look the secret up; ``get_client()`` is used when it is not given.

    Returns:
        The Secret object.

    Raises:
        AssertionError: If the secret does not exist.
    """
    openshift_config_namespace, pull_secret_name = "openshift-config", "pull-secret"  # pragma: allowlist secret
    api_client = client or get_client()
    secret = Secret(client=api_client, name=pull_secret_name, namespace=openshift_config_namespace)
    assert secret.exists, f"Pull-secret {pull_secret_name} not found in namespace {openshift_config_namespace}"
    return secret
655+
656+
657+
def generate_openshift_pull_secret_file(client: DynamicClient = None) -> str:
    """Write the cluster pull secret's ``.dockerconfigjson`` content to a file in a new temporary directory.

    Args:
        client: Client forwarded to ``get_openshift_pull_secret``.

    Returns:
        Path of the written ``pull-secrets.json`` file. The temporary directory is not removed
        here; removing it (and the credentials it holds) is left to the caller.
    """
    pull_secret = get_openshift_pull_secret(client=client)
    # Fetch and decode the secret before creating anything on disk, so a failed read does not
    # leave an empty temporary directory behind.
    secret = base64.b64decode(pull_secret.instance.data[".dockerconfigjson"]).decode(encoding="utf-8")

    pull_secret_path = tempfile.mkdtemp(suffix="odh-pull-secret")
    json_file = os.path.join(pull_secret_path, "pull-secrets.json")
    # Write with the encoding used to decode above instead of the platform default.
    with open(file=json_file, mode="w", encoding="utf-8") as outfile:
        outfile.write(secret)
    return json_file
665+
666+
667+
def get_oc_image_info(
    image: str,
    architecture: str,
    pull_secret: str | None = None,
) -> Any:
    """Run ``oc image info`` for ``image`` and return its JSON output as parsed data.

    The command is sampled with a 10 second timeout and a 5 second sleep; ``JSONDecodeError`` and
    ``TypeError`` are listed in the sampler's ``exceptions_dict``. The first truthy result is returned.

    Args:
        image: Image reference passed to ``oc image info``.
        architecture: Value passed to ``--filter-by-os``.
        pull_secret: Optional path passed as ``--registry-config``.

    Returns:
        The parsed JSON output of the command.

    Raises:
        TimeoutExpiredError: Re-raised after logging when the sampler times out.
    """

    def _get_image_json(cmd: str) -> Any:
        command_result = run_command(command=shlex.split(cmd), check=False)
        # The element at index 1 of the result is the text that gets parsed as JSON.
        return json.loads(command_result[1])

    registry_config_arg = f" --registry-config={pull_secret}" if pull_secret else ""
    base_command = f"oc image -o json info {image} --filter-by-os {architecture}{registry_config_arg}"

    image_info_samples = TimeoutSampler(
        wait_timeout=10,
        sleep=5,
        exceptions_dict={JSONDecodeError: [], TypeError: []},
        func=_get_image_json,
        cmd=base_command,
    )
    try:
        for image_info in image_info_samples:
            if image_info:
                return image_info
    except TimeoutExpiredError:
        LOGGER.error(f"Failed to parse {base_command}")
        raise

0 commit comments

Comments
 (0)