Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ def pytest_exception_interact(node: Item | Collector, call: CallInfo[Any], repor

try:
collect_rhoai_must_gather(
base_file_name=f"mg-{test_start_time}",
since=calculate_must_gather_timer(test_start_time=test_start_time),
target_dir=os.path.join(get_must_gather_collector_dir(), "pytest_exception_interact"),
)
Expand Down
15 changes: 15 additions & 0 deletions docs/GETTING_STARTED.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,21 @@ OC_BINARY_PATH=/usr/local/bin/oc uv run pytest

**Note:** Ensure your local `oc` binary is executable and compatible with your target cluster version.

## Must gather

To collect must-gather data at the point of failure, pass the `--collect-must-gather` flag to the pytest command, e.g.

```bash
uv run pytest tests/<your component> --collect-must-gather
```

By default, the collected must-gather output is archived. To skip archiving, set the environment variable
`ARCHIVE_MUST_GATHER` to any value other than "true", e.g.

```bash
export ARCHIVE_MUST_GATHER="false"
```

### Benefits of Using Local Binary

- Faster test startup (no download time)
Expand Down
62 changes: 0 additions & 62 deletions utilities/infra.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
import base64
import json
import os
import re
import shlex
import stat
import tarfile
import tempfile
import zipfile
from contextlib import contextmanager
from functools import cache
from typing import Any, Generator, Optional, Set, Callable
from json import JSONDecodeError

import kubernetes
import platform
Expand Down Expand Up @@ -1025,15 +1022,6 @@ def get_rhods_operator_installed_csv() -> ClusterServiceVersion | None:
return None


def get_rhods_csv_version() -> Version | None:
    """Return the parsed version of the installed RHOAI CSV, or None when no CSV exists."""
    csv = get_rhods_operator_installed_csv()
    if not csv:
        # No CSV means the operator is not installed via OLM — likely an ODH cluster.
        LOGGER.warning("No RHOAI CSV found. Potentially ODH cluster")
        return None
    raw_version = csv.instance.spec.version
    LOGGER.info(f"RHOAI CSV version: {raw_version}")
    return Version.parse(version=raw_version)


@retry(
wait_timeout=120,
sleep=5,
Expand Down Expand Up @@ -1115,56 +1103,6 @@ def verify_cluster_sanity(
pytest.exit(reason=error_msg, returncode=return_code)


def get_openshift_pull_secret(client: DynamicClient = None) -> Secret:
    """Fetch the cluster-wide pull-secret from the openshift-config namespace.

    Args:
        client (DynamicClient, optional): Client to use; a default client is created when omitted.

    Returns:
        Secret: The existing pull-secret resource (existence is asserted).
    """
    namespace = "openshift-config"
    name = "pull-secret"  # pragma: allowlist secret
    pull_secret = Secret(
        client=client or get_client(),
        name=name,
        namespace=namespace,
    )
    assert pull_secret.exists, f"Pull-secret {name} not found in namespace {namespace}"
    return pull_secret


def generate_openshift_pull_secret_file(client: DynamicClient = None) -> str:
    """Write the cluster pull-secret to a temporary JSON file and return its path.

    Args:
        client (DynamicClient, optional): Client to use; a default client is created when omitted.

    Returns:
        str: Path to the written pull-secrets.json file.
    """
    # The secret stores the docker config base64-encoded under ".dockerconfigjson".
    decoded_secret = base64.b64decode(
        get_openshift_pull_secret(client=client).instance.data[".dockerconfigjson"]
    ).decode(encoding="utf-8")
    json_file = os.path.join(tempfile.mkdtemp(suffix="odh-pull-secret"), "pull-secrets.json")
    with open(file=json_file, mode="w") as outfile:
        outfile.write(decoded_secret)
    return json_file


def get_oc_image_info(
    image: str,
    architecture: str,
    pull_secret: str | None = None,
) -> Any:
    """Return the parsed JSON output of `oc image info` for the given image/architecture.

    Retries for up to 10 seconds while the command output cannot be parsed as JSON.

    Args:
        image (str): Image reference to inspect.
        architecture (str): OS/arch filter passed to --filter-by-os.
        pull_secret (str, optional): Path to a registry config file for authentication.

    Raises:
        TimeoutExpiredError: If no parsable output is obtained within the timeout.
    """

    def _get_image_json(cmd: str) -> Any:
        # check=False: rely on JSON parsing (and the sampler's retry) to detect failures.
        return json.loads(run_command(command=shlex.split(cmd), check=False)[1])

    base_command = f"oc image -o json info {image} --filter-by-os {architecture}"
    if pull_secret:
        base_command = f"{base_command} --registry-config={pull_secret}"

    sampler = TimeoutSampler(
        wait_timeout=10,
        sleep=5,
        exceptions_dict={JSONDecodeError: [], TypeError: []},
        func=_get_image_json,
        cmd=base_command,
    )
    try:
        for result in sampler:
            if result:
                return result
    except TimeoutExpiredError:
        LOGGER.error(f"Failed to parse {base_command}")
        raise


def get_machine_platform() -> str:
    """Return the local machine architecture, normalized to Go/OCI naming ("amd64" for x86_64)."""
    machine = platform.machine()
    if machine == "x86_64":
        return "amd64"
    return machine
Expand Down
61 changes: 35 additions & 26 deletions utilities/must_gather_collector.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import os
import shlex
import shutil

from pytest_testconfig import config as py_config
from pytest import Item
from pyhelper_utils.shell import run_command
from simple_logger.logger import get_logger
from utilities.exceptions import InvalidArgumentsError
from utilities.infra import get_rhods_csv_version, get_oc_image_info, generate_openshift_pull_secret_file
from utilities.infra import get_rhods_operator_installed_csv

BASE_DIRECTORY_NAME = "must-gather-collected"
BASE_RESULTS_DIR = "/home/odh/opendatahub-tests/"
Expand Down Expand Up @@ -132,48 +133,56 @@ def run_must_gather(
return run_command(command=shlex.split(must_gather_command), check=False)[1]


def get_must_gather_image_info(architecture: str = "linux/amd64") -> str:
    """Resolve the RHOAI must-gather image, pinned by digest, for the given architecture.

    Args:
        architecture (str, optional): OS/arch filter for image inspection. Defaults to "linux/amd64".

    Returns:
        str: Digest-pinned must-gather image reference, or "" when no RHOAI CSV is installed.

    Raises:
        RuntimeError: If resolving the image information fails.
    """
    try:
        csv_version = get_rhods_csv_version()
        if not csv_version:
            # Fixed typo: "RHAOI" -> "RHOAI".
            LOGGER.warning(
                "No RHOAI CSV found. Potentially ODH cluster and must-gather collection is not "
                "relevant for this cluster"
            )
            return ""
        must_gather_image_manifest = f"quay.io/modh/must-gather:rhoai-{csv_version.major}.{csv_version.minor}"
        pull_secret = generate_openshift_pull_secret_file()
        image_info = get_oc_image_info(
            image=must_gather_image_manifest, architecture=architecture, pull_secret=pull_secret
        )
        return f"quay.io/modh/must-gather@{image_info['digest']}"
    # Renamed from `exec`, which shadowed the `exec` builtin.
    except Exception as exc:
        raise RuntimeError(f"Failed to retrieve must-gather image info: {str(exc)}") from exc
def get_must_gather_image_info() -> str:
    """Return the must-gather image URL listed in the installed RHOAI CSV's relatedImages.

    Returns:
        str: The first relatedImages entry containing "odh-must-gather", or "" when
            no RHOAI CSV is installed (e.g. ODH cluster) or the CSV lists no such image.
    """
    csv_object = get_rhods_operator_installed_csv()
    if not csv_object:
        LOGGER.warning(
            "No RHOAI CSV found. Potentially ODH cluster and must-gather collection is not relevant for this cluster"
        )
        return ""
    must_gather_images = [
        image["image"] for image in csv_object.instance.spec.relatedImages if "odh-must-gather" in image["image"]
    ]
    if not must_gather_images:
        # A CSV exists here, so the original "No RHOAI CSV found" message was misleading:
        # the actual condition is that the CSV lists no must-gather image.
        LOGGER.warning(
            "No odh-must-gather image found in the CSV relatedImages. "
            "Must-gather collection is not relevant for this cluster"
        )
        return ""
    return must_gather_images[0]


def collect_rhoai_must_gather(
    base_file_name: str,
    target_dir: str,
    since: int,
    save_collection_output: bool = True,
) -> None:
    """
    Collect must-gather data for RHOAI cluster.

    When the ARCHIVE_MUST_GATHER environment variable is "true" (the default), the
    collected directory is zipped as <base_file_name>.zip inside target_dir and the
    raw directory is removed.

    Args:
        base_file_name (str): Base file name for must-gather compressed file
        target_dir (str): Directory to store the must-gather output
        since (int): Time in seconds to collect logs from
        save_collection_output (bool, optional): Whether to save must-gather command output. Defaults to True.
    """
    must_gather_image = get_must_gather_image_info()
    if not must_gather_image:
        LOGGER.error("No must-gather image is found from the csv. Must-gather collection would be skipped.")
        return
    output = run_must_gather(image_url=must_gather_image, target_dir=target_dir, since=f"{since}s")
    if save_collection_output:
        with open(os.path.join(target_dir, "output.log"), "w") as _file:
            _file.write(output)
    # get must gather directory to archive
    path = get_must_gather_output_dir(must_gather_path=target_dir)
    if os.getenv("ARCHIVE_MUST_GATHER", "true") == "true":
        # archive the folder; make_archive returns the created zip's path (relative to CWD)
        file_name = shutil.make_archive(base_name=base_file_name, format="zip", base_dir=path)
        # remove the folder that was archived
        shutil.rmtree(path=path, ignore_errors=True)
        # move the archive into target_dir; basename() guards against joining a full path
        # (the original copy+unlink joined `file_name` verbatim, which can mangle the destination)
        dest_file = os.path.join(target_dir, os.path.basename(file_name))
        shutil.move(src=file_name, dst=dest_file)
        LOGGER.info(f"{dest_file} is collected successfully")
    else:
        LOGGER.warning("Must-gather collection would be skipped.")