-
Notifications
You must be signed in to change notification settings - Fork 63
llmd health check #1284
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
llmd health check #1284
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
9f55c8d
Add KServe component health check as autouse session fixture
mwaykole 101b8ad
Add LLMD infrastructure health check as autouse session fixture
mwaykole 70d890a
Add health check for test cases llmd
mwaykole fbe9156
Add test steps in test
mwaykole 1e6df0c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] ddced41
Merge branch 'opendatahub-io:main' into llmd-healthcheck
mwaykole c0ac031
address comment
mwaykole a0d49dd
Merge branch 'main' into llmd-healthcheck
mwaykole 79414d5
Merge branch 'main' into llmd-healthcheck
mwaykole File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| import pytest | ||
| from kubernetes.dynamic import DynamicClient | ||
| from ocp_resources.data_science_cluster import DataScienceCluster | ||
| from ocp_resources.deployment import Deployment | ||
| from pytest_testconfig import config as py_config | ||
| from simple_logger.logger import get_logger | ||
|
|
||
| from utilities.constants import DscComponents | ||
|
|
||
| LOGGER = get_logger(name=__name__) | ||
|
|
||
| KSERVE_CONTROLLER_DEPLOYMENTS: list[str] = [ | ||
| "kserve-controller-manager", | ||
| "odh-model-controller", | ||
| ] | ||
|
|
||
|
|
||
def verify_kserve_health(admin_client: DynamicClient, dsc_resource: DataScienceCluster) -> None:
    """Verify that KServe components are healthy and ready to serve models.

    Checks, in order:

    1. The KServe ``managementState`` in the DSC spec is ``Managed``.
    2. The KServe ready condition in the DSC status is ``"True"``.
    3. Every deployment listed in ``KSERVE_CONTROLLER_DEPLOYMENTS`` exists in the
       applications namespace and reports an ``Available`` condition of ``"True"``.

    Args:
        admin_client: Client used to look up the controller deployments.
        dsc_resource: DataScienceCluster whose spec and status are inspected.

    Raises:
        Calls ``pytest.skip`` on the first failed check, so downstream kserve
        tests are skipped rather than failed.
    """
    applications_namespace = py_config["applications_namespace"]

    kserve_management_state = dsc_resource.instance.spec.components[DscComponents.KSERVE].managementState
    if kserve_management_state != DscComponents.ManagementState.MANAGED:
        pytest.skip(f"KServe managementState is {kserve_management_state}, expected Managed")

    # The status (or its conditions list) may be absent; treat that the same as
    # "condition not found" so the gate skips instead of erroring out.
    dsc_conditions = getattr(dsc_resource.instance.status, "conditions", None) or []
    kserve_condition_type = DscComponents.COMPONENT_MAPPING[DscComponents.KSERVE]
    kserve_condition = next(
        (condition for condition in dsc_conditions if condition.type == kserve_condition_type),
        None,
    )
    if kserve_condition is None:
        pytest.skip("KserveReady condition not found in DSC status")
    if kserve_condition.status != "True":
        pytest.skip(
            f"KServe DSC condition is not ready: {kserve_condition.status}, reason: {kserve_condition.get('reason')}"
        )

    for name in KSERVE_CONTROLLER_DEPLOYMENTS:
        deployment = Deployment(
            client=admin_client,
            name=name,
            namespace=applications_namespace,
        )
        if not deployment.exists:
            pytest.skip(f"KServe deployment {name} not found in {applications_namespace}")

        # Same defensive lookup as for the DSC: no status/conditions means "no Available condition".
        deployment_conditions = getattr(deployment.instance.status, "conditions", None) or []
        available_condition = next(
            (condition for condition in deployment_conditions if condition.type == "Available"),
            None,
        )
        if available_condition is None:
            pytest.skip(f"KServe deployment {name} has no Available condition")
        if available_condition.status != "True":
            pytest.skip(f"KServe deployment {name} is not Available: {available_condition.get('reason')}")

    LOGGER.info("KServe component health check passed")
|
|
||
|
|
||
@pytest.fixture(scope="session", autouse=True)
def kserve_health_check(
    request: pytest.FixtureRequest,
    admin_client: DynamicClient,
    dsc_resource: DataScienceCluster,
) -> None:
    """Gate the kserve test session on KServe component health.

    The check is bypassed when ``--skip-kserve-health-check`` is set, or when any
    collected test carries the ``component_health`` marker. Otherwise
    ``verify_kserve_health`` runs and skips when KServe components are not healthy
    (intended to cover tests under tests/model_serving/model_server/kserve/).
    """
    session = request.session

    if session.config.getoption("--skip-kserve-health-check"):
        LOGGER.warning("Skipping KServe health check, got --skip-kserve-health-check")
        return

    # Scan every marker on every collected item, including inherited ones.
    component_health_selected = any(
        marker.name == "component_health" for test_item in session.items for marker in test_item.iter_markers()
    )
    if component_health_selected:
        LOGGER.info("Skipping KServe health gate because selected tests include component_health marker")
        return

    verify_kserve_health(admin_client=admin_client, dsc_resource=dsc_resource)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| """Kuadrant custom resource for Kuadrant API management.""" | ||
|
|
||
| from typing import Any | ||
|
|
||
| from ocp_resources.resource import NamespacedResource | ||
|
|
||
| from utilities.constants import ApiGroups | ||
|
|
||
|
|
||
class Kuadrant(NamespacedResource):
    """Namespaced resource wrapper for the kuadrants API."""

    api_group: str = ApiGroups.KUADRANT_IO

    def __init__(self, **kwargs: Any) -> None:
        """Forward all keyword arguments unchanged to the base initializer."""
        super().__init__(**kwargs)

    def to_dict(self) -> None:
        """Build the resource body via the base class, then default ``spec``.

        ``spec`` is set to an empty mapping only when neither ``kind_dict`` nor
        ``yaml_file`` is truthy.
        """
        super().to_dict()

        if self.kind_dict or self.yaml_file:
            return

        self.res["spec"] = {}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| """LeaderWorkerSetOperator custom resource for OpenShift LWS operator.""" | ||
|
|
||
| from typing import Any | ||
|
|
||
| from ocp_resources.resource import Resource | ||
|
|
||
|
|
||
class LeaderWorkerSetOperator(Resource):
    """Resource wrapper for the leaderworkersetoperators API."""

    api_group: str = "operator.openshift.io"

    def __init__(self, **kwargs: Any) -> None:
        """Forward all keyword arguments unchanged to the base initializer."""
        super().__init__(**kwargs)

    def to_dict(self) -> None:
        """Build the resource body via the base class, then default ``spec``.

        ``spec`` is set to an empty mapping only when neither ``kind_dict`` nor
        ``yaml_file`` is truthy.
        """
        super().to_dict()

        if self.kind_dict or self.yaml_file:
            return

        self.res["spec"] = {}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.