Skip to content

Commit 2708cf6

Browse files
authored
Merge branch 'main' into mm-ext
2 parents 79c6989 + 924b68b commit 2708cf6

File tree

4 files changed

+202
-29
lines changed

4 files changed

+202
-29
lines changed

tests/model_explainability/trustyai_service/conftest.py

Lines changed: 62 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,26 @@
2020
from ocp_utilities.operators import install_operator, uninstall_operator
2121

2222
from tests.model_explainability.trustyai_service.trustyai_service_utils import (
23-
TRUSTYAI_SERVICE_NAME,
2423
wait_for_isvc_deployment_registered_by_trustyai_service,
2524
)
2625
from tests.model_explainability.trustyai_service.utils import (
2726
get_cluster_service_version,
2827
wait_for_mariadb_operator_deployments,
28+
create_trustyai_service,
2929
wait_for_mariadb_pods,
30+
TRUSTYAI_SERVICE_NAME,
3031
)
3132

3233
from utilities.constants import Timeout, KServeDeploymentType, ApiGroups, Labels, Ports
3334
from utilities.inference_utils import create_isvc
3435
from utilities.infra import update_configmap_data
3536

37+
3638
OPENSHIFT_OPERATORS: str = "openshift-operators"
3739

40+
TAI_DATA_CONFIG = {"filename": "data.csv", "format": "CSV"}
41+
TAI_METRICS_CONFIG = {"schedule": "5s"}
42+
TAI_DB_STORAGE_CONFIG = {"format": "DATABASE", "size": "1Gi", "databaseConfigurations": "db-credentials"}
3843
MARIADB: str = "mariadb"
3944
DB_CREDENTIALS_SECRET_NAME: str = "db-credentials"
4045
DB_NAME: str = "trustyai_db"
@@ -47,6 +52,14 @@
4752
LIGHTGBM: str = "lightgbm"
4853
MLFLOW: str = "mlflow"
4954
TIMEOUT_20MIN: int = 20 * Timeout.TIMEOUT_1MIN
55+
INVALID_TLS_CERTIFICATE: str = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJnRENDQVNlZ0F3SUJBZ0lRRGtTcXVuUWRzRmZwdi8zSm\
56+
5TS2ZoVEFLQmdncWhrak9QUVFEQWpBVk1STXcKRVFZRFZRUURFd3B0WVhKcFlXUmlMV05oTUI0WERUSTFNRFF4TkRFME1EUXhOMW9YRFRJNE1EUXhNekUx\
57+
TURReApOMW93RlRFVE1CRUdBMVVFQXhNS2JXRnlhV0ZrWWkxallUQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VICkEwSUFCQ2IxQ1IwUjV1akZ1QUR\
58+
Gd1NsazQzUUpmdDFmTFVnOWNJNyttZ0w3bVd3MmVLUXowL04ybm9KMGpJaDYKN0NnQ2syUW1jNTdWM1podkFWQzJoU2NEbWg2aldUQlhNQTRHQTFVZER3RU\
59+
Ivd1FFQXdJQ0JEQVBCZ05WSFJNQgpBZjhFQlRBREFRSC9NQjBHQTFVZERnUVdCQlNUa2tzSU9pL1pTbCtQRlJua2NQRlJ0QTRrMERBVkJnTlZIUkVFCkRqQ\
60+
U1nZ3B0WVhKcFlXUmlMV05oTUFvR0NDcUdTTTQ5QkFNQ0EwY0FNRVFDSUI1Q2F6VW1WWUZQYTFkS2txUGkKbitKSEQvNVZTTGd4aHVPclgzUGcxQnlzQWlB\
61+
RmcvTXlNWW9CZUNrUVRWdS9rUkIwK2N2Qy9RMDB4NExvVGpJaQpGdCtKMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0t\
62+
LS0t" # pragma: allowlist secret
5063

5164

5265
@pytest.fixture(scope="class")
@@ -66,18 +79,14 @@ def trustyai_service_with_pvc_storage(
6679
trustyai_service.clean_up()
6780

6881
else:
69-
with TrustyAIService(
82+
yield from create_trustyai_service(
7083
**trustyai_service_kwargs,
7184
storage={"format": "PVC", "folder": "/inputs", "size": "1Gi"},
72-
data={"filename": "data.csv", "format": "CSV"},
73-
metrics={"schedule": "5s"},
85+
metrics=TAI_METRICS_CONFIG,
86+
data=TAI_DATA_CONFIG,
87+
wait_for_replicas=True,
7488
teardown=teardown_resources,
75-
) as trustyai_service:
76-
trustyai_deployment = Deployment(
77-
namespace=model_namespace.name, name=TRUSTYAI_SERVICE_NAME, wait_for_resource=True
78-
)
79-
trustyai_deployment.wait_for_replicas()
80-
yield trustyai_service
89+
)
8190

8291

8392
@pytest.fixture(scope="class")
@@ -89,18 +98,35 @@ def trustyai_service_with_db_storage(
8998
mariadb: MariaDB,
9099
trustyai_db_ca_secret: None,
91100
) -> Generator[TrustyAIService, Any, Any]:
92-
with TrustyAIService(
101+
yield from create_trustyai_service(
93102
client=admin_client,
94-
name=TRUSTYAI_SERVICE_NAME,
95103
namespace=model_namespace.name,
96-
storage={"format": "DATABASE", "size": "1Gi", "databaseConfigurations": "db-credentials"},
97-
metrics={"schedule": "5s"},
98-
) as trustyai_service:
99-
trustyai_deployment = Deployment(
100-
namespace=model_namespace.name, name=TRUSTYAI_SERVICE_NAME, wait_for_resource=True
101-
)
102-
trustyai_deployment.wait_for_replicas()
103-
yield trustyai_service
104+
storage=TAI_DB_STORAGE_CONFIG,
105+
metrics=TAI_METRICS_CONFIG,
106+
wait_for_replicas=True,
107+
)
108+
109+
110+
@pytest.fixture(scope="class")
def trustyai_service_with_invalid_db_cert(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    cluster_monitoring_config: ConfigMap,
    user_workload_monitoring_config: ConfigMap,
    mariadb: MariaDB,
    trustyai_invalid_db_ca_secret: Secret,
) -> Generator[TrustyAIService, None, None]:
    """Create a TrustyAIService with database storage while an invalid DB CA secret is in place.

    The monitoring config, MariaDB and invalid CA secret fixtures are requested so that they
    are set up before the service is created; their values are not referenced in the body.

    The service is created with ``wait_for_replicas=False``: the deployment is not awaited,
    because the test consuming this fixture expects the service pod to fail.

    Yields:
        TrustyAIService: The service created by ``create_trustyai_service``.
    """
    yield from create_trustyai_service(
        client=admin_client,
        namespace=model_namespace.name,
        storage=TAI_DB_STORAGE_CONFIG,
        metrics=TAI_METRICS_CONFIG,
        wait_for_replicas=False,
    )
104130

105131

106132
@pytest.fixture(scope="session")
@@ -229,7 +255,7 @@ def mariadb(
229255
@pytest.fixture(scope="class")
230256
def trustyai_db_ca_secret(
231257
admin_client: DynamicClient, model_namespace: Namespace, mariadb: MariaDB
232-
) -> Generator[None, Any, None]:
258+
) -> Generator[Secret, Any, None]:
233259
mariadb_ca_secret = Secret(
234260
client=admin_client, name=f"{mariadb.name}-ca", namespace=model_namespace.name, ensure_exists=True
235261
)
@@ -238,8 +264,21 @@ def trustyai_db_ca_secret(
238264
name=f"{TRUSTYAI_SERVICE_NAME}-db-ca",
239265
namespace=model_namespace.name,
240266
data_dict={"ca.crt": mariadb_ca_secret.instance.data["ca.crt"]},
241-
):
242-
yield
267+
) as secret:
268+
yield secret
269+
270+
271+
@pytest.fixture(scope="class")
def trustyai_invalid_db_ca_secret(
    admin_client: DynamicClient, model_namespace: Namespace, mariadb: MariaDB
) -> Generator[Secret, Any, None]:
    """Create the ``<TRUSTYAI_SERVICE_NAME>-db-ca`` secret from a hard-coded certificate.

    The ``ca.crt`` entry is filled with ``INVALID_TLS_CERTIFICATE`` rather than with a value
    read from the cluster. ``mariadb`` is requested but not referenced in the body.
    NOTE(review): presumably it is listed only to order fixture setup — confirm.

    Yields:
        Secret: The created secret; it is cleaned up when the context manager exits.
    """
    secret_name = f"{TRUSTYAI_SERVICE_NAME}-db-ca"
    invalid_ca_data = {"ca.crt": INVALID_TLS_CERTIFICATE}
    with Secret(
        client=admin_client,
        name=secret_name,
        namespace=model_namespace.name,
        data_dict=invalid_ca_data,
    ) as invalid_ca_secret:
        yield invalid_ca_secret
243282

244283

245284
@pytest.fixture(scope="class")
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import pytest
2+
from ocp_resources.namespace import Namespace
3+
4+
from tests.model_explainability.trustyai_service.utils import validate_trustyai_service_db_conn_failure
5+
6+
7+
@pytest.mark.parametrize(
    "model_namespace",
    [pytest.param({"name": "test-trustyai-service-invalid-db-cert"})],
    indirect=True,
)
def test_trustyai_service_with_invalid_db_cert(
    admin_client,
    current_client_token,
    model_namespace: Namespace,
    trustyai_service_with_invalid_db_cert,
):
    """Check that the TrustyAIService pod fails when a wrong database TLS certificate is used."""
    # Select the pod(s) belonging to the service instance created by the fixture.
    service_pod_selector = f"app.kubernetes.io/instance={trustyai_service_with_invalid_db_cert.name}"
    validate_trustyai_service_db_conn_failure(
        client=admin_client,
        namespace=model_namespace,
        label_selector=service_pod_selector,
    )

tests/model_explainability/trustyai_service/utils.py

Lines changed: 105 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
1+
from typing import Generator, Any, Optional
2+
import re
3+
14
from kubernetes.dynamic import DynamicClient
25
from kubernetes.dynamic.exceptions import ResourceNotFoundError, ResourceNotUniqueError
36
from ocp_resources.cluster_service_version import ClusterServiceVersion
47
from ocp_resources.deployment import Deployment
5-
from ocp_resources.maria_db import MariaDB
68
from ocp_resources.mariadb_operator import MariadbOperator
9+
from ocp_resources.maria_db import MariaDB
10+
from ocp_resources.namespace import Namespace
711
from ocp_resources.pod import Pod
12+
from ocp_resources.trustyai_service import TrustyAIService
813
from simple_logger.logger import get_logger
914
from timeout_sampler import TimeoutSampler
10-
15+
from tests.model_explainability.trustyai_service.trustyai_service_utils import TRUSTYAI_SERVICE_NAME
1116
from utilities.constants import Timeout
17+
from timeout_sampler import retry
18+
19+
from utilities.exceptions import TooManyPodsError, UnexpectedFailureError
1220

1321
LOGGER = get_logger(name=__name__)
1422

@@ -46,15 +54,15 @@ def wait_for_mariadb_operator_deployments(mariadb_operator: MariadbOperator) ->
4654

4755
def wait_for_mariadb_pods(client: DynamicClient, mariadb: MariaDB, timeout: int = Timeout.TIMEOUT_5MIN) -> None:
4856
def _get_mariadb_pods() -> list[Pod]:
49-
pods = [
50-
pod
51-
for pod in Pod.get(
57+
_pods = [
58+
_pod
59+
for _pod in Pod.get(
5260
dyn_client=client,
5361
namespace=mariadb.namespace,
5462
label_selector="app.kubernetes.io/instance=mariadb",
5563
)
5664
]
57-
return pods
65+
return _pods
5866

5967
sampler = TimeoutSampler(wait_timeout=timeout, sleep=1, func=lambda: bool(_get_mariadb_pods()))
6068

@@ -68,3 +76,94 @@ def _get_mariadb_pods() -> list[Pod]:
6876
condition=Pod.Condition.READY,
6977
status="True",
7078
)
79+
80+
81+
@retry(
    wait_timeout=Timeout.TIMEOUT_2MIN,
    sleep=5,
    exceptions_dict={TooManyPodsError: [], UnexpectedFailureError: []},
)
def validate_trustyai_service_db_conn_failure(
    client: DynamicClient, namespace: Namespace, label_selector: Optional[str]
) -> bool:
    """Validate that an invalid DB certificate makes the TrustyAIService pod fail as expected.

    Looks up the pods matching ``label_selector`` and inspects the last terminated state of
    each container of the single expected pod. The check succeeds when a container was
    terminated with reason ``Pod.Status.ERROR`` or ``Pod.Status.CRASH_LOOPBACK_OFF`` and its
    termination message matches the MariaDB certificate-validation failure pattern.

    Args:
        client: The OpenShift client.
        namespace: Namespace under which the pod is created.
        label_selector: The label selector used to select the correct pod(s) to monitor.

    Returns:
        bool: True if a container failed in the expected way; False if no pod exists yet or
        no container has a matching terminated state (a falsy result makes ``retry`` try again).

    Raises:
        TimeoutExpiredError: if no truthy result is produced within ``wait_timeout``.
        TooManyPodsError: if more than one pod matches the selector.
        UnexpectedFailureError: if a container terminated with a different message.

    NOTE(review): both custom exceptions are listed in ``exceptions_dict`` of ``retry``, so
    they are presumably retried and only surface through the timeout error — confirm against
    the timeout_sampler documentation.
    """
    pods = list(Pod.get(dyn_client=client, namespace=namespace.name, label_selector=label_selector))
    if not pods:
        return False
    if len(pods) > 1:
        raise TooManyPodsError("More than one pod found in TrustyAIService.")

    pod = pods[0]
    mariadb_conn_failure_regex = (
        r"^.+ERROR.+Could not connect to mariadb:.+ PKIX path validation failed: "
        r"java\.security\.cert\.CertPathValidatorException: signature check failed"
    )
    # A freshly scheduled pod may not report container statuses yet; treat that as
    # "not failed yet" so the retry loop keeps polling instead of crashing on None.
    for container_status in pod.instance.status.containerStatuses or []:
        terminate_state = getattr(container_status.lastState, "terminated", None)
        if not terminate_state or terminate_state.reason not in (
            pod.Status.ERROR,
            pod.Status.CRASH_LOOPBACK_OFF,
        ):
            continue
        # The termination message can be absent; match against an empty string in that
        # case so a missing message is reported as an unexpected failure.
        if not re.search(mariadb_conn_failure_regex, terminate_state.message or ""):
            # Adjacent literals (not backslash continuations) keep source indentation
            # out of the rendered message.
            raise UnexpectedFailureError(
                f"Service {TRUSTYAI_SERVICE_NAME} did not fail with a mariadb connection failure as expected."
                f"\nExpected format: {mariadb_conn_failure_regex}"
                f"\nGot: {terminate_state.message}"
            )
        return True
    return False
130+
131+
132+
def create_trustyai_service(
    client: DynamicClient,
    namespace: str,
    storage: dict[str, str],
    metrics: dict[str, str],
    name: str = TRUSTYAI_SERVICE_NAME,
    data: Optional[dict[str, str]] = None,
    wait_for_replicas: bool = True,
    teardown: bool = True,
) -> Generator[TrustyAIService, Any, Any]:
    """Create a TrustyAIService and optionally wait for its deployment replicas.

    Intended to be used from fixtures via ``yield from``; the service is managed by the
    ``TrustyAIService`` context manager for the lifetime of the generator.

    Args:
        client: The client used for both the service and the deployment lookup.
        namespace: Name of the namespace to create the service in.
        storage: Dict with storage configuration.
        metrics: Dict with metrics configuration.
        name: Name of the TrustyAI service and of its deployment
            (defaults to ``TRUSTYAI_SERVICE_NAME``).
        data: An optional dict with data configuration.
        wait_for_replicas: Wait until the deployment replicas are available (default True).
        teardown: Passed through to ``TrustyAIService`` (default True).

    Yields:
        TrustyAIService: The created TrustyAI service.
    """
    with TrustyAIService(
        client=client,
        name=name,
        namespace=namespace,
        storage=storage,
        metrics=metrics,
        data=data,
        teardown=teardown,
    ) as trustyai_service:
        # Use the caller's client for the deployment as well, so the lookup targets the
        # same cluster/credentials as the service instead of a default client.
        trustyai_deployment = Deployment(client=client, namespace=namespace, name=name, wait_for_resource=True)
        if wait_for_replicas:
            trustyai_deployment.wait_for_replicas()
        yield trustyai_service

utilities/exceptions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,11 @@ class ResourceNotReadyError(Exception):
102102

103103
class PodContainersRestartError(Exception):
104104
pass
105+
106+
107+
class TooManyPodsError(Exception):
    """Raised when more pods are found than expected."""
109+
110+
111+
class UnexpectedFailureError(Exception):
    """Raised when a failure occurred but not in the expected failure mode."""

0 commit comments

Comments
 (0)