diff --git a/tests/model_serving/model_server/kserve/private_endpoint/conftest.py b/tests/model_serving/model_server/kserve/private_endpoint/conftest.py index 292873a80..1ce4c670e 100644 --- a/tests/model_serving/model_server/kserve/private_endpoint/conftest.py +++ b/tests/model_serving/model_server/kserve/private_endpoint/conftest.py @@ -7,11 +7,12 @@ from ocp_resources.namespace import Namespace from ocp_resources.pod import Pod from ocp_resources.secret import Secret +from ocp_resources.service_account import ServiceAccount from ocp_resources.serving_runtime import ServingRuntime from simple_logger.logger import get_logger -from tests.model_serving.model_server.kserve.private_endpoint.utils import create_sidecar_pod -from utilities.constants import KServeDeploymentType, ModelFormat, ModelStoragePath +from tests.model_serving.model_server.kserve.private_endpoint.utils import create_curl_pod +from utilities.constants import KServeDeploymentType, ModelStoragePath from utilities.inference_utils import create_isvc from utilities.infra import create_ns @@ -29,70 +30,43 @@ def endpoint_isvc( unprivileged_client: DynamicClient, serving_runtime_from_template: ServingRuntime, models_endpoint_s3_secret: Secret, + model_service_account: ServiceAccount, ) -> Generator[InferenceService, Any, Any]: with create_isvc( client=unprivileged_client, name="endpoint-isvc", namespace=serving_runtime_from_template.namespace, - deployment_mode=KServeDeploymentType.SERVERLESS, + deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT, storage_key=models_endpoint_s3_secret.name, - storage_path=ModelStoragePath.FLAN_T5_SMALL_CAIKIT, - model_format=ModelFormat.CAIKIT, + storage_path=ModelStoragePath.OPENVINO_EXAMPLE_MODEL, + model_format=serving_runtime_from_template.instance.spec.supportedModelFormats[0].name, runtime=serving_runtime_from_template.name, - wait_for_predictor_pods=True, + model_service_account=model_service_account.name, + wait_for_predictor_pods=False, ) as isvc: yield isvc @pytest.fixture() -def endpoint_pod_with_istio_sidecar( +def same_namespace_pod( unprivileged_client: DynamicClient, unprivileged_model_namespace: Namespace ) -> Generator[Pod, Any, Any]: - with create_sidecar_pod( + with create_curl_pod( client=unprivileged_client, namespace=unprivileged_model_namespace.name, - use_istio=True, - pod_name="test-with-istio", + pod_name="curl-same-ns", ) as pod: yield pod @pytest.fixture() -def endpoint_pod_without_istio_sidecar( - unprivileged_client: DynamicClient, unprivileged_model_namespace: Namespace -) -> Generator[Pod, Any, Any]: - with create_sidecar_pod( - client=unprivileged_client, - namespace=unprivileged_model_namespace.name, - use_istio=False, - pod_name="test", - ) as pod: - yield pod - - -@pytest.fixture() -def diff_pod_with_istio_sidecar( - unprivileged_client: DynamicClient, - diff_namespace: Namespace, -) -> Generator[Pod, Any, Any]: - with create_sidecar_pod( - client=unprivileged_client, - namespace=diff_namespace.name, - use_istio=True, - pod_name="test-with-istio", - ) as pod: - yield pod - - -@pytest.fixture() -def diff_pod_without_istio_sidecar( +def diff_namespace_pod( unprivileged_client: DynamicClient, diff_namespace: Namespace, ) -> Generator[Pod, Any, Any]: - with create_sidecar_pod( + with create_curl_pod( client=unprivileged_client, namespace=diff_namespace.name, - use_istio=False, - pod_name="test", + pod_name="curl-diff-ns", ) as pod: yield pod diff --git a/tests/model_serving/model_server/kserve/private_endpoint/test_kserve_private_endpoint.py b/tests/model_serving/model_server/kserve/private_endpoint/test_kserve_private_endpoint.py index ce5aeef42..cb248b367 100644 --- a/tests/model_serving/model_server/kserve/private_endpoint/test_kserve_private_endpoint.py +++ b/tests/model_serving/model_server/kserve/private_endpoint/test_kserve_private_endpoint.py @@ -6,10 +6,13 @@ from simple_logger.logger import get_logger from tests.model_serving.model_server.kserve.private_endpoint.utils import curl_from_pod -from utilities.constants import CurlOutput, ModelEndpoint, Protocols, RuntimeTemplates +from utilities.constants import RuntimeTemplates LOGGER = get_logger(name=__name__) +OVMS_REST_PORT = 8888 +OVMS_HEALTH_ENDPOINT = "v2/health/ready" +HTTP_OK = "200" pytestmark = [pytest.mark.usefixtures("valid_aws_config")] @@ -20,8 +23,8 @@ pytest.param( {"name": "endpoint"}, { - "name": "flan-example-runtime", - "template-name": RuntimeTemplates.CAIKIT_TGIS_SERVING, + "name": "ovms-endpoint-runtime", + "template-name": RuntimeTemplates.OVMS_KSERVE, "multi-model": False, }, ) @@ -29,86 +32,58 @@ indirect=True, ) class TestKserveInternalEndpoint: - """Tests the internal endpoint of a kserve predictor""" + """ + Tests the internal endpoint of a KServe RawDeployment predictor using OVMS with S3 storage. + + Steps: + 1. Deploy OVMS ServingRuntime and InferenceService with S3 storage in RawDeployment mode. + 2. Verify the model state reaches "Loaded". + 3. Verify the internal endpoint URL is set correctly. + 4. Curl v2/health/ready from a pod in the same namespace — expect HTTP 200. + 5. Curl v2/health/ready from a pod in a different namespace — expect HTTP 200. + """ def test_deploy_model_state_loaded(self: Self, endpoint_isvc: InferenceService) -> None: - """Verifies that the predictor gets to state Loaded""" + """Verifies that the predictor gets to state Loaded.""" assert endpoint_isvc.instance.status.modelStatus.states.activeModelState == "Loaded" def test_deploy_model_url(self: Self, endpoint_isvc: InferenceService) -> None: - """Verifies that the internal endpoint has the expected formatting""" - assert ( - endpoint_isvc.instance.status.address.url - == f"https://{endpoint_isvc.name}.{endpoint_isvc.namespace}.svc.cluster.local" - ) - - def test_curl_with_istio_same_ns( - self: Self, - endpoint_isvc: InferenceService, - endpoint_pod_with_istio_sidecar: Pod, - ) -> None: - """ - Verifies the response from the health endpoint, - sending a request from a pod in the same ns and part of the Istio Service Mesh - """ - - curl_stdout = curl_from_pod( - isvc=endpoint_isvc, - pod=endpoint_pod_with_istio_sidecar, - endpoint=ModelEndpoint.HEALTH, - ) - assert curl_stdout == CurlOutput.HEALTH_OK + """Verifies that the internal endpoint URL is set.""" + url = endpoint_isvc.instance.status.address.url + assert url is not None + assert endpoint_isvc.name in url + assert endpoint_isvc.namespace in url - def test_curl_with_istio_diff_ns( + def test_curl_same_namespace( self: Self, endpoint_isvc: InferenceService, - diff_pod_with_istio_sidecar: Pod, + same_namespace_pod: Pod, ) -> None: """ - Verifies the response from the health endpoint, - sending a request from a pod in a different ns and part of the Istio Service Mesh + Verifies the v2 health endpoint is reachable + from a pod in the same namespace. """ - - curl_stdout = curl_from_pod( - isvc=endpoint_isvc, - pod=diff_pod_with_istio_sidecar, - endpoint=ModelEndpoint.HEALTH, - protocol=Protocols.HTTPS, - ) - assert curl_stdout == CurlOutput.HEALTH_OK - - def test_curl_outside_istio_same_ns( - self: Self, - endpoint_isvc: InferenceService, - endpoint_pod_without_istio_sidecar: Pod, - ) -> None: - """ - Verifies the response from the health endpoint, - sending a request from a pod in the same ns and not part of the Istio Service Mesh - """ - curl_stdout = curl_from_pod( isvc=endpoint_isvc, - pod=endpoint_pod_without_istio_sidecar, - endpoint=ModelEndpoint.HEALTH, - protocol=Protocols.HTTPS, + pod=same_namespace_pod, + endpoint=OVMS_HEALTH_ENDPOINT, + port=OVMS_REST_PORT, ) - assert curl_stdout == CurlOutput.HEALTH_OK + assert curl_stdout == HTTP_OK - def test_curl_outside_istio_diff_ns( + def test_curl_diff_namespace( self: Self, endpoint_isvc: InferenceService, - diff_pod_without_istio_sidecar: Pod, + diff_namespace_pod: Pod, ) -> None: """ - Verifies the response from the health endpoint, - sending a request from a pod in a different ns and not part of the Istio Service Mesh + Verifies the v2 health endpoint is reachable + from a pod in a different namespace. """ - curl_stdout = curl_from_pod( isvc=endpoint_isvc, - pod=diff_pod_without_istio_sidecar, - endpoint=ModelEndpoint.HEALTH, - protocol=Protocols.HTTPS, + pod=diff_namespace_pod, + endpoint=OVMS_HEALTH_ENDPOINT, + port=OVMS_REST_PORT, ) - assert curl_stdout == CurlOutput.HEALTH_OK + assert curl_stdout == HTTP_OK diff --git a/tests/model_serving/model_server/kserve/private_endpoint/utils.py b/tests/model_serving/model_server/kserve/private_endpoint/utils.py index 67bd49761..8d023b505 100644 --- a/tests/model_serving/model_server/kserve/private_endpoint/utils.py +++ b/tests/model_serving/model_server/kserve/private_endpoint/utils.py @@ -20,43 +20,47 @@ def curl_from_pod( pod: Pod, endpoint: str, protocol: str = Protocols.HTTP, + port: int | None = None, ) -> str: """ - Curl from pod + Curl from pod and return HTTP status code. Args: isvc (InferenceService): InferenceService object pod (Pod): Pod object - endpoint (str): endpoint - protocol (str): protocol + endpoint (str): endpoint path + protocol (str): protocol (http or https) + port (int | None): override the port in the ISVC URL Returns: - str: curl command output + str: HTTP status code as string (e.g. "200") """ if protocol not in (Protocols.HTTPS, Protocols.HTTP): raise ProtocolNotSupportedError(protocol) - host = isvc.instance.status.address.url - if protocol == "http": - parsed = urlparse(url=host) - host = parsed._replace(scheme="http").geturl() - return pod.execute(command=shlex.split(f"curl -k {host}/{endpoint}"), ignore_rc=True) + + parsed = urlparse(url=isvc.instance.status.address.url) + parsed = parsed._replace(scheme=protocol) + if port: + parsed = parsed._replace(netloc=f"{parsed.hostname}:{port}") + + url = f"{parsed.geturl()}/{endpoint}" + cmd = shlex.split(f"curl -s -o /dev/null -w '%{{http_code}}' -k {url}") + return pod.execute(command=cmd, ignore_rc=True) @contextmanager -def create_sidecar_pod( +def create_curl_pod( client: DynamicClient, namespace: str, - use_istio: bool, pod_name: str, ) -> Generator[Pod, Any, Any]: """ - Create a sidecar pod + Create a lightweight pod for running curl commands. Args: client (DynamicClient): DynamicClient object namespace (str): namespace name - use_istio (bool): use istio pod_name (str): pod name Returns: @@ -77,11 +81,6 @@ def create_sidecar_pod( } ] - pod_kwargs = {"client": client, "name": pod_name, "namespace": namespace, "containers": containers} - - if use_istio: - pod_kwargs.update({"annotations": {"sidecar.istio.io/inject": "true"}}) - - with Pod(**pod_kwargs) as pod: + with Pod(client=client, name=pod_name, namespace=namespace, containers=containers) as pod: pod.wait_for_condition(condition="Ready", status="True") yield pod