diff --git a/tests/model_serving/model_server/kserve/negative/conftest.py b/tests/model_serving/model_server/kserve/negative/conftest.py
index d5241f698..8dbe00f54 100644
--- a/tests/model_serving/model_server/kserve/negative/conftest.py
+++ b/tests/model_serving/model_server/kserve/negative/conftest.py
@@ -3,7 +3,6 @@
 from urllib.parse import urlparse
 
 import pytest
-from _pytest.fixtures import FixtureRequest
 from kubernetes.dynamic import DynamicClient
 from ocp_resources.inference_service import InferenceService
 from ocp_resources.namespace import Namespace
@@ -15,20 +14,58 @@
     RuntimeTemplates,
 )
 from utilities.inference_utils import create_isvc
-from utilities.infra import get_pods_by_isvc_label
+from utilities.infra import create_ns, get_pods_by_isvc_label, s3_endpoint_secret
 from utilities.serving_runtime import ServingRuntimeFromTemplate
 
 
-@pytest.fixture(scope="class")
+@pytest.fixture(scope="package")
+def negative_test_namespace(
+    admin_client: DynamicClient,
+    unprivileged_client: DynamicClient,
+) -> Generator[Namespace, Any, Any]:
+    """Create a shared namespace for all negative tests."""
+    with create_ns(
+        admin_client=admin_client,
+        unprivileged_client=unprivileged_client,
+        name="negative-test-kserve",
+    ) as ns:
+        yield ns
+
+
+@pytest.fixture(scope="package")
+def negative_test_s3_secret(
+    unprivileged_client: DynamicClient,
+    negative_test_namespace: Namespace,
+    aws_access_key_id: str,
+    aws_secret_access_key: str,
+    ci_s3_bucket_name: str,
+    ci_s3_bucket_region: str,
+    ci_s3_bucket_endpoint: str,
+) -> Generator[Secret, Any, Any]:
+    """Create S3 secret shared across all negative tests."""
+    with s3_endpoint_secret(
+        client=unprivileged_client,
+        name="ci-bucket-secret",
+        namespace=negative_test_namespace.name,
+        aws_access_key=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+        aws_s3_region=ci_s3_bucket_region,
+        aws_s3_bucket=ci_s3_bucket_name,
+        aws_s3_endpoint=ci_s3_bucket_endpoint,
+    ) as secret:
+        yield secret
+
+
+@pytest.fixture(scope="package")
 def ovms_serving_runtime(
     admin_client: DynamicClient,
-    unprivileged_model_namespace: Namespace,
+    negative_test_namespace: Namespace,
 ) -> Generator[ServingRuntime, Any, Any]:
-    """Create OVMS serving runtime for negative tests."""
+    """Create OVMS serving runtime shared across all negative tests."""
     with ServingRuntimeFromTemplate(
         client=admin_client,
         name="negative-test-ovms-runtime",
-        namespace=unprivileged_model_namespace.name,
+        namespace=negative_test_namespace.name,
         template_name=RuntimeTemplates.OVMS_KSERVE,
         multi_model=False,
         enable_http=True,
@@ -37,17 +74,16 @@ def ovms_serving_runtime(
         yield runtime
 
 
-@pytest.fixture(scope="class")
+@pytest.fixture(scope="package")
 def negative_test_ovms_isvc(
-    request: FixtureRequest,
     admin_client: DynamicClient,
-    unprivileged_model_namespace: Namespace,
+    negative_test_namespace: Namespace,
     ovms_serving_runtime: ServingRuntime,
     ci_s3_bucket_name: str,
-    ci_endpoint_s3_secret: Secret,
+    negative_test_s3_secret: Secret,
 ) -> Generator[InferenceService, Any, Any]:
-    """Create InferenceService with OVMS runtime for negative tests."""
-    storage_uri = f"s3://{ci_s3_bucket_name}/{request.param['model-dir']}/"
+    """Create InferenceService with OVMS runtime shared across all negative tests."""
+    storage_uri = f"s3://{ci_s3_bucket_name}/test-dir/"
     supported_formats = ovms_serving_runtime.instance.spec.supportedModelFormats
     if not supported_formats:
         raise ValueError(f"ServingRuntime '{ovms_serving_runtime.name}' has no supportedModelFormats")
@@ -55,9 +91,9 @@ def negative_test_ovms_isvc(
     with create_isvc(
         client=admin_client,
         name="negative-test-ovms-isvc",
-        namespace=unprivileged_model_namespace.name,
+        namespace=negative_test_namespace.name,
         runtime=ovms_serving_runtime.name,
-        storage_key=ci_endpoint_s3_secret.name,
+        storage_key=negative_test_s3_secret.name,
         storage_path=urlparse(storage_uri).path,
         model_format=supported_formats[0].name,
         deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
diff --git a/tests/model_serving/model_server/kserve/negative/test_invalid_model_name.py b/tests/model_serving/model_server/kserve/negative/test_invalid_model_name.py
new file mode 100644
index 000000000..a99b3bae5
--- /dev/null
+++ b/tests/model_serving/model_server/kserve/negative/test_invalid_model_name.py
@@ -0,0 +1,86 @@
+"""Tests for invalid model name in inference endpoint.
+
+Jira: RHOAIENG-48282
+"""
+
+import json
+from http import HTTPStatus
+from typing import Any
+
+import pytest
+from kubernetes.dynamic import DynamicClient
+from ocp_resources.inference_service import InferenceService
+
+from tests.model_serving.model_server.kserve.negative.utils import (
+    VALID_OVMS_INFERENCE_BODY,
+    assert_pods_healthy,
+    send_inference_request,
+)
+
+pytestmark = pytest.mark.usefixtures("valid_aws_config")
+
+VALID_BODY_RAW = json.dumps(VALID_OVMS_INFERENCE_BODY)
+
+
+@pytest.mark.tier1
+class TestInvalidModelName:
+    """Test class for verifying error handling when targeting a non-existent model.
+
+    Preconditions:
+    - InferenceService "negative-test-ovms-isvc" deployed and ready
+    - No InferenceService with name "nonexistent-model"
+
+    Test Steps:
+    1. Create InferenceService with OVMS runtime
+    2. Wait for InferenceService status = Ready
+    3. Send inference request to /v2/models/nonexistent-model/infer
+    4. Verify error response and existing service health
+
+    Expected Results:
+    - HTTP Status Code: 404 Not Found
+    - Error message indicates model not found
+    - No impact on existing model service
+    """
+
+    def test_nonexistent_model_returns_404(
+        self,
+        negative_test_ovms_isvc: InferenceService,
+    ) -> None:
+        """Verify that inference to a non-existent model returns 404 status code.
+
+        Given an InferenceService is deployed and ready
+        When sending a POST request targeting a non-existent model name
+        Then the response should have HTTP status code 404 (Not Found)
+        """
+        status_code, response_body = send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body=VALID_BODY_RAW,
+            model_name="nonexistent-model",
+        )
+
+        assert status_code == HTTPStatus.NOT_FOUND, (
+            f"Expected 404 Not Found for nonexistent model, got {status_code}. Response: {response_body}"
+        )
+
+    def test_existing_service_unaffected_after_invalid_model_request(
+        self,
+        admin_client: DynamicClient,
+        negative_test_ovms_isvc: InferenceService,
+        initial_pod_state: dict[str, dict[str, Any]],
+    ) -> None:
+        """Verify that the existing service remains healthy after invalid model requests.
+
+        Given an InferenceService is deployed and ready
+        When sending a request targeting a non-existent model name
+        Then the existing service pods should remain running without restarts
+        """
+        send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body=VALID_BODY_RAW,
+            model_name="nonexistent-model",
+        )
+        assert_pods_healthy(
+            admin_client=admin_client,
+            isvc=negative_test_ovms_isvc,
+            initial_pod_state=initial_pod_state,
+        )
diff --git a/tests/model_serving/model_server/kserve/negative/test_malformed_json_payload.py b/tests/model_serving/model_server/kserve/negative/test_malformed_json_payload.py
new file mode 100644
index 000000000..de0285383
--- /dev/null
+++ b/tests/model_serving/model_server/kserve/negative/test_malformed_json_payload.py
@@ -0,0 +1,98 @@
+"""Tests for malformed JSON payload handling in inference requests.
+
+Jira: RHOAIENG-48279
+"""
+
+from http import HTTPStatus
+from typing import Any
+
+import pytest
+from kubernetes.dynamic import DynamicClient
+from ocp_resources.inference_service import InferenceService
+
+from tests.model_serving.model_server.kserve.negative.utils import (
+    assert_pods_healthy,
+    send_inference_request,
+)
+
+pytestmark = pytest.mark.usefixtures("valid_aws_config")
+
+MALFORMED_JSON_EXPECTED_CODES: set[int] = {
+    HTTPStatus.BAD_REQUEST,
+    HTTPStatus.PRECONDITION_FAILED,
+}
+MISSING_BRACE_BODY = '{"inputs": [{"name": "Input3"'
+TRAILING_COMMA_BODY = '{"inputs": [{"name": "Input3",}]}'
+
+
+@pytest.mark.tier1
+@pytest.mark.rawdeployment
+class TestMalformedJsonPayload:
+    """Test class for verifying error handling when receiving malformed JSON payloads.
+
+    Preconditions:
+    - InferenceService deployed with OVMS runtime (RawDeployment)
+    - Model is ready and serving
+
+    Test Steps:
+    1. Create InferenceService with OVMS runtime
+    2. Wait for InferenceService status = Ready
+    3. Send POST with malformed JSON bodies (missing brace, trailing comma, plain text)
+    4. Verify error responses and pod health
+
+    Expected Results:
+    - HTTP Status Code: 400 Bad Request or 412 Precondition Failed
+      (OVMS returns 412 for JSON parse errors)
+    - Response indicates JSON parse failure
+    - No pod crash or restart
+    """
+
+    @pytest.mark.parametrize(
+        "malformed_body",
+        [
+            pytest.param(MISSING_BRACE_BODY, id="missing_closing_brace"),
+            pytest.param(TRAILING_COMMA_BODY, id="trailing_comma"),
+            pytest.param("not json at all", id="plain_text"),
+        ],
+    )
+    def test_malformed_json_returns_error(
+        self,
+        negative_test_ovms_isvc: InferenceService,
+        malformed_body: str,
+    ) -> None:
+        """Verify that malformed JSON payloads return an error status code.
+
+        Given an InferenceService is deployed and ready
+        When sending a POST request with a malformed JSON body
+        Then the response should have HTTP status code 400 or 412
+        """
+        status_code, response_body = send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body=malformed_body,
+        )
+
+        assert status_code in MALFORMED_JSON_EXPECTED_CODES, (
+            f"Expected 400 or 412 for malformed JSON, got {status_code}. Response: {response_body}"
+        )
+
+    def test_model_pod_remains_healthy_after_malformed_json(
+        self,
+        admin_client: DynamicClient,
+        negative_test_ovms_isvc: InferenceService,
+        initial_pod_state: dict[str, dict[str, Any]],
+    ) -> None:
+        """Verify that the model pod remains healthy after receiving malformed JSON.
+
+        Given an InferenceService is deployed and ready
+        When sending requests with malformed JSON payloads
+        Then the same pods should still be running without additional restarts
+        """
+        send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body=MISSING_BRACE_BODY,
+        )
+        assert_pods_healthy(
+            admin_client=admin_client,
+            isvc=negative_test_ovms_isvc,
+            initial_pod_state=initial_pod_state,
+        )
diff --git a/tests/model_serving/model_server/kserve/negative/test_missing_required_fields.py b/tests/model_serving/model_server/kserve/negative/test_missing_required_fields.py
new file mode 100644
index 000000000..56841f2c6
--- /dev/null
+++ b/tests/model_serving/model_server/kserve/negative/test_missing_required_fields.py
@@ -0,0 +1,91 @@
+"""Tests for missing required fields in inference requests.
+
+Jira: RHOAIENG-48281
+"""
+
+import json
+from http import HTTPStatus
+from typing import Any
+
+import pytest
+from kubernetes.dynamic import DynamicClient
+from ocp_resources.inference_service import InferenceService
+
+from tests.model_serving.model_server.kserve.negative.utils import (
+    assert_pods_healthy,
+    send_inference_request,
+)
+
+pytestmark = pytest.mark.usefixtures("valid_aws_config")
+
+
+@pytest.mark.tier1
+@pytest.mark.rawdeployment
+class TestMissingRequiredFields:
+    """Test class for verifying error handling when required fields are missing.
+
+    Preconditions:
+    - InferenceService deployed with OVMS runtime
+    - Model is ready and serving
+
+    Test Steps:
+    1. Create InferenceService with OVMS runtime
+    2. Wait for InferenceService status = Ready
+    3. Send POST with empty body {}
+    4. Send POST with body missing "inputs" field
+    5. Verify error responses and pod health
+
+    Expected Results:
+    - HTTP Status Code: 400 Bad Request
+    - Error message indicates missing required field
+    - No server crash
+    """
+
+    @pytest.mark.parametrize(
+        "incomplete_body",
+        [
+            pytest.param("{}", id="empty_body"),
+            pytest.param(json.dumps({"id": "test-123"}), id="missing_inputs_field"),
+        ],
+    )
+    def test_missing_required_fields_returns_400(
+        self,
+        negative_test_ovms_isvc: InferenceService,
+        incomplete_body: str,
+    ) -> None:
+        """Verify that requests missing required fields return 400 status code.
+
+        Given an InferenceService is deployed and ready
+        When sending a POST request with missing required fields
+        Then the response should have HTTP status code 400 (Bad Request)
+        """
+        status_code, response_body = send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body=incomplete_body,
+        )
+
+        assert status_code == HTTPStatus.BAD_REQUEST, (
+            f"Expected 400 Bad Request for incomplete payload, got {status_code}. Response: {response_body}"
+        )
+
+    def test_model_pod_remains_healthy_after_missing_fields(
+        self,
+        admin_client: DynamicClient,
+        negative_test_ovms_isvc: InferenceService,
+        initial_pod_state: dict[str, dict[str, Any]],
+    ) -> None:
+        """Verify that the model pod remains healthy after receiving incomplete requests.
+
+        Given an InferenceService is deployed and ready
+        When sending requests with missing required fields
+        Then the same pods should still be running without additional restarts
+        """
+        send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body="{}",
+        )
+        assert_pods_healthy(
+            admin_client=admin_client,
+            isvc=negative_test_ovms_isvc,
+            initial_pod_state=initial_pod_state,
+        )
diff --git a/tests/model_serving/model_server/kserve/negative/test_invalid_inference_requests.py b/tests/model_serving/model_server/kserve/negative/test_unsupported_content_type.py
similarity index 54%
rename from tests/model_serving/model_server/kserve/negative/test_invalid_inference_requests.py
rename to tests/model_serving/model_server/kserve/negative/test_unsupported_content_type.py
index 00d721818..d7c093abc 100644
--- a/tests/model_serving/model_server/kserve/negative/test_invalid_inference_requests.py
+++ b/tests/model_serving/model_server/kserve/negative/test_unsupported_content_type.py
@@ -1,11 +1,9 @@
-"""Tests for invalid inference requests handling.
-
-This module verifies that KServe properly handles inference requests with
-unsupported Content-Type headers, returning appropriate error responses.
+"""Tests for unsupported Content-Type headers in inference requests.
 
 Jira: RHOAIENG-48283
 """
 
+import json
 from http import HTTPStatus
 from typing import Any
 
@@ -14,26 +12,16 @@
 from ocp_resources.inference_service import InferenceService
 
 from tests.model_serving.model_server.kserve.negative.utils import (
-    send_inference_request_with_content_type,
+    VALID_OVMS_INFERENCE_BODY,
+    assert_pods_healthy,
+    send_inference_request,
 )
-from utilities.infra import get_pods_by_isvc_label
 
 pytestmark = pytest.mark.usefixtures("valid_aws_config")
 
 
-@pytest.mark.jira("RHOAIENG-48283", run=False)
 @pytest.mark.tier1
 @pytest.mark.rawdeployment
-@pytest.mark.parametrize(
-    "unprivileged_model_namespace, negative_test_ovms_isvc",
-    [
-        pytest.param(
-            {"name": "negative-test-content-type"},
-            {"model-dir": "test-dir"},
-        )
-    ],
-    indirect=True,
-)
 class TestUnsupportedContentType:
     """Test class for verifying error handling when using unsupported Content-Type headers.
 
@@ -55,17 +43,6 @@ class TestUnsupportedContentType:
     - Model pod remains healthy (Running, no restarts)
     """
 
-    VALID_INFERENCE_BODY: dict[str, Any] = {  # noqa: RUF012
-        "inputs": [
-            {
-                "name": "Input3",
-                "shape": [1, 1, 28, 28],
-                "datatype": "FP32",
-                "data": [0.0] * 784,
-            }
-        ]
-    }
-
     @pytest.mark.parametrize(
         "content_type",
         [
@@ -84,10 +61,10 @@ def test_unsupported_content_type_returns_415(
         When sending a POST request with an unsupported Content-Type header
         Then the response should have HTTP status code 415 (Unsupported Media Type)
         """
-        status_code, response_body = send_inference_request_with_content_type(
+        status_code, response_body = send_inference_request(
             inference_service=negative_test_ovms_isvc,
+            body=json.dumps(VALID_OVMS_INFERENCE_BODY),
             content_type=content_type,
-            body=self.VALID_INFERENCE_BODY,
         )
 
         assert status_code == HTTPStatus.UNSUPPORTED_MEDIA_TYPE, (
@@ -107,40 +84,13 @@ def test_model_pod_remains_healthy_after_invalid_requests(
         When sending requests with unsupported Content-Type headers
         Then the same pods (by UID) should still be running without additional restarts
         """
-        send_inference_request_with_content_type(
+        send_inference_request(
             inference_service=negative_test_ovms_isvc,
+            body=json.dumps(VALID_OVMS_INFERENCE_BODY),
             content_type="text/xml",
-            body=self.VALID_INFERENCE_BODY,
         )
-
-        current_pods = get_pods_by_isvc_label(
-            client=admin_client,
+        assert_pods_healthy(
+            admin_client=admin_client,
             isvc=negative_test_ovms_isvc,
+            initial_pod_state=initial_pod_state,
         )
-
-        assert len(current_pods) > 0, "No pods found for the InferenceService"
-
-        current_pod_uids = {pod.instance.metadata.uid for pod in current_pods}
-        initial_pod_uids = set(initial_pod_state.keys())
-
-        assert current_pod_uids == initial_pod_uids, (
-            f"Pod UIDs changed after invalid requests. "
-            f"Initial: {initial_pod_uids}, Current: {current_pod_uids}. "
-            f"This indicates pods were recreated."
-        )
-
-        for pod in current_pods:
-            uid = pod.instance.metadata.uid
-            initial_state = initial_pod_state[uid]
-
-            assert pod.instance.status.phase == "Running", (
-                f"Pod {pod.name} is not running, status: {pod.instance.status.phase}"
-            )
-
-            container_statuses = pod.instance.status.containerStatuses or []
-            for container in container_statuses:
-                initial_restart_count = initial_state["restart_counts"].get(container.name, 0)
-                assert container.restartCount == initial_restart_count, (
-                    f"Container {container.name} in pod {pod.name} restarted after invalid requests. "
-                    f"Initial count: {initial_restart_count}, Current count: {container.restartCount}"
-                )
diff --git a/tests/model_serving/model_server/kserve/negative/test_wrong_input_data_type.py b/tests/model_serving/model_server/kserve/negative/test_wrong_input_data_type.py
new file mode 100644
index 000000000..bc24fefad
--- /dev/null
+++ b/tests/model_serving/model_server/kserve/negative/test_wrong_input_data_type.py
@@ -0,0 +1,102 @@
+"""Tests for wrong data types in input tensor.
+
+Jira: RHOAIENG-48280
+"""
+
+import copy
+import json
+from http import HTTPStatus
+from typing import Any
+
+import pytest
+from kubernetes.dynamic import DynamicClient
+from ocp_resources.inference_service import InferenceService
+
+from tests.model_serving.model_server.kserve.negative.utils import (
+    VALID_OVMS_INFERENCE_BODY,
+    assert_pods_healthy,
+    send_inference_request,
+)
+
+pytestmark = pytest.mark.usefixtures("valid_aws_config")
+
+
+def _make_body_with_input_override(**overrides: Any) -> str:
+    """Derive a serialized body from VALID_OVMS_INFERENCE_BODY with input field overrides."""
+    body = copy.deepcopy(VALID_OVMS_INFERENCE_BODY)
+    body["inputs"][0].update(overrides)
+    return json.dumps(body)
+
+
+STRING_VALUES_AS_FP32_BODY = _make_body_with_input_override(data=["string_value"] * 784)
+INVALID_DATATYPE_BODY = _make_body_with_input_override(datatype="INVALID_TYPE")
+
+
+@pytest.mark.tier1
+class TestWrongInputDataType:
+    """Test class for verifying error handling when input tensor has wrong data type.
+
+    Preconditions:
+    - InferenceService deployed with OVMS runtime expecting FP32 inputs
+    - Model is ready and serving
+
+    Test Steps:
+    1. Create InferenceService with OVMS runtime
+    2. Wait for InferenceService status = Ready
+    3. Send inference request with string values where FP32 is expected
+    4. Send inference request with mismatched datatype declaration
+    5. Verify error responses and pod health
+
+    Expected Results:
+    - HTTP Status Code: 400 or 422 indicating data type mismatch
+    - Model pod remains healthy (no restart)
+    """
+
+    @pytest.mark.parametrize(
+        "invalid_input_body",
+        [
+            pytest.param(STRING_VALUES_AS_FP32_BODY, id="string_values_as_fp32"),
+            pytest.param(INVALID_DATATYPE_BODY, id="invalid_datatype_name"),
+        ],
+    )
+    def test_wrong_data_type_returns_error(
+        self,
+        negative_test_ovms_isvc: InferenceService,
+        invalid_input_body: str,
+    ) -> None:
+        """Verify that wrong input data types return an error status code.
+
+        Given an InferenceService is deployed and ready
+        When sending a POST request with mismatched input tensor data types
+        Then the response should have HTTP status code 400 or 422
+        """
+        status_code, response_body = send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body=invalid_input_body,
+        )
+
+        assert status_code in (HTTPStatus.BAD_REQUEST, HTTPStatus.UNPROCESSABLE_ENTITY), (
+            f"Expected 400 or 422 for wrong data type, got {status_code}. Response: {response_body}"
+        )
+
+    def test_model_pod_remains_healthy_after_wrong_dtype(
+        self,
+        admin_client: DynamicClient,
+        negative_test_ovms_isvc: InferenceService,
+        initial_pod_state: dict[str, dict[str, Any]],
+    ) -> None:
+        """Verify that the model pod remains healthy after receiving wrong data type inputs.
+
+        Given an InferenceService is deployed and ready
+        When sending requests with wrong input tensor data types
+        Then the same pods should still be running without additional restarts
+        """
+        send_inference_request(
+            inference_service=negative_test_ovms_isvc,
+            body=STRING_VALUES_AS_FP32_BODY,
+        )
+        assert_pods_healthy(
+            admin_client=admin_client,
+            isvc=negative_test_ovms_isvc,
+            initial_pod_state=initial_pod_state,
+        )
diff --git a/tests/model_serving/model_server/kserve/negative/utils.py b/tests/model_serving/model_server/kserve/negative/utils.py
index 9caf11c2c..51a4a0f77 100644
--- a/tests/model_serving/model_server/kserve/negative/utils.py
+++ b/tests/model_serving/model_server/kserve/negative/utils.py
@@ -1,27 +1,76 @@
 """Utility functions for negative inference tests."""
 
-import json
 import shlex
 from typing import Any
 
+from kubernetes.dynamic import DynamicClient
 from ocp_resources.inference_service import InferenceService
 from pyhelper_utils.shell import run_command
 
+from utilities.infra import get_pods_by_isvc_label
+from utilities.manifests.onnx import ONNX_INFERENCE_CONFIG
+
+VALID_OVMS_INFERENCE_BODY: dict[str, Any] = {
+    "inputs": ONNX_INFERENCE_CONFIG["default_query_model"]["infer"]["query_input"]
+}
+
 
-def send_inference_request_with_content_type(
+def assert_pods_healthy(
+    admin_client: DynamicClient,
+    isvc: InferenceService,
+    initial_pod_state: dict[str, dict[str, Any]],
+) -> None:
+    """Assert that all pods remain running with no restarts compared to initial state.
+
+    Args:
+        admin_client: Kubernetes client with admin privileges.
+        isvc: The InferenceService whose pods to check.
+        initial_pod_state: Mapping of pod UIDs to their initial state
+            (name, restart counts) captured before the test action.
+    """
+    current_pods = get_pods_by_isvc_label(client=admin_client, isvc=isvc)
+    assert len(current_pods) > 0, "No pods found for the InferenceService"
+
+    current_pod_uids = {pod.instance.metadata.uid for pod in current_pods}
+    initial_pod_uids = set(initial_pod_state.keys())
+    assert current_pod_uids == initial_pod_uids, (
+        f"Pod UIDs changed after invalid requests. "
+        f"Initial: {initial_pod_uids}, Current: {current_pod_uids}. "
+        f"This indicates pods were recreated."
+    )
+
+    for pod in current_pods:
+        uid = pod.instance.metadata.uid
+        initial_state = initial_pod_state[uid]
+        assert pod.instance.status.phase == "Running", (
+            f"Pod {pod.name} is not running, status: {pod.instance.status.phase}"
+        )
+        for container in pod.instance.status.containerStatuses or []:
+            initial_restart_count = initial_state["restart_counts"].get(container.name, 0)
+            assert container.restartCount == initial_restart_count, (
+                f"Container {container.name} in pod {pod.name} restarted. "
+                f"Initial: {initial_restart_count}, Current: {container.restartCount}"
+            )
+
+
+def send_inference_request(
     inference_service: InferenceService,
-    content_type: str,
-    body: dict[str, Any],
+    body: str,
+    model_name: str | None = None,
+    content_type: str = "application/json",
 ) -> tuple[int, str]:
-    """Send an inference request with a specific Content-Type header.
+    """Send an inference request and return HTTP status code and response body.
 
-    This function is used for negative testing to verify error handling
-    when sending requests with unsupported Content-Type headers.
+    Unlike UserInference, this function does not retry or raise on error
+    status codes, making it suitable for negative testing where error
+    responses are the expected outcome.
 
     Args:
         inference_service: The InferenceService to send the request to.
-        content_type: The Content-Type header value to use.
-        body: The request body to send.
+        body: The raw string payload (can be invalid JSON for negative testing).
+        model_name: Override the model name in the URL path.
+            Defaults to the InferenceService name.
+        content_type: The Content-Type header value. Defaults to "application/json".
 
     Returns:
         A tuple of (status_code, response_body).
@@ -29,17 +78,18 @@ def send_inference_request_with_content_type(
     Raises:
         ValueError: If the InferenceService has no URL or curl output is malformed.
     """
-    url = inference_service.instance.status.url
-    if not url:
+    base_url = inference_service.instance.status.url
+    if not base_url:
         raise ValueError(f"InferenceService '{inference_service.name}' has no URL; is it Ready?")
 
-    endpoint = f"{url}/v2/models/{inference_service.name}/infer"
+    target_model = model_name or inference_service.name
+    endpoint = f"{base_url}/v2/models/{target_model}/infer"
 
     cmd = (
         f"curl -s -w '\\n%{{http_code}}' "
        f"-X POST {endpoint} "
         f"-H 'Content-Type: {content_type}' "
-        f"-d '{json.dumps(body)}' "
+        f"--data-raw {shlex.quote(body)} "
         f"--insecure"
     )
 
@@ -50,7 +100,4 @@ def send_inference_request_with_content_type(
         status_code = int(lines[-1])
     except ValueError as exc:
         raise ValueError(f"Could not parse HTTP status code from curl output: {out!r}") from exc
-
-    response_body = "\n".join(lines[:-1])
-
-    return status_code, response_body
+    return status_code, "\n".join(lines[:-1])
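
One dependency of these tests sits outside the diff: the `initial_pod_state` fixture consumed by every pod-health test above is not defined in any of these hunks. Judging from how `assert_pods_healthy` reads it (pod UIDs as keys, with per-container restart counts under "restart_counts"), a minimal sketch of such a fixture could look like the following; the fixture's scope and location (e.g. this package's conftest.py) are assumptions, not part of this change:

    from typing import Any

    import pytest
    from kubernetes.dynamic import DynamicClient
    from ocp_resources.inference_service import InferenceService

    from utilities.infra import get_pods_by_isvc_label


    @pytest.fixture()
    def initial_pod_state(
        admin_client: DynamicClient,
        negative_test_ovms_isvc: InferenceService,
    ) -> dict[str, dict[str, Any]]:
        """Snapshot pod UIDs, names, and container restart counts before the test action."""
        pods = get_pods_by_isvc_label(client=admin_client, isvc=negative_test_ovms_isvc)
        return {
            pod.instance.metadata.uid: {
                "name": pod.name,
                # Keyed by container name, matching the lookup in assert_pods_healthy.
                "restart_counts": {
                    container.name: container.restartCount
                    for container in (pod.instance.status.containerStatuses or [])
                },
            }
            for pod in pods
        }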