Skip to content

Commit 09f0343

Browse files
authored
[model server] TGIS serverless: multiple models using one serving runtime (#168)
* Create size-labeler.yml * Delete .github/workflows/size-labeler.yml * model mesh - add auth tests * xx * feat: add multi server serverless * feat: add multi server serverless * feat: add multi server serverless
1 parent 7edab8d commit 09f0343

File tree

5 files changed

+105
-3
lines changed

5 files changed

+105
-3
lines changed

tests/model_serving/model_server/metrics/utils.py

Whitespace-only changes.

tests/model_serving/model_server/model_mesh/conftest.py

Whitespace-only changes.

tests/model_serving/model_server/serverless/conftest.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@
22

33
import pytest
44
from _pytest.fixtures import FixtureRequest
5+
from kubernetes.dynamic import DynamicClient
56
from ocp_resources.inference_service import InferenceService
67
from ocp_resources.resource import ResourceEditor
8+
from ocp_resources.namespace import Namespace
9+
from ocp_resources.secret import Secret
10+
from ocp_resources.serving_runtime import ServingRuntime
711

812
from tests.model_serving.model_server.serverless.utils import wait_for_canary_rollout
913
from tests.model_serving.model_server.utils import run_inference_multiple_times
1014
from utilities.constants import ModelFormat, Protocols
1115
from utilities.inference_utils import Inference
1216
from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG
17+
from utilities.constants import KServeDeploymentType, ModelName, ModelStoragePath
18+
from utilities.inference_utils import create_isvc
1319

1420

1521
@pytest.fixture(scope="class")
@@ -59,3 +65,25 @@ def multiple_tgis_inference_requests(s3_models_inference_service: InferenceServi
5965
iterations=50,
6066
run_in_parallel=True,
6167
)
68+
69+
70+
@pytest.fixture(scope="class")
def s3_flan_small_hf_caikit_serverless_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    serving_runtime_from_template: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    """Deploy a flan-t5-small HF Caikit model as a serverless InferenceService.

    Creates the InferenceService against the class-scoped serving runtime and
    S3 storage secret, yields it for the duration of the test class, and tears
    it down when the context manager exits.
    """
    # The isvc advertises whatever model format the runtime's template declares first.
    runtime_model_format = serving_runtime_from_template.instance.spec.supportedModelFormats[0].name

    isvc_kwargs = {
        "client": admin_client,
        "name": f"{ModelName.FLAN_T5_SMALL}-model",
        "namespace": model_namespace.name,
        "runtime": serving_runtime_from_template.name,
        "storage_key": models_endpoint_s3_secret.name,
        "storage_path": ModelStoragePath.FLAN_T5_SMALL_HF,
        "model_format": runtime_model_format,
        "deployment_mode": KServeDeploymentType.SERVERLESS,
        "external_route": True,
    }

    with create_isvc(**isvc_kwargs) as inference_service:
        yield inference_service
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.utils import run_inference_multiple_times
4+
from utilities.constants import (
5+
KServeDeploymentType,
6+
ModelAndFormat,
7+
ModelName,
8+
ModelStoragePath,
9+
Protocols,
10+
RuntimeTemplates,
11+
)
12+
from utilities.inference_utils import Inference
13+
from utilities.manifests.pytorch import PYTORCH_TGIS_INFERENCE_CONFIG
14+
from utilities.manifests.tgis_grpc import TGIS_INFERENCE_CONFIG
15+
16+
pytestmark = [pytest.mark.serverless, pytest.mark.sanity]
17+
18+
19+
@pytest.mark.polarion("ODS-2371")
@pytest.mark.parametrize(
    "model_namespace, serving_runtime_from_template, s3_models_inference_service",
    [
        pytest.param(
            {"name": "serverless-multi-tgis-models"},
            {
                "name": "tgis-runtime",
                "template-name": RuntimeTemplates.TGIS_GRPC_SERVING,
                "multi-model": False,
                "enable-http": False,
                "enable-grpc": True,
            },
            {
                "name": f"{ModelName.BLOOM_560M}-model",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "model-dir": f"{ModelStoragePath.BLOOM_560M_CAIKIT}/artifacts",
                "external-route": True,
            },
        )
    ],
    indirect=True,
)
@pytest.mark.usefixtures("s3_flan_small_hf_caikit_serverless_inference_service")
class TestServerlessMultipleProjectsInNamespace:
    """Serverless inference with two TGIS-served models sharing one namespace.

    The Bloom model comes from the parametrized ``s3_models_inference_service``
    fixture; the flan-t5-small model is deployed by the class-level
    ``s3_flan_small_hf_caikit_serverless_inference_service`` fixture. Both use
    the single gRPC TGIS serving runtime declared in the parametrization.
    """

    def test_serverless_multi_tgis_models_inference_bloom(
        self,
        s3_models_inference_service,
    ):
        """Bloom model serves inference while a second model shares the namespace."""
        bloom_request_kwargs = {
            "isvc": s3_models_inference_service,
            "inference_config": PYTORCH_TGIS_INFERENCE_CONFIG,
            "model_name": ModelAndFormat.BLOOM_560M_CAIKIT,
            "inference_type": Inference.ALL_TOKENS,
            "protocol": Protocols.GRPC,
            "run_in_parallel": True,
            "iterations": 5,
        }
        run_inference_multiple_times(**bloom_request_kwargs)

    def test_serverless_multi_tgis_models_inference_flan(
        self, s3_flan_small_hf_caikit_serverless_inference_service, s3_models_inference_service
    ):
        """Flan model serves inference while the Bloom model shares the namespace."""
        flan_request_kwargs = {
            "isvc": s3_flan_small_hf_caikit_serverless_inference_service,
            "inference_config": TGIS_INFERENCE_CONFIG,
            "model_name": ModelAndFormat.FLAN_T5_SMALL_CAIKIT,
            "inference_type": Inference.ALL_TOKENS,
            "protocol": Protocols.GRPC,
            "run_in_parallel": True,
            "iterations": 5,
        }
        run_inference_multiple_times(**flan_request_kwargs)

tests/model_serving/model_server/utils.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,10 @@ def run_inference_multiple_times(
204204
verify_inference_response(**infer_kwargs)
205205

206206
if futures:
207+
exceptions = []
207208
for result in as_completed(futures):
208-
_exception = result.exception()
209-
if _exception:
210-
LOGGER.error(f"Failed to run inference. Error: {_exception}")
209+
if _exception := result.exception():
210+
exceptions.append(_exception)
211+
212+
if exceptions:
213+
raise InferenceResponseError(f"Failed to run inference. Error: {exceptions}")

0 commit comments

Comments
 (0)