Skip to content

Commit 09f0343

Browse files
authored
[model server] TGIS serverless: multiple models using one serving runtime (#168)
* Create size-labeler.yml * Delete .github/workflows/size-labeler.yml * model mesh - add auth tests * xx * feat: add multi server serverless * feat: add multi server serverless * feat: add multi server serverless
1 parent 7edab8d commit 09f0343

File tree

5 files changed

+105
-3
lines changed

5 files changed

+105
-3
lines changed

tests/model_serving/model_server/metrics/utils.py

Whitespace-only changes.

tests/model_serving/model_server/model_mesh/conftest.py

Whitespace-only changes.

tests/model_serving/model_server/serverless/conftest.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@
22

33
import pytest
44
from _pytest.fixtures import FixtureRequest
5+
from kubernetes.dynamic import DynamicClient
56
from ocp_resources.inference_service import InferenceService
67
from ocp_resources.resource import ResourceEditor
8+
from ocp_resources.namespace import Namespace
9+
from ocp_resources.secret import Secret
10+
from ocp_resources.serving_runtime import ServingRuntime
711

812
from tests.model_serving.model_server.serverless.utils import wait_for_canary_rollout
913
from tests.model_serving.model_server.utils import run_inference_multiple_times
1014
from utilities.constants import ModelFormat, Protocols
1115
from utilities.inference_utils import Inference
1216
from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG
17+
from utilities.constants import KServeDeploymentType, ModelName, ModelStoragePath
18+
from utilities.inference_utils import create_isvc
1319

1420

1521
@pytest.fixture(scope="class")
@@ -59,3 +65,25 @@ def multiple_tgis_inference_requests(s3_models_inference_service: InferenceServi
5965
iterations=50,
6066
run_in_parallel=True,
6167
)
68+
69+
70+
@pytest.fixture(scope="class")
def s3_flan_small_hf_caikit_serverless_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    serving_runtime_from_template: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    """Deploy a flan-t5-small HF Caikit model as a serverless InferenceService.

    Creates the InferenceService against the class-scoped serving runtime and
    S3 storage secret, yields it for the duration of the test class, and tears
    it down when the context manager exits.
    """
    # The isvc advertises whatever model format the runtime's template declares first.
    runtime_model_format = serving_runtime_from_template.instance.spec.supportedModelFormats[0].name

    isvc_kwargs = {
        "client": admin_client,
        "name": f"{ModelName.FLAN_T5_SMALL}-model",
        "namespace": model_namespace.name,
        "runtime": serving_runtime_from_template.name,
        "storage_key": models_endpoint_s3_secret.name,
        "storage_path": ModelStoragePath.FLAN_T5_SMALL_HF,
        "model_format": runtime_model_format,
        "deployment_mode": KServeDeploymentType.SERVERLESS,
        "external_route": True,
    }

    with create_isvc(**isvc_kwargs) as inference_service:
        yield inference_service
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.utils import run_inference_multiple_times
4+
from utilities.constants import (
5+
KServeDeploymentType,
6+
ModelAndFormat,
7+
ModelName,
8+
ModelStoragePath,
9+
Protocols,
10+
RuntimeTemplates,
11+
)
12+
from utilities.inference_utils import Inference
13+
from utilities.manifests.pytorch import PYTORCH_TGIS_INFERENCE_CONFIG
14+
from utilities.manifests.tgis_grpc import TGIS_INFERENCE_CONFIG
15+
16+
pytestmark = [pytest.mark.serverless, pytest.mark.sanity]
17+
18+
19+
@pytest.mark.polarion("ODS-2371")
@pytest.mark.parametrize(
    "model_namespace, serving_runtime_from_template, s3_models_inference_service",
    [
        pytest.param(
            {"name": "serverless-multi-tgis-models"},
            {
                "name": "tgis-runtime",
                "template-name": RuntimeTemplates.TGIS_GRPC_SERVING,
                "multi-model": False,
                "enable-http": False,
                "enable-grpc": True,
            },
            {
                "name": f"{ModelName.BLOOM_560M}-model",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "model-dir": f"{ModelStoragePath.BLOOM_560M_CAIKIT}/artifacts",
                "external-route": True,
            },
        )
    ],
    indirect=True,
)
@pytest.mark.usefixtures("s3_flan_small_hf_caikit_serverless_inference_service")
class TestServerlessMultipleProjectsInNamespace:
    """Serverless inference with two TGIS-served models sharing one namespace.

    The Bloom model comes from the parametrized ``s3_models_inference_service``
    fixture; the flan-t5-small model is deployed by the class-level
    ``s3_flan_small_hf_caikit_serverless_inference_service`` fixture. Both use
    the single gRPC TGIS serving runtime declared in the parametrization.
    """

    def test_serverless_multi_tgis_models_inference_bloom(
        self,
        s3_models_inference_service,
    ):
        """Bloom model serves inference while a second model shares the namespace."""
        bloom_request_kwargs = {
            "isvc": s3_models_inference_service,
            "inference_config": PYTORCH_TGIS_INFERENCE_CONFIG,
            "model_name": ModelAndFormat.BLOOM_560M_CAIKIT,
            "inference_type": Inference.ALL_TOKENS,
            "protocol": Protocols.GRPC,
            "run_in_parallel": True,
            "iterations": 5,
        }
        run_inference_multiple_times(**bloom_request_kwargs)

    def test_serverless_multi_tgis_models_inference_flan(
        self, s3_flan_small_hf_caikit_serverless_inference_service, s3_models_inference_service
    ):
        """Flan model serves inference while the Bloom model shares the namespace."""
        flan_request_kwargs = {
            "isvc": s3_flan_small_hf_caikit_serverless_inference_service,
            "inference_config": TGIS_INFERENCE_CONFIG,
            "model_name": ModelAndFormat.FLAN_T5_SMALL_CAIKIT,
            "inference_type": Inference.ALL_TOKENS,
            "protocol": Protocols.GRPC,
            "run_in_parallel": True,
            "iterations": 5,
        }
        run_inference_multiple_times(**flan_request_kwargs)

tests/model_serving/model_server/utils.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,10 @@ def run_inference_multiple_times(
204204
verify_inference_response(**infer_kwargs)
205205

206206
if futures:
207+
exceptions = []
207208
for result in as_completed(futures):
208-
_exception = result.exception()
209-
if _exception:
210-
LOGGER.error(f"Failed to run inference. Error: {_exception}")
209+
if _exception := result.exception():
210+
exceptions.append(_exception)
211+
212+
if exceptions:
213+
raise InferenceResponseError(f"Failed to run inference. Error: {exceptions}")

0 commit comments

Comments
 (0)