Skip to content

Commit cde5c83

Browse files
authored
Merge branch 'main' into build_conf
2 parents d571bea + 09f0343 commit cde5c83

13 files changed

Lines changed: 507 additions & 18 deletions

File tree

tests/model_serving/model_server/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ def s3_models_inference_service(
148148
if (enable_auth := request.param.get("enable-auth")) is not None:
149149
isvc_kwargs["enable_auth"] = enable_auth
150150

151+
if (scale_metric := request.param.get("scale-metric")) is not None:
152+
isvc_kwargs["scale_metric"] = scale_metric
153+
154+
if (scale_target := request.param.get("scale-target")) is not None:
155+
isvc_kwargs["scale_target"] = scale_target
156+
151157
with create_isvc(**isvc_kwargs) as isvc:
152158
yield isvc
153159

tests/model_serving/model_server/metrics/utils.py

Whitespace-only changes.

tests/model_serving/model_server/model_mesh/conftest.py

Whitespace-only changes.

tests/model_serving/model_server/serverless/conftest.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
1+
from typing import Any, Generator
2+
13
import pytest
24
from _pytest.fixtures import FixtureRequest
5+
from kubernetes.dynamic import DynamicClient
36
from ocp_resources.inference_service import InferenceService
47
from ocp_resources.resource import ResourceEditor
8+
from ocp_resources.namespace import Namespace
9+
from ocp_resources.secret import Secret
10+
from ocp_resources.serving_runtime import ServingRuntime
11+
12+
from tests.model_serving.model_server.serverless.utils import wait_for_canary_rollout
13+
from tests.model_serving.model_server.utils import run_inference_multiple_times
14+
from utilities.constants import ModelFormat, Protocols
15+
from utilities.inference_utils import Inference
16+
from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG
17+
from utilities.constants import KServeDeploymentType, ModelName, ModelStoragePath
18+
from utilities.inference_utils import create_isvc
519

620

721
@pytest.fixture(scope="class")
@@ -19,3 +33,57 @@ def inference_service_patched_replicas(
1933
).update()
2034

2135
return ovms_serverless_inference_service
36+
37+
38+
@pytest.fixture
def inference_service_updated_canary_config(
    request: FixtureRequest, s3_models_inference_service: InferenceService
) -> Generator[InferenceService, Any, Any]:
    """Patch the inference service with a canary traffic split and yield it.

    Reads ``canary-traffic-percent`` (required) and ``model-path`` (optional)
    from the indirect parametrization. The patch is applied via ResourceEditor,
    which reverts it on fixture teardown; the fixture waits until the canary
    rollout reaches the requested percentage before yielding.
    """
    traffic_percent = request.param["canary-traffic-percent"]

    predictor_spec: dict[str, Any] = {"canaryTrafficPercent": traffic_percent}

    # An optional new storage path points the canary revision at a different model.
    new_model_path = request.param.get("model-path")
    if new_model_path:
        predictor_spec["model"] = {"storage": {"path": new_model_path}}

    canary_patch = {"spec": {"predictor": predictor_spec}}

    with ResourceEditor(patches={s3_models_inference_service: canary_patch}):
        wait_for_canary_rollout(isvc=s3_models_inference_service, percentage=traffic_percent)
        yield s3_models_inference_service
55+
56+
57+
@pytest.fixture
def multiple_tgis_inference_requests(s3_models_inference_service: InferenceService) -> None:
    """Fire a burst of 50 parallel all-tokens inference requests at the service.

    Used to generate sustained concurrent load (e.g. to drive KPA autoscaling);
    the fixture returns nothing — its value is the side effect of the requests.
    """
    load_kwargs = dict(
        isvc=s3_models_inference_service,
        inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
        inference_type=Inference.ALL_TOKENS,
        protocol=Protocols.HTTPS,
        model_name=ModelFormat.CAIKIT,
        run_in_parallel=True,
        iterations=50,
    )
    run_inference_multiple_times(**load_kwargs)
68+
69+
70+
@pytest.fixture(scope="class")
def s3_flan_small_hf_caikit_serverless_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    serving_runtime_from_template: ServingRuntime,
    models_endpoint_s3_secret: Secret,
) -> Generator[InferenceService, Any, Any]:
    """Deploy a serverless flan-t5-small inference service from S3 and yield it.

    Gives tests a second model in the same namespace alongside the one created
    by ``s3_models_inference_service``. Torn down when the class scope ends.
    """
    runtime = serving_runtime_from_template
    isvc_kwargs: dict[str, Any] = {
        "client": admin_client,
        "name": f"{ModelName.FLAN_T5_SMALL}-model",
        "namespace": model_namespace.name,
        "runtime": runtime.name,
        "storage_key": models_endpoint_s3_secret.name,
        "storage_path": ModelStoragePath.FLAN_T5_SMALL_HF,
        # Reuse whatever model format the runtime template declares first.
        "model_format": runtime.instance.spec.supportedModelFormats[0].name,
        "deployment_mode": KServeDeploymentType.SERVERLESS,
        "external_route": True,
    }
    with create_isvc(**isvc_kwargs) as isvc:
        yield isvc
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.serverless.utils import verify_canary_traffic
4+
from tests.model_serving.model_server.utils import verify_inference_response
5+
from utilities.constants import (
6+
KServeDeploymentType,
7+
ModelAndFormat,
8+
ModelName,
9+
ModelStoragePath,
10+
Protocols,
11+
RuntimeTemplates,
12+
)
13+
from utilities.inference_utils import Inference
14+
from utilities.manifests.pytorch import PYTORCH_TGIS_INFERENCE_CONFIG
15+
from utilities.manifests.tgis_grpc import TGIS_INFERENCE_CONFIG
16+
17+
pytestmark = [pytest.mark.serverless, pytest.mark.sanity]
18+
19+
20+
@pytest.mark.polarion("ODS-2371")
@pytest.mark.parametrize(
    "model_namespace, serving_runtime_from_template, s3_models_inference_service",
    [
        pytest.param(
            {"name": "serverless-canary-rollout"},
            {
                "name": "tgis-runtime",
                "template-name": RuntimeTemplates.TGIS_GRPC_SERVING,
                "multi-model": False,
                "enable-http": False,
                "enable-grpc": True,
            },
            {
                "name": f"{ModelName.BLOOM_560M}-model",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "model-dir": f"{ModelStoragePath.BLOOM_560M_CAIKIT}/artifacts",
                "external-route": True,
            },
        )
    ],
    indirect=True,
)
class TestServerlessCanaryRollout:
    """Canary rollout of a serverless TGIS inference service.

    Flow: serve Bloom, roll 30% of traffic to a flan-t5-small canary revision,
    then promote the canary to 100%. Traffic split is verified by sampling
    inference responses.
    """

    def test_serverless_before_model_update(
        self,
        s3_models_inference_service,
    ):
        """Test inference with Bloom before model is updated."""
        verify_inference_response(
            inference_service=s3_models_inference_service,
            inference_config=PYTORCH_TGIS_INFERENCE_CONFIG,
            model_name=ModelAndFormat.BLOOM_560M_CAIKIT,
            inference_type=Inference.ALL_TOKENS,
            protocol=Protocols.GRPC,
            use_default_query=True,
        )

    @pytest.mark.parametrize(
        "inference_service_updated_canary_config",
        [
            pytest.param(
                {"canary-traffic-percent": 30, "model-path": ModelStoragePath.FLAN_T5_SMALL_HF},
            )
        ],
        indirect=True,
    )
    def test_serverless_during_canary_rollout(self, inference_service_updated_canary_config):
        """Test inference during canary rollout"""
        # 20 samples against a 30% split; 10% tolerance absorbs sampling noise.
        verify_canary_traffic(
            isvc=inference_service_updated_canary_config,
            inference_config=TGIS_INFERENCE_CONFIG,
            model_name=ModelAndFormat.FLAN_T5_SMALL_CAIKIT,
            inference_type=Inference.ALL_TOKENS,
            protocol=Protocols.GRPC,
            iterations=20,
            expected_percentage=30,
            tolerance=10,
        )

    @pytest.mark.parametrize(
        "inference_service_updated_canary_config",
        [
            pytest.param(
                {"canary-traffic-percent": 100},
            )
        ],
        indirect=True,
    )
    def test_serverless_after_canary_rollout(self, inference_service_updated_canary_config):
        """Test inference after canary rollout"""
        # At 100% every request must hit the new model, so no tolerance is given.
        verify_canary_traffic(
            isvc=inference_service_updated_canary_config,
            inference_config=TGIS_INFERENCE_CONFIG,
            model_name=ModelAndFormat.FLAN_T5_SMALL_CAIKIT,
            inference_type=Inference.ALL_TOKENS,
            protocol=Protocols.GRPC,
            iterations=5,
            expected_percentage=100,
        )
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.serverless.utils import (
4+
inference_service_pods_sampler,
5+
)
6+
from utilities.constants import (
7+
KServeDeploymentType,
8+
ModelFormat,
9+
ModelInferenceRuntime,
10+
ModelStoragePath,
11+
RuntimeTemplates,
12+
Timeout,
13+
)
14+
15+
pytestmark = [
16+
pytest.mark.serverless,
17+
pytest.mark.sanity,
18+
pytest.mark.usefixtures("valid_aws_config"),
19+
]
20+
21+
22+
@pytest.mark.parametrize(
    "model_namespace, serving_runtime_from_template, s3_models_inference_service",
    [
        pytest.param(
            {"name": "serverless-auto-scale"},
            {
                "name": f"{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}",
                "template-name": RuntimeTemplates.CAIKIT_TGIS_SERVING,
                "multi-model": False,
                "enable-http": True,
            },
            {
                "name": f"{ModelFormat.CAIKIT}-auto-scale",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT,
                # Knative KPA: scale on in-flight request concurrency, one request per pod.
                "scale-metric": "concurrency",
                "scale-target": 1,
            },
        )
    ],
    indirect=True,
)
class TestConcurrencyAutoScale:
    """Knative (KPA) concurrency-based autoscaling of a serverless inference service."""

    @pytest.mark.dependency(name="test_auto_scale_using_concurrency")
    def test_auto_scale_using_concurrency(
        self,
        admin_client,
        s3_models_inference_service,
        multiple_tgis_inference_requests,
    ):
        """Verify model is successfully scaled up based on concurrency metrics (KPA).

        ``multiple_tgis_inference_requests`` fires 50 parallel requests; with
        scale-target=1 this should force a scale-up. Success is observed when
        more than one predictor pod exists and all pods are Running.
        NOTE(review): assumes `inference_service_pods_sampler` raises on timeout
        when the condition is never met — confirm in serverless/utils.py.
        """
        for pods in inference_service_pods_sampler(
            client=admin_client,
            isvc=s3_models_inference_service,
            timeout=Timeout.TIMEOUT_1MIN,
        ):
            # Merged the nested ifs and dropped the needless list inside all().
            if pods and len(pods) > 1 and all(pod.status == pod.Status.RUNNING for pod in pods):
                return

    # BUG FIX: pytest-dependency's marker takes `depends=`, not `requires=`.
    # With the unknown kwarg the dependency was silently ignored, so this test
    # could run (and fail misleadingly) even when scale-up never happened.
    @pytest.mark.dependency(depends=["test_auto_scale_using_concurrency"])
    def test_pods_scaled_down_when_no_requests(self, admin_client, s3_models_inference_service):
        """Verify auto-scaled pods are deleted when there are no inference requests"""
        for pods in inference_service_pods_sampler(
            client=admin_client,
            isvc=s3_models_inference_service,
            timeout=Timeout.TIMEOUT_4MIN,
        ):
            # Scaled back down to the single steady-state predictor pod.
            if pods and len(pods) == 1:
                return
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.utils import run_inference_multiple_times
4+
from utilities.constants import (
5+
KServeDeploymentType,
6+
ModelAndFormat,
7+
ModelName,
8+
ModelStoragePath,
9+
Protocols,
10+
RuntimeTemplates,
11+
)
12+
from utilities.inference_utils import Inference
13+
from utilities.manifests.pytorch import PYTORCH_TGIS_INFERENCE_CONFIG
14+
from utilities.manifests.tgis_grpc import TGIS_INFERENCE_CONFIG
15+
16+
pytestmark = [pytest.mark.serverless, pytest.mark.sanity]
17+
18+
19+
@pytest.mark.polarion("ODS-2371")
@pytest.mark.parametrize(
    "model_namespace, serving_runtime_from_template, s3_models_inference_service",
    [
        pytest.param(
            {"name": "serverless-multi-tgis-models"},
            {
                "name": "tgis-runtime",
                "template-name": RuntimeTemplates.TGIS_GRPC_SERVING,
                "multi-model": False,
                "enable-http": False,
                "enable-grpc": True,
            },
            {
                "name": f"{ModelName.BLOOM_560M}-model",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "model-dir": f"{ModelStoragePath.BLOOM_560M_CAIKIT}/artifacts",
                "external-route": True,
            },
        )
    ],
    indirect=True,
)
@pytest.mark.usefixtures("s3_flan_small_hf_caikit_serverless_inference_service")
class TestServerlessMultipleProjectsInNamespace:
    """Two serverless TGIS models deployed side by side in one namespace.

    The class-level usefixtures keeps the flan model deployed for both tests,
    so each model is exercised while the other is also present.
    """

    @staticmethod
    def _run_parallel_inference(isvc, inference_config, model_name):
        # Shared driver: 5 parallel all-tokens gRPC inference rounds.
        run_inference_multiple_times(
            isvc=isvc,
            inference_config=inference_config,
            model_name=model_name,
            inference_type=Inference.ALL_TOKENS,
            protocol=Protocols.GRPC,
            run_in_parallel=True,
            iterations=5,
        )

    def test_serverless_multi_tgis_models_inference_bloom(
        self,
        s3_models_inference_service,
    ):
        """Test inference with Bloom Caikit model when multiple models in the same namespace"""
        self._run_parallel_inference(
            isvc=s3_models_inference_service,
            inference_config=PYTORCH_TGIS_INFERENCE_CONFIG,
            model_name=ModelAndFormat.BLOOM_560M_CAIKIT,
        )

    def test_serverless_multi_tgis_models_inference_flan(
        self, s3_flan_small_hf_caikit_serverless_inference_service, s3_models_inference_service
    ):
        """Test inference with Flan Caikit model when multiple models in the same namespace"""
        self._run_parallel_inference(
            isvc=s3_flan_small_hf_caikit_serverless_inference_service,
            inference_config=TGIS_INFERENCE_CONFIG,
            model_name=ModelAndFormat.FLAN_T5_SMALL_CAIKIT,
        )

0 commit comments

Comments
 (0)