Skip to content

Commit acfa113

Browse files
committed
ci: resolve conflicts
2 parents 7f8b680 + 7edab8d commit acfa113

File tree

13 files changed

+608
-25
lines changed

13 files changed

+608
-25
lines changed

conftest.py

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,15 @@
66
import shutil
77

88
import shortuuid
9-
from pytest import Parser, Session, FixtureRequest, FixtureDef, Item, Config, CollectReport
9+
from pytest import (
10+
Parser,
11+
Session,
12+
FixtureRequest,
13+
FixtureDef,
14+
Item,
15+
Config,
16+
CollectReport,
17+
)
1018
from _pytest.terminal import TerminalReporter
1119
from typing import Optional, Any
1220
from pytest_testconfig import config as py_config
@@ -39,18 +47,26 @@ def pytest_addoption(parser: Parser) -> None:
3947

4048
# Buckets options
4149
buckets_group.addoption(
42-
"--ci-s3-bucket-name", default=os.environ.get("CI_S3_BUCKET_NAME"), help="Ci S3 bucket name"
50+
"--ci-s3-bucket-name",
51+
default=os.environ.get("CI_S3_BUCKET_NAME"),
52+
help="Ci S3 bucket name",
4353
)
4454
buckets_group.addoption(
45-
"--ci-s3-bucket-region", default=os.environ.get("CI_S3_BUCKET_REGION"), help="Ci S3 bucket region"
55+
"--ci-s3-bucket-region",
56+
default=os.environ.get("CI_S3_BUCKET_REGION"),
57+
help="Ci S3 bucket region",
4658
)
4759

4860
buckets_group.addoption(
49-
"--ci-s3-bucket-endpoint", default=os.environ.get("CI_S3_BUCKET_ENDPOINT"), help="Ci S3 bucket endpoint"
61+
"--ci-s3-bucket-endpoint",
62+
default=os.environ.get("CI_S3_BUCKET_ENDPOINT"),
63+
help="Ci S3 bucket endpoint",
5064
)
5165

5266
buckets_group.addoption(
53-
"--models-s3-bucket-name", default=os.environ.get("MODELS_S3_BUCKET_NAME"), help="Models S3 bucket name"
67+
"--models-s3-bucket-name",
68+
default=os.environ.get("MODELS_S3_BUCKET_NAME"),
69+
help="Models S3 bucket name",
5470
)
5571
buckets_group.addoption(
5672
"--models-s3-bucket-region",
@@ -91,6 +107,11 @@ def pytest_addoption(parser: Parser) -> None:
91107
action="store_true",
92108
help="Delete pre-upgrade resources; useful when debugging pre-upgrade tests",
93109
)
110+
upgrade_group.addoption(
111+
"--upgrade-deployment-modes",
112+
help="Coma-separated str; specify inference service deployment modes tests to run in upgrade tests. "
113+
"If not set, all will be tested.",
114+
)
94115

95116

96117
def pytest_cmdline_main(config: Any) -> None:
@@ -102,19 +123,45 @@ def pytest_collection_modifyitems(session: Session, config: Config, items: list[
102123
Pytest fixture to filter or re-order the items in-place.
103124
104125
Filters upgrade tests based on '--pre-upgrade' / '--post-upgrade' option and marker.
126+
If `--upgrade-deployment-modes` option is set, only tests with the specified deployment modes will be added.
105127
"""
128+
129+
def _add_upgrade_test(_item: Item, _upgrade_deployment_modes: list[str]) -> bool:
    """
    Decide whether an upgrade test should be kept in the run.

    Args:
        _item (Item): The collected test item.
        _upgrade_deployment_modes (list[str]): Deployment modes to keep; an empty
            list disables the filtering.

    Returns:
        True if no deployment modes were given, or if at least one of the item's
        keywords is one of the given deployment modes; False otherwise.

    """
    if not _upgrade_deployment_modes:
        return True

    # Membership test with a generator: short-circuits on the first match and
    # does not build an intermediate list of matching keywords.
    return any(keyword in _upgrade_deployment_modes for keyword in _item.keywords)
145+
106146
pre_upgrade_tests: list[Item] = []
107147
post_upgrade_tests: list[Item] = []
108148
non_upgrade_tests: list[Item] = []
149+
upgrade_deployment_modes: list[str] = []
109150

110151
run_pre_upgrade_tests: str | None = config.getoption(name="pre_upgrade")
111152
run_post_upgrade_tests: str | None = config.getoption(name="post_upgrade")
153+
if config_upgrade_deployment_modes := config.getoption(name="upgrade_deployment_modes"):
154+
upgrade_deployment_modes = config_upgrade_deployment_modes.split(",")
112155

113156
for item in items:
114-
if "pre_upgrade" in item.keywords:
157+
if "pre_upgrade" in item.keywords and _add_upgrade_test(
158+
_item=item, _upgrade_deployment_modes=upgrade_deployment_modes
159+
):
115160
pre_upgrade_tests.append(item)
116161

117-
elif "post_upgrade" in item.keywords:
162+
elif "post_upgrade" in item.keywords and _add_upgrade_test(
163+
_item=item, _upgrade_deployment_modes=upgrade_deployment_modes
164+
):
118165
post_upgrade_tests.append(item)
119166

120167
else:

tests/model_serving/model_server/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ def s3_models_inference_service(
148148
if (enable_auth := request.param.get("enable-auth")) is not None:
149149
isvc_kwargs["enable_auth"] = enable_auth
150150

151+
if (scale_metric := request.param.get("scale-metric")) is not None:
152+
isvc_kwargs["scale_metric"] = scale_metric
153+
154+
if (scale_target := request.param.get("scale-target")) is not None:
155+
isvc_kwargs["scale_target"] = scale_target
156+
151157
with create_isvc(**isvc_kwargs) as isvc:
152158
yield isvc
153159

tests/model_serving/model_server/serverless/conftest.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,69 @@
44
from _pytest.fixtures import FixtureRequest
55
from kubernetes.dynamic import DynamicClient
66
from ocp_resources.inference_service import InferenceService
7+
from ocp_resources.resource import ResourceEditor
78
from ocp_resources.namespace import Namespace
89
from ocp_resources.secret import Secret
910
from ocp_resources.serving_runtime import ServingRuntime
1011

12+
from tests.model_serving.model_server.serverless.utils import wait_for_canary_rollout
13+
from tests.model_serving.model_server.utils import run_inference_multiple_times
14+
from utilities.constants import ModelFormat, Protocols
15+
from utilities.inference_utils import Inference
16+
from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG
1117
from utilities.constants import KServeDeploymentType, ModelName, ModelStoragePath
1218
from utilities.inference_utils import create_isvc
1319

1420

21+
@pytest.fixture(scope="class")
def inference_service_patched_replicas(
    request: FixtureRequest, ovms_serverless_inference_service: InferenceService
) -> InferenceService:
    """
    Patch `spec.predictor.minReplicas` on the OVMS serverless inference service.

    The replica count is read from the fixture parameter key "min-replicas" and
    applied through `ResourceEditor(...).update()`.

    Returns:
        The same inference service object that was passed in.
    """
    min_replicas = request.param["min-replicas"]
    replicas_patch = {"spec": {"predictor": {"minReplicas": min_replicas}}}

    editor = ResourceEditor(patches={ovms_serverless_inference_service: replicas_patch})
    editor.update()

    return ovms_serverless_inference_service
36+
37+
38+
@pytest.fixture
def inference_service_updated_canary_config(
    request: FixtureRequest, s3_models_inference_service: InferenceService
) -> Generator[InferenceService, Any, Any]:
    """
    Patch the inference service predictor with a canary traffic percentage.

    Fixture parameters:
        "canary-traffic-percent" (required): value set as `canaryTrafficPercent`.
        "model-path" (optional): when truthy, also sets `model.storage.path`.

    The patch is applied inside a `ResourceEditor` context manager; the fixture
    calls `wait_for_canary_rollout` with the requested percentage before
    yielding the inference service.
    """
    params = request.param
    traffic_percent = params["canary-traffic-percent"]

    predictor_patch: dict[str, Any] = {"canaryTrafficPercent": traffic_percent}
    if storage_path := params.get("model-path"):
        predictor_patch["model"] = {"storage": {"path": storage_path}}

    isvc_patch = {"spec": {"predictor": predictor_patch}}

    with ResourceEditor(patches={s3_models_inference_service: isvc_patch}):
        wait_for_canary_rollout(isvc=s3_models_inference_service, percentage=traffic_percent)
        yield s3_models_inference_service
55+
56+
57+
@pytest.fixture
def multiple_tgis_inference_requests(s3_models_inference_service: InferenceService) -> None:
    """
    Send a batch of inference requests to the inference service.

    Calls `run_inference_multiple_times` with the Caikit-TGIS inference config,
    the all-tokens inference type over HTTPS, `iterations=50` and
    `run_in_parallel=True`.
    """
    request_settings = {
        "isvc": s3_models_inference_service,
        "inference_config": CAIKIT_TGIS_INFERENCE_CONFIG,
        "inference_type": Inference.ALL_TOKENS,
        "protocol": Protocols.HTTPS,
        "model_name": ModelFormat.CAIKIT,
        "iterations": 50,
        "run_in_parallel": True,
    }
    run_inference_multiple_times(**request_settings)
68+
69+
1570
@pytest.fixture(scope="class")
1671
def s3_flan_small_hf_caikit_serverless_inference_service(
1772
request: FixtureRequest,
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.serverless.utils import verify_canary_traffic
4+
from tests.model_serving.model_server.utils import verify_inference_response
5+
from utilities.constants import (
6+
KServeDeploymentType,
7+
ModelAndFormat,
8+
ModelName,
9+
ModelStoragePath,
10+
Protocols,
11+
RuntimeTemplates,
12+
)
13+
from utilities.inference_utils import Inference
14+
from utilities.manifests.pytorch import PYTORCH_TGIS_INFERENCE_CONFIG
15+
from utilities.manifests.tgis_grpc import TGIS_INFERENCE_CONFIG
16+
17+
pytestmark = [pytest.mark.serverless, pytest.mark.sanity]
18+
19+
20+
@pytest.mark.polarion("ODS-2371")
@pytest.mark.parametrize(
    "model_namespace, serving_runtime_from_template, s3_models_inference_service",
    [
        pytest.param(
            {"name": "serverless-canary-rollout"},
            {
                "name": "tgis-runtime",
                "template-name": RuntimeTemplates.TGIS_GRPC_SERVING,
                "multi-model": False,
                "enable-http": False,
                "enable-grpc": True,
            },
            {
                "name": f"{ModelName.BLOOM_560M}-model",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "model-dir": f"{ModelStoragePath.BLOOM_560M_CAIKIT}/artifacts",
                "external-route": True,
            },
        )
    ],
    indirect=True,
)
class TestServerlessCanaryRollout:
    """
    Canary rollout checks for a serverless inference service.

    Inference is verified on the initially deployed model, then with the
    predictor patched to 30% canary traffic and a new model path, and finally
    with canary traffic set to 100%.
    """

    def test_serverless_before_model_update(self, s3_models_inference_service):
        """Test inference with Bloom before the model is updated."""
        verify_inference_response(
            inference_service=s3_models_inference_service,
            model_name=ModelAndFormat.BLOOM_560M_CAIKIT,
            inference_config=PYTORCH_TGIS_INFERENCE_CONFIG,
            inference_type=Inference.ALL_TOKENS,
            protocol=Protocols.GRPC,
            use_default_query=True,
        )

    @pytest.mark.parametrize(
        "inference_service_updated_canary_config",
        [
            pytest.param(
                {"canary-traffic-percent": 30, "model-path": ModelStoragePath.FLAN_T5_SMALL_HF},
            )
        ],
        indirect=True,
    )
    def test_serverless_during_canary_rollout(self, inference_service_updated_canary_config):
        """Test inference while the canary receives 30% of the traffic."""
        verify_canary_traffic(
            isvc=inference_service_updated_canary_config,
            model_name=ModelAndFormat.FLAN_T5_SMALL_CAIKIT,
            inference_config=TGIS_INFERENCE_CONFIG,
            inference_type=Inference.ALL_TOKENS,
            protocol=Protocols.GRPC,
            iterations=20,
            # Matches the "canary-traffic-percent" value parametrized above.
            expected_percentage=30,
            tolerance=10,
        )

    @pytest.mark.parametrize(
        "inference_service_updated_canary_config",
        [
            pytest.param(
                {"canary-traffic-percent": 100},
            )
        ],
        indirect=True,
    )
    def test_serverless_after_canary_rollout(self, inference_service_updated_canary_config):
        """Test inference once canary traffic is set to 100%."""
        verify_canary_traffic(
            isvc=inference_service_updated_canary_config,
            model_name=ModelAndFormat.FLAN_T5_SMALL_CAIKIT,
            inference_config=TGIS_INFERENCE_CONFIG,
            inference_type=Inference.ALL_TOKENS,
            protocol=Protocols.GRPC,
            iterations=5,
            expected_percentage=100,
        )
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import pytest
2+
3+
from tests.model_serving.model_server.serverless.utils import (
4+
inference_service_pods_sampler,
5+
)
6+
from utilities.constants import (
7+
KServeDeploymentType,
8+
ModelFormat,
9+
ModelInferenceRuntime,
10+
ModelStoragePath,
11+
RuntimeTemplates,
12+
Timeout,
13+
)
14+
15+
pytestmark = [
16+
pytest.mark.serverless,
17+
pytest.mark.sanity,
18+
pytest.mark.usefixtures("valid_aws_config"),
19+
]
20+
21+
22+
@pytest.mark.parametrize(
    "model_namespace, serving_runtime_from_template, s3_models_inference_service",
    [
        pytest.param(
            {"name": "serverless-auto-scale"},
            {
                "name": f"{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}",
                "template-name": RuntimeTemplates.CAIKIT_TGIS_SERVING,
                "multi-model": False,
                "enable-http": True,
            },
            {
                "name": f"{ModelFormat.CAIKIT}-auto-scale",
                "deployment-mode": KServeDeploymentType.SERVERLESS,
                "model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT,
                "scale-metric": "concurrency",
                "scale-target": 1,
            },
        )
    ],
    indirect=True,
)
class TestConcurrencyAutoScale:
    """
    Auto-scaling checks for a serverless inference service.

    The service is created with `scale-metric` "concurrency" and `scale-target` 1.
    The first test waits for more than one running pod after the
    `multiple_tgis_inference_requests` fixture has run; the second, which depends
    on the first, waits for the pod count to return to one.
    """

    @pytest.mark.dependency(name="test_auto_scale_using_concurrency")
    def test_auto_scale_using_concurrency(
        self,
        admin_client,
        s3_models_inference_service,
        multiple_tgis_inference_requests,
    ):
        """Verify model is successfully scaled up based on concurrency metrics (KPA)"""
        for pods in inference_service_pods_sampler(
            client=admin_client,
            isvc=s3_models_inference_service,
            timeout=Timeout.TIMEOUT_1MIN,
        ):
            # `pods and ...` keeps the guard against an empty/None sample, in the
            # same flat form used by the scale-down test below. The generator lets
            # `all` stop at the first pod that is not running.
            if pods and len(pods) > 1 and all(pod.status == pod.Status.RUNNING for pod in pods):
                return

    @pytest.mark.dependency(requires=["test_auto_scale_using_concurrency"])
    def test_pods_scaled_down_when_no_requests(self, admin_client, s3_models_inference_service):
        """Verify auto-scaled pods are deleted when there are no inference requests"""
        for pods in inference_service_pods_sampler(
            client=admin_client,
            isvc=s3_models_inference_service,
            timeout=Timeout.TIMEOUT_4MIN,
        ):
            if pods and len(pods) == 1:
                return

0 commit comments

Comments
 (0)