|
1 | 1 | import pytest |
2 | 2 |
|
| 3 | +from tests.model_serving.model_server.metrics.utils import validate_metrics_configuration |
3 | 4 | from tests.model_serving.model_server.utils import ( |
4 | 5 | run_inference_multiple_times, |
5 | 6 | verify_inference_response, |
6 | 7 | ) |
7 | 8 | from utilities.constants import ( |
8 | 9 | KServeDeploymentType, |
9 | | - ModelFormat, |
| 10 | + ModelAndFormat, |
10 | 11 | ModelInferenceRuntime, |
11 | 12 | ModelStoragePath, |
| 13 | + ModelVersion, |
12 | 14 | Protocols, |
13 | | - RuntimeTemplates, |
14 | 15 | ) |
| 16 | +from timeout_sampler import TimeoutSampler |
15 | 17 | from utilities.inference_utils import Inference |
16 | | -from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG |
| 18 | +from utilities.manifests.openvino import OPENVINO_KSERVE_INFERENCE_CONFIG |
17 | 19 | from utilities.monitoring import get_metrics_value, validate_metrics_field |
18 | 20 |
|
# Module-level marks applied to every test in this file:
# - valid_aws_config / user_workload_monitoring_config_map fixtures are required
#   for S3 model access and for UserWorkloadMonitoring to scrape model metrics.
# - all tests here belong to the "metrics" suite.
pytestmark = [
    pytest.mark.usefixtures("valid_aws_config", "user_workload_monitoring_config_map"),
    pytest.mark.metrics,
]
24 | 25 |
|
25 | 26 |
|
26 | | -@pytest.mark.serverless |
27 | 27 | @pytest.mark.parametrize( |
28 | | - "unprivileged_model_namespace, serving_runtime_from_template, s3_models_inference_service", |
| 28 | + "unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service", |
29 | 29 | [ |
30 | 30 | pytest.param( |
31 | | - {"name": "test-kserve-tgis-metrics"}, |
| 31 | + {"name": "test-ovms-metrics"}, |
32 | 32 | { |
33 | | - "name": f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}", |
34 | | - "template-name": RuntimeTemplates.CAIKIT_TGIS_SERVING, |
35 | | - "multi-model": False, |
36 | | - "enable-http": True, |
| 33 | + "runtime-name": ModelInferenceRuntime.OPENVINO_KSERVE_RUNTIME, |
| 34 | + "model-format": {ModelAndFormat.OPENVINO_IR: ModelVersion.OPSET1}, |
37 | 35 | }, |
38 | 36 | { |
39 | | - "name": f"{Protocols.HTTP}-{ModelFormat.CAIKIT}", |
40 | | - "deployment-mode": KServeDeploymentType.SERVERLESS, |
41 | | - "model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT, |
| 37 | + "name": "ovms-metrics", |
| 38 | + "model-dir": ModelStoragePath.KSERVE_OPENVINO_EXAMPLE_MODEL, |
| 39 | + "model-version": ModelVersion.OPSET1, |
| 40 | + "deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT, |
42 | 41 | }, |
43 | 42 | ) |
44 | 43 | ], |
|
47 | 46 | class TestModelMetrics: |
48 | 47 | @pytest.mark.smoke |
49 | 48 | @pytest.mark.polarion("ODS-2555") |
50 | | - def test_model_metrics_num_success_requests(self, s3_models_inference_service, prometheus): |
| 49 | + def test_model_metrics_num_success_requests(self, ovms_kserve_inference_service, prometheus): |
51 | 50 | """Verify number of successful model requests in OpenShift monitoring system (UserWorkloadMonitoring) metrics""" |
| 51 | + validate_metrics_configuration(inference_service=ovms_kserve_inference_service) |
| 52 | + |
52 | 53 | verify_inference_response( |
53 | | - inference_service=s3_models_inference_service, |
54 | | - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, |
55 | | - inference_type=Inference.ALL_TOKENS, |
| 54 | + inference_service=ovms_kserve_inference_service, |
| 55 | + inference_config=OPENVINO_KSERVE_INFERENCE_CONFIG, |
| 56 | + inference_type=Inference.INFER, |
56 | 57 | protocol=Protocols.HTTPS, |
57 | | - model_name=ModelFormat.CAIKIT, |
58 | 58 | use_default_query=True, |
59 | 59 | ) |
| 60 | + |
| 61 | + metrics_query = ( |
| 62 | + f'ovms_requests_success{{namespace="{ovms_kserve_inference_service.namespace}", ' |
| 63 | + f'name="{ovms_kserve_inference_service.name}"}}' |
| 64 | + ) |
| 65 | + |
60 | 66 | validate_metrics_field( |
61 | 67 | prometheus=prometheus, |
62 | | - metrics_query="tgi_request_success", |
| 68 | + metrics_query=metrics_query, |
63 | 69 | expected_value="1", |
64 | 70 | ) |
65 | 71 |
|
66 | 72 | @pytest.mark.smoke |
67 | 73 | @pytest.mark.polarion("ODS-2555") |
68 | | - def test_model_metrics_num_total_requests(self, s3_models_inference_service, prometheus): |
| 74 | + def test_model_metrics_num_total_requests(self, ovms_kserve_inference_service, prometheus): |
69 | 75 | """Verify number of total model requests in OpenShift monitoring system (UserWorkloadMonitoring) metrics""" |
| 76 | + validate_metrics_configuration(inference_service=ovms_kserve_inference_service) |
| 77 | + |
70 | 78 | total_runs = 5 |
71 | 79 |
|
72 | 80 | run_inference_multiple_times( |
73 | | - isvc=s3_models_inference_service, |
74 | | - inference_config=CAIKIT_TGIS_INFERENCE_CONFIG, |
75 | | - inference_type=Inference.ALL_TOKENS, |
| 81 | + isvc=ovms_kserve_inference_service, |
| 82 | + inference_config=OPENVINO_KSERVE_INFERENCE_CONFIG, |
| 83 | + inference_type=Inference.INFER, |
76 | 84 | protocol=Protocols.HTTPS, |
77 | | - model_name=ModelFormat.CAIKIT, |
78 | 85 | iterations=total_runs, |
79 | 86 | run_in_parallel=True, |
80 | 87 | ) |
| 88 | + |
| 89 | + metrics_query = ( |
| 90 | + f'ovms_requests_success{{namespace="{ovms_kserve_inference_service.namespace}", ' |
| 91 | + f'name="{ovms_kserve_inference_service.name}"}}' |
| 92 | + ) |
| 93 | + |
81 | 94 | validate_metrics_field( |
82 | 95 | prometheus=prometheus, |
83 | | - metrics_query="tgi_request_count", |
84 | | - expected_value=str(total_runs + 1), |
| 96 | + metrics_query=metrics_query, |
| 97 | + expected_value=str(total_runs), |
| 98 | + greater_than=True, |
85 | 99 | ) |
86 | 100 |
|
87 | 101 | @pytest.mark.smoke |
88 | 102 | @pytest.mark.polarion("ODS-2555") |
89 | | - def test_model_metrics_cpu_utilization(self, s3_models_inference_service, prometheus): |
| 103 | + def test_model_metrics_cpu_utilization(self, ovms_kserve_inference_service, prometheus): |
90 | 104 | """Verify CPU utilization data in OpenShift monitoring system (UserWorkloadMonitoring) metrics""" |
91 | | - assert get_metrics_value( |
| 105 | + validate_metrics_configuration(inference_service=ovms_kserve_inference_service) |
| 106 | + |
| 107 | + metrics_query = f"pod:container_cpu_usage:sum{{namespace='{ovms_kserve_inference_service.namespace}'}}" |
| 108 | + |
| 109 | + for cpu_value in TimeoutSampler( |
| 110 | + wait_timeout=120, |
| 111 | + sleep=10, |
| 112 | + func=get_metrics_value, |
92 | 113 | prometheus=prometheus, |
93 | | - metrics_query=f"pod:container_cpu_usage:sum{{namespace='{s3_models_inference_service.namespace}'}}", |
94 | | - ) |
| 114 | + metrics_query=metrics_query, |
| 115 | + ): |
| 116 | + if cpu_value is not None: |
| 117 | + break |
0 commit comments