Skip to content

Commit 6cd1c83

Browse files
committed
Add fix and extra step for metrics test
Signed-off-by: Milind Waykole <mwaykole@redhat.com>
1 parent bd6a1fc commit 6cd1c83

File tree

3 files changed

+125
-69
lines changed

3 files changed

+125
-69
lines changed
Lines changed: 54 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,43 @@
11
import pytest
22

3+
from tests.model_serving.model_server.metrics.utils import validate_metrics_configuration
34
from tests.model_serving.model_server.utils import (
45
run_inference_multiple_times,
56
verify_inference_response,
67
)
78
from utilities.constants import (
89
KServeDeploymentType,
9-
ModelFormat,
10+
ModelAndFormat,
1011
ModelInferenceRuntime,
1112
ModelStoragePath,
13+
ModelVersion,
1214
Protocols,
13-
RuntimeTemplates,
1415
)
16+
from timeout_sampler import TimeoutSampler
1517
from utilities.inference_utils import Inference
16-
from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG
18+
from utilities.manifests.openvino import OPENVINO_KSERVE_INFERENCE_CONFIG
1719
from utilities.monitoring import get_metrics_value, validate_metrics_field
1820

1921
pytestmark = [
20-
pytest.mark.serverless,
2122
pytest.mark.usefixtures("valid_aws_config", "user_workload_monitoring_config_map"),
2223
pytest.mark.metrics,
2324
]
2425

2526

26-
@pytest.mark.serverless
2727
@pytest.mark.parametrize(
28-
"unprivileged_model_namespace, serving_runtime_from_template, s3_models_inference_service",
28+
"unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
2929
[
3030
pytest.param(
31-
{"name": "test-kserve-tgis-metrics"},
31+
{"name": "test-ovms-metrics"},
3232
{
33-
"name": f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}",
34-
"template-name": RuntimeTemplates.CAIKIT_TGIS_SERVING,
35-
"multi-model": False,
36-
"enable-http": True,
33+
"runtime-name": ModelInferenceRuntime.OPENVINO_KSERVE_RUNTIME,
34+
"model-format": {ModelAndFormat.OPENVINO_IR: ModelVersion.OPSET1},
3735
},
3836
{
39-
"name": f"{Protocols.HTTP}-{ModelFormat.CAIKIT}",
40-
"deployment-mode": KServeDeploymentType.SERVERLESS,
41-
"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT,
37+
"name": "ovms-metrics",
38+
"model-dir": ModelStoragePath.KSERVE_OPENVINO_EXAMPLE_MODEL,
39+
"model-version": ModelVersion.OPSET1,
40+
"deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT,
4241
},
4342
)
4443
],
@@ -47,48 +46,72 @@
4746
class TestModelMetrics:
4847
@pytest.mark.smoke
4948
@pytest.mark.polarion("ODS-2555")
50-
def test_model_metrics_num_success_requests(self, s3_models_inference_service, prometheus):
49+
def test_model_metrics_num_success_requests(self, ovms_kserve_inference_service, prometheus):
5150
"""Verify number of successful model requests in OpenShift monitoring system (UserWorkloadMonitoring) metrics"""
51+
validate_metrics_configuration(inference_service=ovms_kserve_inference_service)
52+
5253
verify_inference_response(
53-
inference_service=s3_models_inference_service,
54-
inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
55-
inference_type=Inference.ALL_TOKENS,
54+
inference_service=ovms_kserve_inference_service,
55+
inference_config=OPENVINO_KSERVE_INFERENCE_CONFIG,
56+
inference_type=Inference.INFER,
5657
protocol=Protocols.HTTPS,
57-
model_name=ModelFormat.CAIKIT,
5858
use_default_query=True,
5959
)
60+
61+
metrics_query = (
62+
f'ovms_requests_success{{namespace="{ovms_kserve_inference_service.namespace}", '
63+
f'name="{ovms_kserve_inference_service.name}"}}'
64+
)
65+
6066
validate_metrics_field(
6167
prometheus=prometheus,
62-
metrics_query="tgi_request_success",
68+
metrics_query=metrics_query,
6369
expected_value="1",
6470
)
6571

6672
@pytest.mark.smoke
6773
@pytest.mark.polarion("ODS-2555")
68-
def test_model_metrics_num_total_requests(self, s3_models_inference_service, prometheus):
74+
def test_model_metrics_num_total_requests(self, ovms_kserve_inference_service, prometheus):
6975
"""Verify number of total model requests in OpenShift monitoring system (UserWorkloadMonitoring) metrics"""
76+
validate_metrics_configuration(inference_service=ovms_kserve_inference_service)
77+
7078
total_runs = 5
7179

7280
run_inference_multiple_times(
73-
isvc=s3_models_inference_service,
74-
inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
75-
inference_type=Inference.ALL_TOKENS,
81+
isvc=ovms_kserve_inference_service,
82+
inference_config=OPENVINO_KSERVE_INFERENCE_CONFIG,
83+
inference_type=Inference.INFER,
7684
protocol=Protocols.HTTPS,
77-
model_name=ModelFormat.CAIKIT,
7885
iterations=total_runs,
7986
run_in_parallel=True,
8087
)
88+
89+
metrics_query = (
90+
f'ovms_requests_success{{namespace="{ovms_kserve_inference_service.namespace}", '
91+
f'name="{ovms_kserve_inference_service.name}"}}'
92+
)
93+
8194
validate_metrics_field(
8295
prometheus=prometheus,
83-
metrics_query="tgi_request_count",
84-
expected_value=str(total_runs + 1),
96+
metrics_query=metrics_query,
97+
expected_value=str(total_runs),
98+
greater_than=True,
8599
)
86100

87101
@pytest.mark.smoke
88102
@pytest.mark.polarion("ODS-2555")
89-
def test_model_metrics_cpu_utilization(self, s3_models_inference_service, prometheus):
103+
def test_model_metrics_cpu_utilization(self, ovms_kserve_inference_service, prometheus):
90104
"""Verify CPU utilization data in OpenShift monitoring system (UserWorkloadMonitoring) metrics"""
91-
assert get_metrics_value(
105+
validate_metrics_configuration(inference_service=ovms_kserve_inference_service)
106+
107+
metrics_query = f"pod:container_cpu_usage:sum{{namespace='{ovms_kserve_inference_service.namespace}'}}"
108+
109+
for cpu_value in TimeoutSampler(
110+
wait_timeout=120,
111+
sleep=10,
112+
func=get_metrics_value,
92113
prometheus=prometheus,
93-
metrics_query=f"pod:container_cpu_usage:sum{{namespace='{s3_models_inference_service.namespace}'}}",
94-
)
114+
metrics_query=metrics_query,
115+
):
116+
if cpu_value is not None:
117+
break
Lines changed: 36 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,37 @@
11
import pytest
22

3+
from tests.model_serving.model_server.metrics.utils import validate_metrics_configuration
34
from tests.model_serving.model_server.utils import (
45
run_inference_multiple_times,
5-
verify_inference_response,
66
)
7-
from utilities.constants import ModelFormat, ModelStoragePath, Protocols
7+
from utilities.constants import (
8+
KServeDeploymentType,
9+
ModelAndFormat,
10+
ModelInferenceRuntime,
11+
ModelStoragePath,
12+
ModelVersion,
13+
Protocols,
14+
)
815
from utilities.inference_utils import Inference
9-
from utilities.manifests.caikit_tgis import CAIKIT_TGIS_INFERENCE_CONFIG
16+
from utilities.manifests.openvino import OPENVINO_KSERVE_INFERENCE_CONFIG
1017
from utilities.monitoring import validate_metrics_field
1118

1219

1320
@pytest.mark.parametrize(
14-
"unprivileged_model_namespace, unprivileged_s3_caikit_serverless_inference_service",
15-
[
16-
pytest.param(
17-
{"name": "test-non-admin-serverless"},
18-
{"model-dir": ModelStoragePath.FLAN_T5_SMALL_CAIKIT},
19-
)
20-
],
21-
indirect=True,
22-
)
23-
@pytest.mark.smoke
24-
@pytest.mark.serverless
25-
class TestServerlessUnprivilegedUser:
26-
@pytest.mark.polarion("ODS-2552")
27-
def test_non_admin_deploy_serverless_and_query_metrics(self, unprivileged_s3_caikit_serverless_inference_service):
28-
"""Verify non admin can deploy a model and query using REST"""
29-
verify_inference_response(
30-
inference_service=unprivileged_s3_caikit_serverless_inference_service,
31-
inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
32-
inference_type=Inference.ALL_TOKENS,
33-
protocol=Protocols.HTTPS,
34-
model_name=ModelFormat.CAIKIT,
35-
use_default_query=True,
36-
)
37-
38-
39-
@pytest.mark.parametrize(
40-
"unprivileged_model_namespace, unprivileged_s3_caikit_raw_inference_service",
21+
"unprivileged_model_namespace, ovms_kserve_serving_runtime, ovms_kserve_inference_service",
4122
[
4223
pytest.param(
4324
{"name": "test-non-admin-metrics"},
44-
{"model-dir": ModelStoragePath.FLAN_T5_SMALL_HF},
25+
{
26+
"runtime-name": ModelInferenceRuntime.OPENVINO_KSERVE_RUNTIME,
27+
"model-format": {ModelAndFormat.OPENVINO_IR: ModelVersion.OPSET1},
28+
},
29+
{
30+
"name": "ovms-non-admin",
31+
"model-dir": ModelStoragePath.KSERVE_OPENVINO_EXAMPLE_MODEL,
32+
"model-version": ModelVersion.OPSET1,
33+
"deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT,
34+
},
4535
)
4636
],
4737
indirect=True,
@@ -52,23 +42,31 @@ class TestRawUnprivilegedUserMetrics:
5242
@pytest.mark.metrics
5343
def test_non_admin_raw_metrics(
5444
self,
55-
unprivileged_s3_caikit_raw_inference_service,
45+
ovms_kserve_inference_service,
5646
prometheus,
5747
user_workload_monitoring_config_map,
5848
):
5949
"""Verify number of total model requests in OpenShift monitoring system (UserWorkloadMonitoring) metrics"""
50+
validate_metrics_configuration(inference_service=ovms_kserve_inference_service)
51+
6052
total_runs = 5
6153

6254
run_inference_multiple_times(
63-
isvc=unprivileged_s3_caikit_raw_inference_service,
64-
inference_config=CAIKIT_TGIS_INFERENCE_CONFIG,
65-
inference_type=Inference.ALL_TOKENS,
66-
protocol=Protocols.HTTP,
67-
model_name=ModelFormat.CAIKIT,
55+
isvc=ovms_kserve_inference_service,
56+
inference_config=OPENVINO_KSERVE_INFERENCE_CONFIG,
57+
inference_type=Inference.INFER,
58+
protocol=Protocols.HTTPS,
6859
iterations=total_runs,
6960
)
61+
62+
metrics_query = (
63+
f'ovms_requests_success{{namespace="{ovms_kserve_inference_service.namespace}", '
64+
f'name="{ovms_kserve_inference_service.name}"}}'
65+
)
66+
7067
validate_metrics_field(
7168
prometheus=prometheus,
72-
metrics_query="tgi_request_count",
69+
metrics_query=metrics_query,
7370
expected_value=str(total_runs),
71+
greater_than=True,
7472
)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from ocp_resources.config_map import ConfigMap
2+
from ocp_resources.inference_service import InferenceService
3+
4+
5+
def validate_metrics_configuration(inference_service: InferenceService) -> None:
6+
"""
7+
Validate that the InferenceService has proper metrics configuration.
8+
9+
Checks:
10+
- Metrics dashboard ConfigMap has supported=true
11+
12+
Args:
13+
inference_service: InferenceService object
14+
15+
Raises:
16+
AssertionError: If validation fails
17+
"""
18+
metrics_cm_name = f"{inference_service.name}-metrics-dashboard"
19+
metrics_cm = ConfigMap(
20+
client=inference_service.client,
21+
name=metrics_cm_name,
22+
namespace=inference_service.namespace,
23+
)
24+
25+
assert metrics_cm.exists, (
26+
f"Metrics dashboard ConfigMap '{metrics_cm_name}' not found in namespace "
27+
f"'{inference_service.namespace}'"
28+
)
29+
30+
supported_value = metrics_cm.instance.data.get("supported")
31+
32+
assert supported_value == "true", (
33+
f"Metrics dashboard ConfigMap '{metrics_cm_name}' has 'supported: {supported_value}'. "
34+
f"Expected 'supported: true' for metrics to be available. "
35+
)

0 commit comments

Comments
 (0)