44from kubernetes .dynamic import DynamicClient
55from ocp_resources .namespace import Namespace
66from ocp_resources .inference_service import InferenceService
7- from tests .model_serving .model_server .utils import verify_keda_scaledobject , verify_final_pod_count
7+ from tests .model_serving .model_server .utils import (
8+ verify_keda_scaledobject ,
9+ verify_final_pod_count ,
10+ run_inference_multiple_times ,
11+ )
812from tests .model_serving .model_runtime .vllm .constant import BASE_RAW_DEPLOYMENT_CONFIG
913from tests .model_serving .model_runtime .vllm .basic_model_deployment .test_granite_7b_starter import SERVING_ARGUMENT
10- from utilities .constants import ModelFormat , ModelVersion , RunTimeConfigs
14+ from utilities .constants import ModelFormat , ModelVersion , RunTimeConfigs , Protocols , Timeout
1115from utilities .monitoring import validate_metrics_field
16+ from utilities .inference_utils import Inference
17+ from utilities .manifests .onnx import ONNX_INFERENCE_CONFIG
1218
# Module-level logger for this test module (get_logger is imported above;
# presumably simple_logger — TODO confirm against the import block).
LOGGER = get_logger(name=__name__)
1420
# Namespace and InferenceService name used by the OVMS KEDA scaling tests.
OVMS_MODEL_NAMESPACE = "ovms-keda"
OVMS_MODEL_NAME = "onnx-raw"

# Prometheus query that KEDA scales on: total successful OVMS inference
# requests over the last 5 minutes, scoped to this namespace and model.
# (This commit replaces the previous average-latency query, which divided
# rate(ovms_inference_time_us_sum) by rate(ovms_inference_time_us_count).)
OVMS_METRICS_QUERY = (
    f'sum(sum_over_time(ovms_requests_success{{namespace="{OVMS_MODEL_NAMESPACE}", name="{OVMS_MODEL_NAME}"}}[5m]))'
)
# Scale out once the query value exceeds this threshold (request count,
# not microseconds — hence 2.0 instead of the old latency threshold of 200).
OVMS_METRICS_THRESHOLD = 2.0
3233
# Mark every test in this module as a KEDA test and make all of them depend
# on the valid_aws_config fixture (model artifacts are pulled from S3).
pytestmark = [pytest.mark.keda, pytest.mark.usefixtures("valid_aws_config")]
3435
4344 "name" : ModelFormat .ONNX ,
4445 "model-version" : ModelVersion .OPSET13 ,
4546 "model-dir" : "test-dir" ,
47+ "model-name" : OVMS_MODEL_NAME ,
4648 "initial_pod_count" : INITIAL_POD_COUNT ,
4749 "final_pod_count" : FINAL_POD_COUNT ,
4850 "metrics_query" : OVMS_METRICS_QUERY ,
@@ -65,13 +67,24 @@ def test_ovms_keda_scaling_verify_scaledobject(
6567 ovms_kserve_serving_runtime ,
6668 stressed_ovms_keda_inference_service : Generator [InferenceService , Any , Any ],
6769 ):
70+ """Test KEDA ScaledObject configuration and run inference multiple times to trigger scaling."""
6871 verify_keda_scaledobject (
6972 client = unprivileged_client ,
7073 isvc = stressed_ovms_keda_inference_service ,
7174 expected_trigger_type = "prometheus" ,
7275 expected_query = OVMS_METRICS_QUERY ,
7376 expected_threshold = OVMS_METRICS_THRESHOLD ,
7477 )
78+ # Run inference multiple times to test KEDA scaling
79+ run_inference_multiple_times (
80+ isvc = stressed_ovms_keda_inference_service ,
81+ inference_config = ONNX_INFERENCE_CONFIG ,
82+ inference_type = Inference .INFER ,
83+ protocol = Protocols .HTTPS ,
84+ model_name = OVMS_MODEL_NAME ,
85+ iterations = 10 ,
86+ run_in_parallel = True ,
87+ )
7588
7689 def test_ovms_keda_scaling_verify_metrics (
7790 self ,
@@ -81,11 +94,13 @@ def test_ovms_keda_scaling_verify_metrics(
8194 stressed_ovms_keda_inference_service : Generator [InferenceService , Any , Any ],
8295 prometheus ,
8396 ):
97+ """Test that OVMS metrics are available and above the expected threshold."""
8498 validate_metrics_field (
8599 prometheus = prometheus ,
86100 metrics_query = OVMS_METRICS_QUERY ,
87- expected_value = str ( OVMS_METRICS_THRESHOLD ) ,
101+ expected_value = OVMS_METRICS_THRESHOLD ,
88102 greater_than = True ,
103+ timeout = Timeout .TIMEOUT_5MIN ,
89104 )
90105
91106 def test_ovms_keda_scaling_verify_final_pod_count (
@@ -95,6 +110,7 @@ def test_ovms_keda_scaling_verify_final_pod_count(
95110 ovms_kserve_serving_runtime ,
96111 stressed_ovms_keda_inference_service : Generator [InferenceService , Any , Any ],
97112 ):
113+ """Test that pods scale up to the expected count after load generation."""
98114 verify_final_pod_count (
99115 unprivileged_client = unprivileged_client ,
100116 isvc = stressed_ovms_keda_inference_service ,
0 commit comments