44from kubernetes .dynamic import DynamicClient
55from ocp_resources .namespace import Namespace
66from ocp_resources .inference_service import InferenceService
7- from tests .model_serving .model_server .utils import verify_keda_scaledobject , verify_final_pod_count
7+ from tests .model_serving .model_server .utils import (
8+ verify_keda_scaledobject ,
9+ verify_final_pod_count ,
10+ run_inference_multiple_times ,
11+ )
812from tests .model_serving .model_runtime .vllm .constant import BASE_RAW_DEPLOYMENT_CONFIG
913from tests .model_serving .model_runtime .vllm .basic_model_deployment .test_granite_7b_starter import SERVING_ARGUMENT
10- from utilities .constants import ModelFormat , ModelVersion , RunTimeConfigs
14+ from utilities .constants import ModelFormat , ModelVersion , RunTimeConfigs , Protocols , Timeout
1115from utilities .monitoring import validate_metrics_field
16+ from utilities .inference_utils import Inference
17+ from utilities .manifests .onnx import ONNX_INFERENCE_CONFIG
18+ from utilities .jira import is_jira_open
1219
# Module-level logger. NOTE(review): `get_logger` is not imported in the lines
# visible here — presumably imported (e.g. from simple_logger) in a part of the
# file outside this diff hunk; confirm against the full file.
LOGGER = get_logger(name=__name__)
1421
# Namespace and model name the OVMS KEDA autoscaling tests deploy into / query for.
OVMS_MODEL_NAMESPACE = "ovms-keda"
OVMS_MODEL_NAME = "onnx-raw"
# PromQL expression used both as the KEDA Prometheus-trigger query (verified by
# verify_keda_scaledobject) and for direct metric validation: the total count of
# successful OVMS inference requests for this model over the trailing 5 minutes.
OVMS_METRICS_QUERY = (
    f'sum(sum_over_time(ovms_requests_success{{namespace="{OVMS_MODEL_NAMESPACE}", name="{OVMS_MODEL_NAME}"}}[5m]))'
)
# Trigger threshold for the query above; scaling/validation expects the observed
# value to exceed this (see greater_than=True in the metrics test below).
OVMS_METRICS_THRESHOLD = 2.0

# Apply the `keda` marker and the valid_aws_config fixture to every test in this module.
pytestmark = [pytest.mark.keda, pytest.mark.usefixtures("valid_aws_config")]
3436
4345 "name" : ModelFormat .ONNX ,
4446 "model-version" : ModelVersion .OPSET13 ,
4547 "model-dir" : "test-dir" ,
48+ "model-name" : OVMS_MODEL_NAME ,
4649 "initial_pod_count" : INITIAL_POD_COUNT ,
4750 "final_pod_count" : FINAL_POD_COUNT ,
4851 "metrics_query" : OVMS_METRICS_QUERY ,
@@ -64,14 +67,42 @@ def test_ovms_keda_scaling_verify_scaledobject(
6467 unprivileged_client : DynamicClient ,
6568 ovms_kserve_serving_runtime ,
6669 stressed_ovms_keda_inference_service : Generator [InferenceService , Any , Any ],
70+ admin_client : DynamicClient ,
6771 ):
72+ """Test KEDA ScaledObject configuration and run inference multiple times to trigger scaling."""
73+
74+ if is_jira_open (jira_id = "RHOAIENG-31386" , admin_client = admin_client ):
75+ patch_operations = [
76+ {
77+ "op" : "add" ,
78+ "path" : "/spec/predictor/autoScaling/metrics/0/external/authenticationRef" ,
79+ "value" : {"authModes" : "bearer" , "authenticationRef" : {"name" : "inference-prometheus-auth" }},
80+ }
81+ ]
82+ admin_client .resources .get (api_version = "v1beta1" , kind = "InferenceService" ).patch (
83+ name = stressed_ovms_keda_inference_service .name ,
84+ namespace = stressed_ovms_keda_inference_service .namespace ,
85+ body = patch_operations ,
86+ content_type = "application/json-patch+json" ,
87+ )
88+
6889 verify_keda_scaledobject (
6990 client = unprivileged_client ,
7091 isvc = stressed_ovms_keda_inference_service ,
7192 expected_trigger_type = "prometheus" ,
7293 expected_query = OVMS_METRICS_QUERY ,
7394 expected_threshold = OVMS_METRICS_THRESHOLD ,
7495 )
96+ # Run inference multiple times to test KEDA scaling
97+ run_inference_multiple_times (
98+ isvc = stressed_ovms_keda_inference_service ,
99+ inference_config = ONNX_INFERENCE_CONFIG ,
100+ inference_type = Inference .INFER ,
101+ protocol = Protocols .HTTPS ,
102+ model_name = OVMS_MODEL_NAME ,
103+ iterations = 10 ,
104+ run_in_parallel = True ,
105+ )
75106
76107 def test_ovms_keda_scaling_verify_metrics (
77108 self ,
@@ -81,11 +112,13 @@ def test_ovms_keda_scaling_verify_metrics(
81112 stressed_ovms_keda_inference_service : Generator [InferenceService , Any , Any ],
82113 prometheus ,
83114 ):
115+ """Test that OVMS metrics are available and above the expected threshold."""
84116 validate_metrics_field (
85117 prometheus = prometheus ,
86118 metrics_query = OVMS_METRICS_QUERY ,
87- expected_value = str ( OVMS_METRICS_THRESHOLD ) ,
119+ expected_value = OVMS_METRICS_THRESHOLD ,
88120 greater_than = True ,
121+ timeout = Timeout .TIMEOUT_5MIN ,
89122 )
90123
91124 def test_ovms_keda_scaling_verify_final_pod_count (
@@ -95,6 +128,7 @@ def test_ovms_keda_scaling_verify_final_pod_count(
95128 ovms_kserve_serving_runtime ,
96129 stressed_ovms_keda_inference_service : Generator [InferenceService , Any , Any ],
97130 ):
131+ """Test that pods scale up to the expected count after load generation."""
98132 verify_final_pod_count (
99133 unprivileged_client = unprivileged_client ,
100134 isvc = stressed_ovms_keda_inference_service ,
0 commit comments