66from ocp_resources .inference_service import InferenceService
77from ocp_resources .namespace import Namespace
88from ocp_resources .node import Node
9- from ocp_resources .persistent_volume_claim import PersistentVolumeClaim
109from ocp_resources .pod import Pod
1110from ocp_resources .resource import ResourceEditor
1211from ocp_resources .secret import Secret
1312from ocp_resources .serving_runtime import ServingRuntime
1413from pytest_testconfig import config as py_config
15- from timeout_sampler import TimeoutSampler
1614
1715from tests .model_serving .model_server .kserve .multi_node .utils import (
1816 delete_multi_node_pod_by_role ,
17+ wait_for_vllm_health ,
1918)
19+ from timeout_sampler import TimeoutSampler
20+
2021from utilities .constants import KServeDeploymentType , Labels , Protocols , Timeout , ModelCarImage
21- from utilities .general import download_model_data
2222from utilities .inference_utils import create_isvc
2323from utilities .infra import (
2424 get_pods_by_isvc_label ,
@@ -33,37 +33,20 @@ def nvidia_gpu_nodes(nodes: list[Node]) -> list[Node]:
3333 return [node for node in nodes if "nvidia.com/gpu.present" in node .labels .keys ()]
3434
3535
@pytest.fixture(scope="session")
def max_gpu_per_node(nvidia_gpu_nodes: list[Node]) -> int:
    """Return the largest allocatable 'nvidia.com/gpu' count found on any GPU node.

    Reads each node's `.instance.status.allocatable` map; nodes that do not
    report the resource count as 0. Returns 0 when no GPU nodes are present.
    """
    gpu_counts = [
        int(node.instance.status.allocatable.get("nvidia.com/gpu", 0))
        for node in nvidia_gpu_nodes
    ]
    return max(gpu_counts) if gpu_counts else 0
42+
43+
@pytest.fixture(scope="session")
def skip_if_no_gpu_nodes(nvidia_gpu_nodes: list[Node]) -> None:
    """Skip the test unless the cluster has at least two GPU worker nodes."""
    if len(nvidia_gpu_nodes) >= 2:
        return
    pytest.skip("Multi-node tests can only run on a Cluster with at least 2 GPU Worker nodes")
4048
4149
42- @pytest .fixture (scope = "class" )
43- def models_bucket_downloaded_model_data (
44- request : FixtureRequest ,
45- admin_client : DynamicClient ,
46- unprivileged_model_namespace : Namespace ,
47- models_s3_bucket_name : str ,
48- model_pvc : PersistentVolumeClaim ,
49- aws_secret_access_key : str ,
50- aws_access_key_id : str ,
51- models_s3_bucket_endpoint : str ,
52- models_s3_bucket_region : str ,
53- ) -> str :
54- return download_model_data (
55- client = admin_client ,
56- aws_access_key_id = aws_access_key_id ,
57- aws_secret_access_key = aws_secret_access_key ,
58- model_namespace = unprivileged_model_namespace .name ,
59- model_pvc_name = model_pvc .name ,
60- bucket_name = models_s3_bucket_name ,
61- aws_endpoint_url = models_s3_bucket_endpoint ,
62- aws_default_region = models_s3_bucket_region ,
63- model_path = request .param ["model-dir" ],
64- )
65-
66-
6750@pytest .fixture (scope = "class" )
6851def multi_node_serving_runtime (
6952 request : FixtureRequest ,
@@ -86,35 +69,6 @@ def multi_node_inference_service(
8669 request : FixtureRequest ,
8770 unprivileged_client : DynamicClient ,
8871 multi_node_serving_runtime : ServingRuntime ,
89- model_pvc : PersistentVolumeClaim ,
90- models_bucket_downloaded_model_data : str ,
91- ) -> Generator [InferenceService , Any , Any ]:
92- with create_isvc (
93- client = unprivileged_client ,
94- name = request .param ["name" ],
95- namespace = multi_node_serving_runtime .namespace ,
96- runtime = multi_node_serving_runtime .name ,
97- storage_uri = f"pvc://{ model_pvc .name } /{ models_bucket_downloaded_model_data } " ,
98- model_format = multi_node_serving_runtime .instance .spec .supportedModelFormats [0 ].name ,
99- deployment_mode = KServeDeploymentType .RAW_DEPLOYMENT ,
100- autoscaler_mode = "external" ,
101- multi_node_worker_spec = {},
102- wait_for_predictor_pods = False ,
103- ) as isvc :
104- wait_for_inference_deployment_replicas (
105- client = unprivileged_client ,
106- isvc = isvc ,
107- expected_num_deployments = 2 ,
108- runtime_name = multi_node_serving_runtime .name ,
109- )
110- yield isvc
111-
112-
113- @pytest .fixture (scope = "class" )
114- def multi_node_oci_inference_service (
115- request : FixtureRequest ,
116- unprivileged_client : DynamicClient ,
117- multi_node_serving_runtime : ServingRuntime ,
11872) -> Generator [InferenceService , Any , Any ]:
11973 resources = {
12074 "requests" : {
@@ -136,7 +90,6 @@ def multi_node_oci_inference_service(
13690 ]
13791 }
13892
139- # NOTE: In KServe v0.15, the autoscaler_mode needs to be updated to "none".
14093 with create_isvc (
14194 client = unprivileged_client ,
14295 name = request .param ["name" ],
@@ -145,11 +98,10 @@ def multi_node_oci_inference_service(
14598 storage_uri = ModelCarImage .GRANITE_8B_CODE_INSTRUCT ,
14699 model_format = multi_node_serving_runtime .instance .spec .supportedModelFormats [0 ].name ,
147100 deployment_mode = KServeDeploymentType .RAW_DEPLOYMENT ,
148- autoscaler_mode = "external " ,
101+ autoscaler_mode = "none " ,
149102 resources = resources ,
150103 multi_node_worker_spec = worker_resources ,
151104 wait_for_predictor_pods = False ,
152- external_route = True ,
153105 timeout = Timeout .TIMEOUT_30MIN ,
154106 ) as isvc :
155107 wait_for_inference_deployment_replicas (
@@ -177,6 +129,12 @@ def multi_node_predictor_pods_scope_class(
177129def patched_multi_node_isvc_external_route (
178130 multi_node_inference_service : InferenceService ,
179131) -> Generator [InferenceService , Any , Any ]:
132+ multi_node_inference_service .wait_for_condition (
133+ condition = multi_node_inference_service .Condition .READY ,
134+ status = multi_node_inference_service .Condition .Status .TRUE ,
135+ timeout = Timeout .TIMEOUT_10MIN ,
136+ )
137+
180138 with ResourceEditor (
181139 patches = {
182140 multi_node_inference_service : {
@@ -185,7 +143,7 @@ def patched_multi_node_isvc_external_route(
185143 }
186144 ):
187145 for sample in TimeoutSampler (
188- wait_timeout = Timeout .TIMEOUT_1MIN ,
146+ wait_timeout = Timeout .TIMEOUT_5MIN ,
189147 sleep = 1 ,
190148 func = lambda : multi_node_inference_service .instance .status ,
191149 ):
@@ -258,3 +216,8 @@ def deleted_multi_node_pod(
258216 isvc = multi_node_inference_service ,
259217 timeout = Timeout .TIMEOUT_10MIN ,
260218 )
219+
220+ wait_for_vllm_health (
221+ client = unprivileged_client ,
222+ isvc = multi_node_inference_service ,
223+ )
0 commit comments