 from ocp_resources.node import Node
 from ocp_resources.persistent_volume_claim import PersistentVolumeClaim
 from ocp_resources.pod import Pod
+from ocp_resources.resource import ResourceEditor
+from ocp_resources.secret import Secret
 from ocp_resources.serving_runtime import ServingRuntime
+from pytest_testconfig import config as py_config
+from timeout_sampler import TimeoutSampler
 
-from utilities.constants import KServeDeploymentType
+from tests.model_serving.model_server.multi_node.utils import (
+    delete_multi_node_pod_by_role,
+)
+from utilities.constants import KServeDeploymentType, Labels, Protocols, Timeout
 from utilities.general import download_model_data
 from utilities.inference_utils import create_isvc
 from utilities.infra import (
     get_pods_by_isvc_label,
+    verify_no_failed_pods,
     wait_for_inference_deployment_replicas,
 )
+from utilities.serving_runtime import ServingRuntimeFromTemplate
 
 
 @pytest.fixture(scope="session")
@@ -61,28 +70,47 @@ def models_bucket_downloaded_model_data(
 
 
 @pytest.fixture(scope="class")
-def multi_node_inference_service(
+def multi_node_serving_runtime(
     request: FixtureRequest,
     admin_client: DynamicClient,
     model_namespace: Namespace,
-    serving_runtime_from_template: ServingRuntime,
+) -> Generator[ServingRuntime, Any, Any]:
+    with ServingRuntimeFromTemplate(
+        client=admin_client,
+        name="vllm-multinode-runtime",  # TODO: rename servingruntime when RHOAIENG-16147 is resolved
+        namespace=model_namespace.name,
+        template_name="vllm-multinode-runtime-template",
+        multi_model=False,
+        enable_http=True,
+    ) as model_runtime:
+        yield model_runtime
+
+
+@pytest.fixture(scope="class")
+def multi_node_inference_service(
+    request: FixtureRequest,
+    admin_client: DynamicClient,
+    multi_node_serving_runtime: ServingRuntime,
     model_pvc: PersistentVolumeClaim,
     models_bucket_downloaded_model_data: str,
 ) -> Generator[InferenceService, Any, Any]:
     with create_isvc(
         client=admin_client,
         name=request.param["name"],
-        namespace=model_namespace.name,
-        runtime=serving_runtime_from_template.name,
+        namespace=multi_node_serving_runtime.namespace,
+        runtime=multi_node_serving_runtime.name,
         storage_uri=f"pvc://{model_pvc.name}/{models_bucket_downloaded_model_data}",
-        model_format=serving_runtime_from_template.instance.spec.supportedModelFormats[0].name,
+        model_format=multi_node_serving_runtime.instance.spec.supportedModelFormats[0].name,
         deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
         autoscaler_mode="external",
         multi_node_worker_spec={},
         wait_for_predictor_pods=False,
     ) as isvc:
         wait_for_inference_deployment_replicas(
-            client=admin_client, isvc=isvc, expected_num_deployments=2, runtime_name=serving_runtime_from_template.name
+            client=admin_client,
+            isvc=isvc,
+            expected_num_deployments=2,
+            runtime_name=multi_node_serving_runtime.name,
         )
         yield isvc
 
@@ -96,3 +124,90 @@ def multi_node_predictor_pods_scope_class(
         client=admin_client,
         isvc=multi_node_inference_service,
     )
+
+
+@pytest.fixture(scope="function")
+def patched_multi_node_isvc_external_route(
+    multi_node_inference_service: InferenceService,
+) -> Generator[InferenceService, Any, Any]:
+    with ResourceEditor(
+        patches={
+            multi_node_inference_service: {
+                "metadata": {"labels": {Labels.Kserve.NETWORKING_KSERVE_IO: Labels.Kserve.EXPOSED}},
+            }
+        }
+    ):
+        for sample in TimeoutSampler(
+            wait_timeout=Timeout.TIMEOUT_1MIN,
+            sleep=1,
+            func=lambda: multi_node_inference_service.instance.status,
+        ):
+            if sample and sample.get("url", "").startswith(Protocols.HTTPS):
+                break
+
+        yield multi_node_inference_service
+
+
+@pytest.fixture(scope="function")
+def patched_multi_node_worker_spec(
+    request: FixtureRequest,
+    multi_node_inference_service: InferenceService,
+) -> Generator[InferenceService, Any, Any]:
+    with ResourceEditor(
+        patches={
+            multi_node_inference_service: {
+                "spec": {
+                    "predictor": {"workerSpec": request.param["worker-spec"]},
+                },
+            }
+        }
+    ):
+        yield multi_node_inference_service
+
+
+@pytest.fixture()
+def ray_ca_tls_secret(admin_client: DynamicClient) -> Secret:
+    return Secret(
+        client=admin_client,
+        name="ray-ca-tls",
+        namespace=py_config["applications_namespace"],
+    )
+
+
+@pytest.fixture()
+def ray_tls_secret(admin_client: DynamicClient, multi_node_inference_service: InferenceService) -> Secret:
+    return Secret(
+        client=admin_client,
+        name="ray-tls",
+        namespace=multi_node_inference_service.namespace,
+    )
+
+
+@pytest.fixture()
+def deleted_serving_runtime(
+    multi_node_serving_runtime: ServingRuntime,
+) -> Generator[None, Any, None]:
+    multi_node_serving_runtime.clean_up()
+
+    yield
+
+    multi_node_serving_runtime.deploy()
+
+
+@pytest.fixture()
+def deleted_multi_node_pod(
+    request: FixtureRequest,
+    admin_client: DynamicClient,
+    multi_node_inference_service: InferenceService,
+) -> None:
+    delete_multi_node_pod_by_role(
+        client=admin_client,
+        isvc=multi_node_inference_service,
+        role=request.param["pod-role"],
+    )
+
+    verify_no_failed_pods(
+        client=admin_client,
+        isvc=multi_node_inference_service,
+        timeout=Timeout.TIMEOUT_10MIN,
+    )
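
Several of these fixtures read their inputs from request.param ("name" in multi_node_inference_service, "worker-spec" in patched_multi_node_worker_spec, "pod-role" in deleted_multi_node_pod), so tests are expected to drive them via pytest indirect parametrization. A minimal usage sketch, with hypothetical test names and parameter values rather than this change's actual tests:

import pytest


# Hypothetical sketch: the test name and the parameter values below are assumptions,
# not tests added by this change.
@pytest.mark.parametrize(
    "multi_node_inference_service",
    [pytest.param({"name": "multi-node-model"})],
    indirect=True,
)
class TestMultiNodeSketch:
    @pytest.mark.parametrize(
        "deleted_multi_node_pod",
        [pytest.param({"pod-role": "worker"})],
        indirect=True,
    )
    def test_isvc_recovers_after_worker_pod_deletion(
        self,
        multi_node_inference_service,
        deleted_multi_node_pod,
    ):
        # deleted_multi_node_pod has already deleted the requested pod and waited for
        # the deployment to settle via verify_no_failed_pods; assert on the ISVC here.
        assert multi_node_inference_service.instance.status is not None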