99from ocp_resources .inference_service import InferenceService
1010from ocp_resources .namespace import Namespace
1111from ocp_resources .node import Node
12- from ocp_resources .persistent_volume_claim import PersistentVolumeClaim
1312from ocp_resources .pod import Pod
1413from ocp_resources .resource import ResourceEditor
1514from ocp_resources .secret import Secret
2322 get_pods_by_isvc_generation ,
2423)
2524from utilities .constants import KServeDeploymentType , Labels , ModelCarImage , Protocols , Timeout
26- from utilities .general import download_model_data
2725from utilities .inference_utils import create_isvc
2826from utilities .infra import (
2927 get_pods_by_isvc_label ,
30- verify_no_failed_pods ,
3128 wait_for_inference_deployment_replicas ,
3229)
3330from utilities .serving_runtime import ServingRuntimeFromTemplate
@@ -46,31 +43,6 @@ def skip_if_no_gpu_nodes(nvidia_gpu_nodes: list[Node]) -> None:
4643 pytest .skip ("Multi-node tests can only run on a Cluster with at least 2 GPU Worker nodes" )
4744
4845
@pytest.fixture(scope="class")
def models_bucket_downloaded_model_data(
    request: FixtureRequest,
    admin_client: DynamicClient,
    unprivileged_model_namespace: Namespace,
    models_s3_bucket_name: str,
    model_pvc: PersistentVolumeClaim,
    aws_secret_access_key: str,
    aws_access_key_id: str,
    models_s3_bucket_endpoint: str,
    models_s3_bucket_region: str,
) -> str:
    """Download the parametrized model from the models S3 bucket into the model PVC.

    The model directory is taken from ``request.param["model-dir"]``.
    Returns whatever path string ``download_model_data`` reports for the
    downloaded data (presumably the model path inside the PVC — confirm
    against ``utilities.general.download_model_data``).
    """
    return download_model_data(
        client=admin_client,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        model_namespace=unprivileged_model_namespace.name,
        model_pvc_name=model_pvc.name,
        bucket_name=models_s3_bucket_name,
        aws_endpoint_url=models_s3_bucket_endpoint,
        aws_default_region=models_s3_bucket_region,
        model_path=request.param["model-dir"],
    )
7446@pytest .fixture (scope = "class" )
7547def multi_node_serving_runtime (
7648 request : FixtureRequest ,
# NOTE(review): decorator reconstructed from context — the hunk header hides the
# line above the signature; sibling fixtures in this file use scope="class".
@pytest.fixture(scope="class")
def multi_node_inference_service(
    request: FixtureRequest,
    unprivileged_client: DynamicClient,
    multi_node_serving_runtime: ServingRuntime,
) -> Generator[InferenceService, Any, Any]:
    """Create a RAW-deployment multi-node InferenceService from a model-car image.

    Yields the InferenceService after both deployments (head and worker)
    report the expected replicas. The service name comes from
    ``request.param["name"]``.
    """
    # Compute resources applied to both the head container (via ``resources``)
    # and the worker container (via ``multi_node_worker_spec``).
    resources = {
        "requests": {
            "cpu": "1",
            "memory": "4G",
        },
        "limits": {
            "cpu": "2",
            "memory": "12G",
        },
    }

    worker_resources = {
        "containers": [
            {
                "name": "worker-container",
                "resources": resources,
            }
        ]
    }

    with create_isvc(
        client=unprivileged_client,
        name=request.param["name"],
        namespace=multi_node_serving_runtime.namespace,
        runtime=multi_node_serving_runtime.name,
        storage_uri=ModelCarImage.GRANITE_8B_CODE_INSTRUCT,
        model_format=multi_node_serving_runtime.instance.spec.supportedModelFormats[0].name,
        deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
        autoscaler_mode="none",
        resources=resources,
        multi_node_worker_spec=worker_resources,
        wait_for_predictor_pods=False,
        # Model-car image pull for a large model is slow; allow a long timeout.
        timeout=Timeout.TIMEOUT_30MIN,
    ) as isvc:
        wait_for_inference_deployment_replicas(
            client=unprivileged_client,
            isvc=isvc,
            expected_num_deployments=2,
            runtime_name=multi_node_serving_runtime.name,
            timeout=Timeout.TIMEOUT_15MIN,
        )
        yield isvc
@@ -269,16 +262,11 @@ def deleted_multi_node_pod(
269262 role = request .param ["pod-role" ],
270263 )
271264
272- verify_no_failed_pods (
273- client = unprivileged_client ,
274- isvc = multi_node_inference_service ,
275- timeout = Timeout .TIMEOUT_10MIN ,
276- )
277-
278265 wait_for_inference_deployment_replicas (
279266 client = unprivileged_client ,
280267 isvc = multi_node_inference_service ,
281268 expected_num_deployments = 2 ,
269+ timeout = Timeout .TIMEOUT_15MIN ,
282270 )
283271
284272 _warmup_inference_and_wait_for_recovery (
@@ -317,7 +305,7 @@ def _warmup_inference_and_wait_for_recovery(
317305 ]
318306
319307 for sample in TimeoutSampler (
320- wait_timeout = Timeout .TIMEOUT_10MIN ,
308+ wait_timeout = Timeout .TIMEOUT_30MIN ,
321309 sleep = 30 ,
322310 func = _probe_inference_health ,
323311 client = client ,
0 commit comments