1010from utilities .exceptions import DeploymentValidationError
1111from utilities .constants import RunTimeConfigs , KServeDeploymentType , ModelVersion , Timeout
1212from utilities .general import create_isvc_label_selector_str
13+ from ocp_resources .pod import Pod
1314
1415
1516pytestmark = [
1819 pytest .mark .usefixtures ("valid_aws_config" ),
1920]
2021
# Names used for the test namespace and the Kueue objects requested via the
# parametrized fixtures below.
NAMESPACE_NAME = "kueue-isvc-raw-test"
LOCAL_QUEUE_NAME = "local-queue-raw"
CLUSTER_QUEUE_NAME = "cluster-queue-raw"
RESOURCE_FLAVOR_NAME = "default-flavor-raw"

# Quota passed to the cluster queue; the same values are reused as the
# InferenceService resource limits.
CPU_QUOTA = 2
MEMORY_QUOTA = "10Gi"
ISVC_RESOURCES = {
    "requests": {"cpu": "1", "memory": "8Gi"},
    "limits": {"cpu": CPU_QUOTA, "memory": MEMORY_QUOTA},
}

# MIN_REPLICAS needs to be 1, or the test must be changed to check for the
# number of available replicas.
MIN_REPLICAS = 1
MAX_REPLICAS = 2
3133
3234
3335@pytest .mark .rawdeployment
3638 "kueue_cluster_queue_from_template, kueue_resource_flavor_from_template, kueue_local_queue_from_template" ,
3739 [
3840 pytest .param (
39- {"name" : "kueue-isvc-raw-test" , "add-kueue-label" : True },
41+ {"name" : NAMESPACE_NAME , "add-kueue-label" : True },
4042 RunTimeConfigs .ONNX_OPSET13_RUNTIME_CONFIG ,
4143 {
4244 "name" : "kueue-isvc-raw" ,
43- "min-replicas" : min_replicas ,
44- "max-replicas" : max_replicas ,
45- "labels" : {"kueue.x-k8s.io/queue-name" : local_queue_name },
45+ "min-replicas" : MIN_REPLICAS ,
46+ "max-replicas" : MAX_REPLICAS ,
47+ "labels" : {"kueue.x-k8s.io/queue-name" : LOCAL_QUEUE_NAME },
4648 "deployment-mode" : KServeDeploymentType .RAW_DEPLOYMENT ,
4749 "model-dir" : "test-dir" ,
4850 "model-version" : ModelVersion .OPSET13 ,
49- "resources" : isvc_resources ,
51+ "resources" : ISVC_RESOURCES ,
5052 },
5153 {
52- "name" : cluster_queue_name ,
53- "resource_flavor_name" : resource_flavor_name ,
54- "cpu_quota" : cpu_quota ,
55- "memory_quota" : memory_quota ,
54+ "name" : CLUSTER_QUEUE_NAME ,
55+ "resource_flavor_name" : RESOURCE_FLAVOR_NAME ,
56+ "cpu_quota" : CPU_QUOTA ,
57+ "memory_quota" : MEMORY_QUOTA ,
58+ # "namespace_selector": {"matchLabels": {"kubernetes.io/metadata.name": NAMESPACE_NAME}},
5659 "namespace_selector" : {},
5760 },
58- {"name" : resource_flavor_name },
59- {"name" : local_queue_name , "cluster_queue" : cluster_queue_name },
61+ {"name" : RESOURCE_FLAVOR_NAME },
62+ {"name" : LOCAL_QUEUE_NAME , "cluster_queue" : CLUSTER_QUEUE_NAME },
6063 )
6164 ],
6265 indirect = True ,
@@ -67,8 +70,8 @@ class TestKueueInferenceServiceRaw:
6770 def test_kueue_inference_service_raw (
6871 self ,
6972 admin_client ,
70- kueue_cluster_queue_from_template ,
7173 kueue_resource_flavor_from_template ,
74+ kueue_cluster_queue_from_template ,
7275 kueue_local_queue_from_template ,
7376 kueue_raw_inference_service ,
7477 kueue_kserve_serving_runtime ,
@@ -89,12 +92,14 @@ def test_kueue_inference_service_raw(
8992 )
9093 )
9194 if len (deployments ) != 1 :
92- raise DeploymentValidationError ("Too many deployments found" )
95+ deployment_names = [deployment .instance .metadata .name for deployment in deployments ]
96+ raise DeploymentValidationError (f"Expected 1 deployment, got { len (deployments )} : { deployment_names } " )
9397
9498 deployment = deployments [0 ]
9599 deployment .wait_for_replicas (deployed = True )
96- if deployment .instance .spec .replicas != 1 :
97- raise DeploymentValidationError ("Deployment should have 1 replica" )
100+ replicas = deployment .instance .spec .replicas
101+ if replicas != 1 :
102+ raise DeploymentValidationError (f"Deployment should have 1 replica, got { replicas } " )
98103
99104 # Update inference service to request 2 replicas
100105 isvc_to_update = kueue_raw_inference_service .instance .to_dict ()
@@ -113,11 +118,55 @@ def test_kueue_inference_service_raw(
113118 )
114119 )
115120 if len (deployments ) != 1 :
116- raise DeploymentValidationError ("Too many deployments found" )
121+ deployment_names = [deployment .instance .metadata .name for deployment in deployments ]
122+ raise DeploymentValidationError (f"Expected 1 deployment, got { len (deployments )} : { deployment_names } " )
117123
118124 deployment = deployments [0 ]
try:
    deployment.wait_for_replicas(deployed=True, timeout=Timeout.TIMEOUT_30SEC)
except TimeoutExpiredError as e:
    # NOTE(review): every check below runs only when the wait times out; if
    # wait_for_replicas succeeds, nothing here is verified — confirm that a
    # successful wait should not itself be treated as a failure.
    available_replicas = deployment.instance.status.availableReplicas
    if available_replicas != 1:
        raise DeploymentValidationError(
            f"Deployment should have 1 available replica, got {available_replicas}"
        ) from None

    # Get pods that match isvc labels and verify their status
    pods = list(
        Pod.get(
            label_selector=",".join(labels),
            namespace=kueue_raw_inference_service.namespace,
            dyn_client=admin_client,
        )
    )

    if len(pods) != 2:
        pod_names = [pod.instance.metadata.name for pod in pods]
        raise DeploymentValidationError(f"Expected 2 pods, got {len(pods)}: {pod_names}") from e

    running_pods = 0
    gated_pods = 0
    for pod in pods:
        # Read the status once so phase and conditions come from the same snapshot.
        pod_status = pod.instance.status
        if pod_status.phase == "Running":
            running_pods += 1
        elif pod_status.phase == "Pending" and any(
            # any() rather than all(): a gated pod is identified by having a
            # PodScheduled=False/SchedulingGated condition. all() would be
            # vacuously true for an empty condition list and false as soon as
            # any unrelated condition is present.
            condition.type == "PodScheduled"
            and condition.status == "False"
            and condition.reason == "SchedulingGated"
            for condition in pod_status.conditions or []
        ):
            gated_pods += 1

    if running_pods != 1 or gated_pods != 1:
        raise DeploymentValidationError(
            f"Expected 1 Running pod and 1 SchedulingGated pod, "
            f"got {running_pods} Running and {gated_pods} SchedulingGated"
        ) from e

    # Check InferenceService status for total model copies
    # Refresh the isvc instance to get latest status
    kueue_raw_inference_service.get()
    isvc = kueue_raw_inference_service.instance
    total_copies = isvc.status.modelStatus.copies.totalCopies
    if total_copies != 1:
        raise DeploymentValidationError(
            f"InferenceService should have 1 total model copy, got {total_copies}"
        ) from e
0 commit comments