11# test case for aggregating status of RayCluster
2- # case1. RayCluster with two status items
2+ # case1. RayCluster with two status items from different clusters
3+ # case2. RayCluster with single status item
4+ # case3. RayCluster with unhealthy cluster (replica failure)
35
46name : " RayCluster with two status items"
5- description : " Test aggregating status of RayCluster with two status items"
7+ description : " Test aggregating status of RayCluster with two status items from different clusters "
68desiredObj :
79 apiVersion : ray.io/v1
810 kind : RayCluster
911 metadata :
1012 name : sample
13+ namespace : default
1114 spec :
1215 rayVersion : ' 2.46.0'
1316 headGroupSpec :
@@ -40,14 +43,15 @@ desiredObj:
4043 template :
4144 spec :
4245 containers :
43- image : rayproject/ray:2.46.0
44- resources :
45- limits :
46- cpu : 1
47- memory : 1G
48- requests :
49- cpu : 1
50- memory : 1G
46+ - name : ray-worker
47+ image : rayproject/ray:2.46.0
48+ resources :
49+ limits :
50+ cpu : 1
51+ memory : 1G
52+ requests :
53+ cpu : 1
54+ memory : 1G
5155statusItems :
5256 - applied : true
5357 clusterName : member1
@@ -137,3 +141,335 @@ statusItems:
137141 ready : " 2025-09-21T03:56:50Z"
138142operation : AggregateStatus
139143output :
144+ aggregatedStatus :
145+ apiVersion : ray.io/v1
146+ kind : RayCluster
147+ metadata :
148+ name : sample
149+ namespace : default
150+ status :
151+ conditions :
152+ - lastTransitionTime : " 2025-09-21T03:55:30Z"
153+ message : " "
154+ reason : HeadPodRunningAndReady
155+ status : " True"
156+ type : HeadPodReady
157+ - lastTransitionTime : " 2025-09-21T03:55:45Z"
158+ message : All Ray Pods are ready for the first time
159+ reason : AllPodRunningAndReadyFirstTime
160+ status : " True"
161+ type : RayClusterProvisioned
162+ - lastTransitionTime : " 2025-09-21T03:54:44Z"
163+ message : " "
164+ reason : RayClusterSuspended
165+ status : " False"
166+ type : RayClusterSuspended
167+ - lastTransitionTime : " 2025-09-21T03:54:44Z"
168+ message : " "
169+ reason : RayClusterSuspending
170+ status : " False"
171+ type : RayClusterSuspending
172+ - lastTransitionTime : " 2025-09-21T03:56:30Z"
173+ message : " "
174+ reason : HeadPodRunningAndReady
175+ status : " True"
176+ type : HeadPodReady
177+ - lastTransitionTime : " 2025-09-21T03:56:50Z"
178+ message : All Ray Pods are ready for the first time
179+ reason : AllPodRunningAndReadyFirstTime
180+ status : " True"
181+ type : RayClusterProvisioned
182+ - lastTransitionTime : " 2025-09-21T03:54:50Z"
183+ message : " "
184+ reason : RayClusterSuspended
185+ status : " False"
186+ type : RayClusterSuspended
187+ - lastTransitionTime : " 2025-09-21T03:54:50Z"
188+ message : " "
189+ reason : RayClusterSuspending
190+ status : " False"
191+ type : RayClusterSuspending
192+ readyWorkerReplicas : 4
193+ availableWorkerReplicas : 3
194+ maxWorkerReplicas : 15
195+ minWorkerReplicas : 3
196+ desiredWorkerReplicas : 4
197+ desiredCPU : " 6"
198+ desiredGPU : " 1"
199+ desiredMemory : 8G
200+ desiredTPU : " 0"
201+ lastUpdateTime : " 2025-09-21T03:56:50Z"
202+ endpoints :
203+ client : " 10001"
204+ dashboard : " 8265"
205+ gcs-server : " 6379"
206+ metrics : " 8080"
207+ head :
208+ podIP : 10.244.0.6
209+ podName : sample-head-9cvfc
210+ serviceIP : 10.244.0.6
211+ serviceName : sample-head-svc
212+ state : ready
213+ stateTransitionTimes :
214+ ready : " 2025-09-21T03:55:45Z"
215+
216+ ---
217+ name : " RayCluster with single status item"
218+ description : " Test aggregating status of RayCluster with single status item"
219+ desiredObj :
220+ apiVersion : ray.io/v1
221+ kind : RayCluster
222+ metadata :
223+ name : single-cluster
224+ namespace : default
225+ spec :
226+ rayVersion : ' 2.46.0'
227+ headGroupSpec :
228+ template :
229+ spec :
230+ containers :
231+ - name : ray-head
232+ image : rayproject/ray:2.46.0
233+ resources :
234+ requests :
235+ cpu : 500m
236+ memory : 1G
237+ workerGroupSpecs :
238+ - replicas : 2
239+ groupName : small-group
240+ template :
241+ spec :
242+ containers :
243+ - name : ray-worker
244+ image : rayproject/ray:2.46.0
245+ statusItems :
246+ - applied : true
247+ clusterName : member1
248+ status :
249+ availableWorkerReplicas : 2
250+ conditions :
251+ - lastTransitionTime : " 2025-09-22T10:00:00Z"
252+ message : " "
253+ reason : HeadPodRunningAndReady
254+ status : " True"
255+ type : HeadPodReady
256+ - lastTransitionTime : " 2025-09-22T10:01:00Z"
257+ message : All Ray Pods are ready for the first time
258+ reason : AllPodRunningAndReadyFirstTime
259+ status : " True"
260+ type : RayClusterProvisioned
261+ - lastTransitionTime : " 2025-09-22T10:01:00Z"
262+ message : " "
263+ reason : NoReplicaFailure
264+ status : " False"
265+ type : RayClusterReplicaFailure
266+ desiredCPU : " 2.5"
267+ desiredMemory : 3G
268+ desiredWorkerReplicas : 2
269+ endpoints :
270+ client : " 10001"
271+ dashboard : " 8265"
272+ gcs-server : " 6379"
273+ head :
274+ podIP : 10.244.1.5
275+ podName : single-cluster-head-abc12
276+ serviceIP : 10.96.1.100
277+ serviceName : single-cluster-head-svc
278+ lastUpdateTime : " 2025-09-22T10:01:00Z"
279+ maxWorkerReplicas : 2
280+ minWorkerReplicas : 2
281+ readyWorkerReplicas : 2
282+ state : ready
283+ operation : AggregateStatus
284+ output :
285+ aggregatedStatus :
286+ apiVersion : ray.io/v1
287+ kind : RayCluster
288+ metadata :
289+ name : single-cluster
290+ namespace : default
291+ status :
292+ availableWorkerReplicas : 2
293+ conditions :
294+ - lastTransitionTime : " 2025-09-22T10:00:00Z"
295+ message : " "
296+ reason : HeadPodRunningAndReady
297+ status : " True"
298+ type : HeadPodReady
299+ - lastTransitionTime : " 2025-09-22T10:01:00Z"
300+ message : All Ray Pods are ready for the first time
301+ reason : AllPodRunningAndReadyFirstTime
302+ status : " True"
303+ type : RayClusterProvisioned
304+ - lastTransitionTime : " 2025-09-22T10:01:00Z"
305+ message : " "
306+ reason : NoReplicaFailure
307+ status : " False"
308+ type : RayClusterReplicaFailure
309+ desiredCPU : " 2.5"
310+ desiredMemory : 3G
311+ desiredWorkerReplicas : 2
312+ endpoints :
313+ client : " 10001"
314+ dashboard : " 8265"
315+ gcs-server : " 6379"
316+ head :
317+ podIP : 10.244.1.5
318+ podName : single-cluster-head-abc12
319+ serviceIP : 10.96.1.100
320+ serviceName : single-cluster-head-svc
321+ lastUpdateTime : " 2025-09-22T10:01:00Z"
322+ maxWorkerReplicas : 2
323+ minWorkerReplicas : 2
324+ readyWorkerReplicas : 2
325+ state : ready
326+
327+ ---
328+ name : " RayCluster with unhealthy cluster"
329+ description : " Test aggregating status when one cluster has replica failure"
330+ desiredObj :
331+ apiVersion : ray.io/v1
332+ kind : RayCluster
333+ metadata :
334+ name : unhealthy-cluster
335+ namespace : production
336+ spec :
337+ rayVersion : ' 2.46.0'
338+ headGroupSpec :
339+ template :
340+ spec :
341+ containers :
342+ - name : ray-head
343+ image : rayproject/ray:2.46.0
344+ workerGroupSpecs :
345+ - replicas : 5
346+ groupName : large-group
347+ template :
348+ spec :
349+ containers :
350+ - name : ray-worker
351+ image : rayproject/ray:2.46.0
352+ resources :
353+ requests :
354+ cpu : 2
355+ memory : 4G
356+ statusItems :
357+ - applied : true
358+ clusterName : member1
359+ status :
360+ availableWorkerReplicas : 5
361+ conditions :
362+ - lastTransitionTime : " 2025-09-23T12:00:00Z"
363+ message : " "
364+ reason : HeadPodRunningAndReady
365+ status : " True"
366+ type : HeadPodReady
367+ - lastTransitionTime : " 2025-09-23T12:05:00Z"
368+ message : All Ray Pods are ready
369+ reason : AllPodRunningAndReady
370+ status : " True"
371+ type : RayClusterProvisioned
372+ desiredCPU : " 11"
373+ desiredGPU : " 0"
374+ desiredMemory : 21G
375+ desiredTPU : " 0"
376+ desiredWorkerReplicas : 5
377+ endpoints :
378+ client : " 10001"
379+ dashboard : " 8265"
380+ gcs-server : " 6379"
381+ head :
382+ podIP : 10.244.2.10
383+ podName : unhealthy-cluster-head-xyz
384+ serviceIP : 10.96.2.200
385+ serviceName : unhealthy-cluster-head-svc
386+ lastUpdateTime : " 2025-09-23T12:05:00Z"
387+ maxWorkerReplicas : 5
388+ minWorkerReplicas : 0
389+ readyWorkerReplicas : 5
390+ state : ready
391+ - applied : true
392+ clusterName : member2
393+ status :
394+ availableWorkerReplicas : 2
395+ conditions :
396+ - lastTransitionTime : " 2025-09-23T12:00:30Z"
397+ message : " "
398+ reason : HeadPodRunningAndReady
399+ status : " True"
400+ type : HeadPodReady
401+ - lastTransitionTime : " 2025-09-23T12:03:00Z"
402+ message : Some Ray Pods are not ready
403+ reason : SomePodNotReady
404+ status : " False"
405+ type : RayClusterProvisioned
406+ - lastTransitionTime : " 2025-09-23T12:03:00Z"
407+ message : " 3 replicas failed to start"
408+ reason : WorkerReplicasFailed
409+ status : " True"
410+ type : RayClusterReplicaFailure
411+ desiredCPU : " 11"
412+ desiredGPU : " 0"
413+ desiredMemory : 21G
414+ desiredTPU : " 0"
415+ desiredWorkerReplicas : 5
416+ lastUpdateTime : " 2025-09-23T12:10:00Z"
417+ maxWorkerReplicas : 5
418+ minWorkerReplicas : 0
419+ readyWorkerReplicas : 2
420+ state : unhealthy
421+ operation : AggregateStatus
422+ output :
423+ aggregatedStatus :
424+ apiVersion : ray.io/v1
425+ kind : RayCluster
426+ metadata :
427+ name : unhealthy-cluster
428+ namespace : production
429+ status :
430+ conditions :
431+ - lastTransitionTime : " 2025-09-23T12:00:00Z"
432+ message : " "
433+ reason : HeadPodRunningAndReady
434+ status : " True"
435+ type : HeadPodReady
436+ - lastTransitionTime : " 2025-09-23T12:05:00Z"
437+ message : All Ray Pods are ready
438+ reason : AllPodRunningAndReady
439+ status : " True"
440+ type : RayClusterProvisioned
441+ - lastTransitionTime : " 2025-09-23T12:00:30Z"
442+ message : " "
443+ reason : HeadPodRunningAndReady
444+ status : " True"
445+ type : HeadPodReady
446+ - lastTransitionTime : " 2025-09-23T12:03:00Z"
447+ message : Some Ray Pods are not ready
448+ reason : SomePodNotReady
449+ status : " False"
450+ type : RayClusterProvisioned
451+ - lastTransitionTime : " 2025-09-23T12:03:00Z"
452+ message : " 3 replicas failed to start"
453+ reason : WorkerReplicasFailed
454+ status : " True"
455+ type : RayClusterReplicaFailure
456+ readyWorkerReplicas : 7
457+ availableWorkerReplicas : 7
458+ desiredWorkerReplicas : 10
459+ desiredCPU : " 22"
460+ desiredGPU : " 0"
461+ desiredMemory : 42G
462+ desiredTPU : " 0"
463+ maxWorkerReplicas : 10
464+ minWorkerReplicas : 0
465+ lastUpdateTime : " 2025-09-23T12:10:00Z"
466+ endpoints :
467+ client : " 10001"
468+ dashboard : " 8265"
469+ gcs-server : " 6379"
470+ head :
471+ podIP : 10.244.2.10
472+ podName : unhealthy-cluster-head-xyz
473+ serviceIP : 10.96.2.200
474+ serviceName : unhealthy-cluster-head-svc
475+ state : ready
0 commit comments