@@ -112,7 +112,9 @@ func TestBuildJobSubmitCommandWithSidecarMode(t *testing.T) {
112112 },
113113 }
114114
115+ address := "http://127.0.0.1:8265"
115116 expected := []string {
117+ // Wait for Dashboard GCS health
116118 "until" ,
117119 fmt .Sprintf (
118120 utils .BaseWgetHealthCommand ,
@@ -121,8 +123,19 @@ func TestBuildJobSubmitCommandWithSidecarMode(t *testing.T) {
121123 utils .RayDashboardGCSHealthPath ,
122124 ),
123125 ">/dev/null" , "2>&1" , ";" ,
124- "do" , "echo" , strconv .Quote ("Waiting for Ray Dashboard GCS to become healthy at http://127.0.0.1:8265 ..." ), ";" , "sleep" , "2" , ";" , "done" , ";" ,
125- "ray" , "job" , "submit" , "--address" , "http://127.0.0.1:8265" ,
126+ "do" , "echo" , strconv .Quote ("Waiting for Ray Dashboard GCS to become healthy at " + address + " ..." ), ";" , "sleep" , "2" , ";" , "done" , ";" ,
127+ // Wait for expected nodes to register
128+ "if" , "[" , "-n" , "\" $" + utils .RAY_EXPECTED_MIN_WORKERS + "\" " , "]" , "&&" , "[" , "\" $" + utils .RAY_EXPECTED_MIN_WORKERS + "\" " , "-gt" , "\" 0\" " , "]" , ";" , "then" ,
129+ "EXPECTED_NODES=$(($" + utils .RAY_EXPECTED_MIN_WORKERS + " + 1))" , ";" ,
130+ "echo" , strconv .Quote ("Waiting for $EXPECTED_NODES nodes (1 head + $" + utils .RAY_EXPECTED_MIN_WORKERS + " workers) to register..." ), ";" ,
131+ "until" , "[" ,
132+ "\" $(wget -q -O- " + address + "/nodes?view=summary 2>/dev/null | python3 -c \" import sys,json; d=json.load(sys.stdin); print(len([n for n in d.get('data',{}).get('summary',[]) if n.get('raylet',{}).get('state','')=='ALIVE']))\" 2>/dev/null || echo 0)\" " ,
133+ "-ge" , "\" $EXPECTED_NODES\" " , "]" , ";" ,
134+ "do" , "echo" , strconv .Quote ("Waiting for Ray nodes to register. Expected: $EXPECTED_NODES ..." ), ";" , "sleep" , "2" , ";" , "done" , ";" ,
135+ "echo" , strconv .Quote ("All expected nodes are registered." ), ";" ,
136+ "fi" , ";" ,
137+ // Job submit command
138+ "ray" , "job" , "submit" , "--address" , address ,
126139 "--runtime-env-json" , strconv .Quote (`{"test":"test"}` ),
127140 "--metadata-json" , strconv .Quote (`{"testKey":"testValue"}` ),
128141 "--submission-id" , "testJobId" ,
@@ -240,3 +253,120 @@ func TestGetSubmitterTemplate(t *testing.T) {
240253 template := GetSubmitterTemplate (& rayJob .Spec , & rayCluster .Spec )
241254 assert .Equal (t , template .Spec .Containers [0 ].Image , rayCluster .Spec .HeadGroupSpec .Template .Spec .Containers [utils .RayContainerIndex ].Image )
242255}
256+
257+ func TestGetMinReplicasFromSpec (t * testing.T ) {
258+ tests := []struct {
259+ spec * rayv1.RayClusterSpec
260+ name string
261+ expected int32
262+ }{
263+ {
264+ name : "nil spec returns 0" ,
265+ spec : nil ,
266+ expected : 0 ,
267+ },
268+ {
269+ name : "no worker groups returns 0" ,
270+ spec : & rayv1.RayClusterSpec {
271+ WorkerGroupSpecs : []rayv1.WorkerGroupSpec {},
272+ },
273+ expected : 0 ,
274+ },
275+ {
276+ name : "single worker group with minReplicas" ,
277+ spec : & rayv1.RayClusterSpec {
278+ WorkerGroupSpecs : []rayv1.WorkerGroupSpec {
279+ {
280+ MinReplicas : ptrInt32 (2 ),
281+ NumOfHosts : 1 ,
282+ },
283+ },
284+ },
285+ expected : 2 ,
286+ },
287+ {
288+ name : "multiple worker groups" ,
289+ spec : & rayv1.RayClusterSpec {
290+ WorkerGroupSpecs : []rayv1.WorkerGroupSpec {
291+ {
292+ MinReplicas : ptrInt32 (2 ),
293+ NumOfHosts : 1 ,
294+ },
295+ {
296+ MinReplicas : ptrInt32 (3 ),
297+ NumOfHosts : 1 ,
298+ },
299+ },
300+ },
301+ expected : 5 ,
302+ },
303+ {
304+ name : "worker group with NumOfHosts > 1" ,
305+ spec : & rayv1.RayClusterSpec {
306+ WorkerGroupSpecs : []rayv1.WorkerGroupSpec {
307+ {
308+ MinReplicas : ptrInt32 (2 ),
309+ NumOfHosts : 2 ,
310+ },
311+ },
312+ },
313+ expected : 4 ,
314+ },
315+ {
316+ name : "suspended worker group is skipped" ,
317+ spec : & rayv1.RayClusterSpec {
318+ WorkerGroupSpecs : []rayv1.WorkerGroupSpec {
319+ {
320+ MinReplicas : ptrInt32 (2 ),
321+ NumOfHosts : 1 ,
322+ Suspend : ptrBool (true ),
323+ },
324+ {
325+ MinReplicas : ptrInt32 (3 ),
326+ NumOfHosts : 1 ,
327+ },
328+ },
329+ },
330+ expected : 3 ,
331+ },
332+ {
333+ name : "nil minReplicas defaults to 0" ,
334+ spec : & rayv1.RayClusterSpec {
335+ WorkerGroupSpecs : []rayv1.WorkerGroupSpec {
336+ {
337+ MinReplicas : nil ,
338+ NumOfHosts : 1 ,
339+ },
340+ },
341+ },
342+ expected : 0 ,
343+ },
344+ {
345+ name : "NumOfHosts 0 results in 0 workers for that group" ,
346+ spec : & rayv1.RayClusterSpec {
347+ WorkerGroupSpecs : []rayv1.WorkerGroupSpec {
348+ {
349+ MinReplicas : ptrInt32 (2 ),
350+ NumOfHosts : 0 ,
351+ },
352+ },
353+ },
354+ expected : 0 ,
355+ },
356+ }
357+
358+ for _ , tt := range tests {
359+ t .Run (tt .name , func (t * testing.T ) {
360+ result := GetMinReplicasFromSpec (tt .spec )
361+ assert .Equal (t , tt .expected , result )
362+ })
363+ }
364+ }
365+
// ptrInt32 returns a pointer to a copy of v. It lets test fixtures populate
// optional *int32 fields (such as MinReplicas) from a literal in one expression.
// Every call yields a distinct pointer, so fixtures never alias each other.
func ptrInt32(v int32) *int32 {
	return &v
}
369+
// ptrBool returns a pointer to a copy of v. It lets test fixtures populate
// optional *bool fields (such as Suspend) from a literal in one expression.
// Every call yields a distinct pointer, so fixtures never alias each other.
func ptrBool(v bool) *bool {
	return &v
}
0 commit comments