@@ -209,7 +209,7 @@ func testLogFileEndpointLiveCluster(test Test, g *WithT, namespace *corev1.Names
209209 {"lines+timeout+filter" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s&filename=%s&lines=50&timeout=10&filter_ansi_code=true" , u , EndpointLogFile , n , filename ) }, http .StatusOK },
210210
211211 // Missing mandatory parameters
212- {"missing node_id" , func (u , n string ) string { return fmt .Sprintf ("%s%s?filename=%s" , u , EndpointLogFile , filename ) }, http .StatusBadRequest },
212+ {"missing node_id and node_ip " , func (u , n string ) string { return fmt .Sprintf ("%s%s?filename=%s" , u , EndpointLogFile , filename ) }, http .StatusBadRequest },
213213 {"missing filename" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s" , u , EndpointLogFile , n ) }, http .StatusBadRequest },
214214 {"missing both" , func (u , n string ) string { return fmt .Sprintf ("%s%s" , u , EndpointLogFile ) }, http .StatusBadRequest },
215215
@@ -223,6 +223,9 @@ func testLogFileEndpointLiveCluster(test Test, g *WithT, namespace *corev1.Names
223223 {"file not found" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s&filename=nonexistent.log" , u , EndpointLogFile , n ) }, http .StatusInternalServerError },
224224 {"task_id invalid (not found)" , func (u , n string ) string { return fmt .Sprintf ("%s%s?task_id=nonexistent-task-id" , u , EndpointLogFile ) }, http .StatusInternalServerError },
225225
226+ // node_ip parameter tests
227+ {"node_ip invalid (non-existent)" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_ip=192.168.255.255&filename=%s" , u , EndpointLogFile , filename ) }, http .StatusInternalServerError },
228+
226229 // Path traversal attacks
227230 {"traversal ../etc/passwd" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s&filename=../etc/passwd" , u , EndpointLogFile , n ) }, http .StatusBadRequest },
228231 {"traversal .." , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s&filename=.." , u , EndpointLogFile , n ) }, http .StatusBadRequest },
@@ -379,6 +382,30 @@ func testLogFileEndpointLiveCluster(test Test, g *WithT, namespace *corev1.Names
379382 g .Expect (resp .StatusCode ).To (Equal (http .StatusInternalServerError ), "Expected 500 for non-existent pid, got %d: %s" , resp .StatusCode , string (body ))
380383 })
381384
385+ // Sub-test for node_ip parameter (live cluster)
386+ test .T ().Run ("node_ip parameter" , func (t * testing.T ) {
387+ g := NewWithT (t )
388+
389+ // Get node IP from head pod (use Pod IP, not Host IP)
390+ // Ray registers nodes with Pod IP (--node-ip-address flag)
391+ headPod , err := GetHeadPod (test , rayCluster )
392+ g .Expect (err ).NotTo (HaveOccurred ())
393+ nodeIP := headPod .Status .PodIP
394+ g .Expect (nodeIP ).NotTo (BeEmpty (), "Head pod should have a pod IP" )
395+ LogWithTimestamp (t , "Found head pod with IP: %s" , nodeIP )
396+
397+ // Test successful case: node_ip + filename
398+ url := fmt .Sprintf ("%s%s?node_ip=%s&filename=%s" , historyServerURL , EndpointLogFile , nodeIP , filename )
399+ resp , err := client .Get (url )
400+ g .Expect (err ).NotTo (HaveOccurred ())
401+ body , _ := io .ReadAll (resp .Body )
402+ resp .Body .Close ()
403+ // For live cluster, the request is proxied to Ray Dashboard
404+ // The dashboard should be able to resolve node_ip to node_id
405+ g .Expect (resp .StatusCode ).To (Equal (http .StatusOK ), "Expected OK for valid node_ip, got %d: %s" , resp .StatusCode , string (body ))
406+ g .Expect (len (body )).To (BeNumerically (">" , 0 ))
407+ })
408+
382409 DeleteS3Bucket (test , g , s3Client )
383410 LogWithTimestamp (test .T (), "Log file endpoint tests completed" )
384411}
@@ -398,8 +425,15 @@ func testLogFileEndpointDeadCluster(test Test, g *WithT, namespace *corev1.Names
398425 rayCluster := PrepareTestEnv (test , g , namespace , s3Client )
399426 ApplyRayJobAndWaitForCompletion (test , g , namespace , rayCluster )
400427
428+ // Capture node IP and ID before deleting cluster (for node_ip tests later)
429+ headPod , err := GetHeadPod (test , rayCluster )
430+ g .Expect (err ).NotTo (HaveOccurred ())
431+ savedNodeIP := headPod .Status .PodIP
432+ savedNodeID := GetNodeIDFromHeadPod (test , g , rayCluster )
433+ LogWithTimestamp (test .T (), "Captured node IP %s and node ID %s before cluster deletion" , savedNodeIP , savedNodeID )
434+
401435 // Delete RayCluster to trigger log upload
402- err : = test .Client ().Ray ().RayV1 ().RayClusters (namespace .Name ).Delete (test .Ctx (), rayCluster .Name , metav1.DeleteOptions {})
436+ err = test .Client ().Ray ().RayV1 ().RayClusters (namespace .Name ).Delete (test .Ctx (), rayCluster .Name , metav1.DeleteOptions {})
403437 g .Expect (err ).NotTo (HaveOccurred ())
404438 LogWithTimestamp (test .T (), "Deleted RayCluster %s/%s" , namespace .Name , rayCluster .Name )
405439
@@ -458,7 +492,7 @@ func testLogFileEndpointDeadCluster(test Test, g *WithT, namespace *corev1.Names
458492 {"all parameters" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s&filename=%s&lines=100&timeout=15&attempt_number=0&download_file=true&filter_ansi_code=true" , u , EndpointLogFile , n , filename ) }, http .StatusOK },
459493
460494 // Missing mandatory parameters
461- {"missing node_id" , func (u , n string ) string { return fmt .Sprintf ("%s%s?filename=%s" , u , EndpointLogFile , filename ) }, http .StatusBadRequest },
495+ {"missing node_id and node_ip " , func (u , n string ) string { return fmt .Sprintf ("%s%s?filename=%s" , u , EndpointLogFile , filename ) }, http .StatusBadRequest },
462496 {"missing filename" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s" , u , EndpointLogFile , n ) }, http .StatusBadRequest },
463497 {"missing both" , func (u , n string ) string { return fmt .Sprintf ("%s%s" , u , EndpointLogFile ) }, http .StatusBadRequest },
464498
@@ -471,6 +505,9 @@ func testLogFileEndpointDeadCluster(test Test, g *WithT, namespace *corev1.Names
471505 {"task_id invalid (not found)" , func (u , n string ) string { return fmt .Sprintf ("%s%s?task_id=nonexistent-task-id" , u , EndpointLogFile ) }, http .StatusBadRequest },
472506 {"non-existent pid" , func (u , n string ) string { return fmt .Sprintf ("%s%s?pid=999999&node_id=%s" , u , EndpointLogFile , n ) }, http .StatusNotFound },
473507
508+ // node_ip parameter tests
509+ {"node_ip invalid (non-existent)" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_ip=192.168.255.255&filename=%s" , u , EndpointLogFile , filename ) }, http .StatusNotFound },
510+
474511 // Path traversal attacks
475512 {"traversal ../etc/passwd" , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s&filename=../etc/passwd" , u , EndpointLogFile , n ) }, http .StatusBadRequest },
476513 {"traversal .." , func (u , n string ) string { return fmt .Sprintf ("%s%s?node_id=%s&filename=.." , u , EndpointLogFile , n ) }, http .StatusBadRequest },
@@ -687,6 +724,32 @@ func testLogFileEndpointDeadCluster(test Test, g *WithT, namespace *corev1.Names
687724 t .Skip ("Skipping pid parameter test for dead cluster: worker_pid not available in Ray export events (see https://github.com/ray-project/ray/issues/60129)" )
688725 })
689726
727+ // Sub-test for node_ip parameter (dead cluster)
728+ test .T ().Run ("node_ip parameter" , func (t * testing.T ) {
729+ g := NewWithT (t )
730+
731+ // Use the captured node IP and ID from before cluster deletion
732+ LogWithTimestamp (t , "Testing node_ip parameter with IP: %s, ID: %s" , savedNodeIP , savedNodeID )
733+
734+ // Test successful case: node_ip + filename
735+ url := fmt .Sprintf ("%s%s?node_ip=%s&filename=%s" , historyServerURL , EndpointLogFile , savedNodeIP , filename )
736+ resp , err := client .Get (url )
737+ g .Expect (err ).NotTo (HaveOccurred ())
738+ body , _ := io .ReadAll (resp .Body )
739+ resp .Body .Close ()
740+ g .Expect (resp .StatusCode ).To (Equal (http .StatusOK ), "Expected OK for valid node_ip, got %d: %s" , resp .StatusCode , string (body ))
741+ g .Expect (len (body )).To (BeNumerically (">" , 0 ))
742+
743+ // Test that node_ip and node_id point to the same node (should return same content)
744+ urlWithNodeID := fmt .Sprintf ("%s%s?node_id=%s&filename=%s" , historyServerURL , EndpointLogFile , savedNodeID , filename )
745+ resp2 , err := client .Get (urlWithNodeID )
746+ g .Expect (err ).NotTo (HaveOccurred ())
747+ bodyWithNodeID , _ := io .ReadAll (resp2 .Body )
748+ resp2 .Body .Close ()
749+ g .Expect (resp2 .StatusCode ).To (Equal (http .StatusOK ))
750+ g .Expect (len (body )).To (Equal (len (bodyWithNodeID )), "node_ip and node_id should return same content" )
751+ })
752+
690753 DeleteS3Bucket (test , g , s3Client )
691754 LogWithTimestamp (test .T (), "Dead cluster log file endpoint tests completed" )
692755}
0 commit comments