1717from ray_release .cluster_manager .cluster_manager import ClusterManager
1818from ray_release .cluster_manager .minimal import MinimalClusterManager
1919from ray_release .command_runner .anyscale_job_runner import AnyscaleJobRunner
20- from ray_release .command_runner .command_runner import CommandRunner
2120from ray_release .config import (
2221 DEFAULT_AUTOSUSPEND_MINS ,
2322 DEFAULT_BUILD_TIMEOUT ,
@@ -80,7 +79,7 @@ def _load_test_configuration(
8079 anyscale_project : str ,
8180 result : Result ,
8281 smoke_test : bool = False ,
83- ) -> Tuple [ClusterManager , CommandRunner , str ]:
82+ ) -> Tuple [ClusterManager , AnyscaleJobRunner , str ]:
8483 logger .info (f"Test config: { test } " )
8584
8685 # Populate result paramaters
@@ -136,6 +135,9 @@ def _load_test_configuration(
136135 except Exception as e :
137136 raise ReleaseTestSetupError (f"Error setting up release test: { e } " ) from e
138137
138+ if not isinstance (command_runner , AnyscaleJobRunner ):
139+ raise ReleaseTestSetupError ("Command runner is not an AnyscaleJobRunner" )
140+
139141 return cluster_manager , command_runner , artifact_path
140142
141143
@@ -221,32 +223,27 @@ def _setup_cluster_environment(
221223 return prepare_cmd , prepare_timeout , build_timeout , cluster_timeout , command_timeout
222224
223225
224- def _local_environment_information (
225- result : Result ,
226+ def _build_local_environment_information (
226227 cluster_manager : ClusterManager ,
227- command_runner : CommandRunner ,
228+ runner : AnyscaleJobRunner ,
228229 build_timeout : int ,
229230 cluster_timeout : int ,
230231 cluster_env_id : Optional [str ],
231232) -> None :
232233 # Start cluster
233234 buildkite_group (":gear: Building cluster environment" )
234-
235235 cluster_manager .cluster_env_id = cluster_env_id
236-
237236 cluster_manager .build_configs (timeout = build_timeout )
238-
239- if isinstance (command_runner , AnyscaleJobRunner ):
240- command_runner .job_manager .cluster_startup_timeout = cluster_timeout
237+ runner .job_manager .cluster_startup_timeout = cluster_timeout
241238
242239
243240def _prepare_remote_environment (
244241 test : Test ,
245- command_runner : CommandRunner ,
242+ runner : AnyscaleJobRunner ,
246243 prepare_cmd : bool ,
247244 prepare_timeout : int ,
248245) -> None :
249- command_runner .prepare_remote_env ()
246+ runner .prepare_remote_env ()
250247
251248 wait_for_nodes = test ["run" ].get ("wait_for_nodes" , None )
252249
@@ -257,11 +254,11 @@ def _prepare_remote_environment(
257254 wait_for_nodes .get ("timeout" , DEFAULT_WAIT_FOR_NODES_TIMEOUT )
258255 )
259256 num_nodes = test ["run" ]["wait_for_nodes" ]["num_nodes" ]
260- command_runner .wait_for_nodes (num_nodes , wait_timeout )
257+ runner .wait_for_nodes (num_nodes , wait_timeout )
261258
262259 if prepare_cmd :
263260 try :
264- command_runner .run_prepare_command (prepare_cmd , timeout = prepare_timeout )
261+ runner .run_prepare_command (prepare_cmd , timeout = prepare_timeout )
265262 except CommandError as e :
266263 raise PrepareCommandError (e )
267264 except CommandTimeout as e :
@@ -293,7 +290,7 @@ def _upload_working_dir_to_gcs(working_dir: str) -> str:
293290def _running_test_script (
294291 test : Test ,
295292 smoke_test : bool ,
296- command_runner : CommandRunner ,
293+ runner : AnyscaleJobRunner ,
297294 command_timeout : int ,
298295) -> None :
299296 command = test ["run" ]["script" ]
@@ -306,7 +303,7 @@ def _running_test_script(
306303 is_long_running = test ["run" ].get ("long_running" , False )
307304
308305 try :
309- command_runner .run_command (
306+ runner .run_command (
310307 command ,
311308 env = command_env ,
312309 timeout = command_timeout ,
@@ -329,22 +326,22 @@ def _running_test_script(
329326
330327def _fetching_results (
331328 result : Result ,
332- command_runner : CommandRunner ,
329+ runner : AnyscaleJobRunner ,
333330 artifact_path : Optional [str ],
334331 smoke_test : bool ,
335332 start_time_unix : int ,
336333) -> Tuple [dict , Exception ]:
337334 fetch_result_exception = None
338335 try :
339- command_results = command_runner .fetch_results ()
336+ command_results = runner .fetch_results ()
340337 except Exception as e :
341338 logger .exception (f"Could not fetch results for test command: { e } " )
342339 command_results = {}
343340 fetch_result_exception = e
344341
345342 if artifact_path :
346343 try :
347- command_runner .fetch_artifact ()
344+ runner .fetch_artifact ()
348345 except Exception as e :
349346 logger .error ("Could not fetch artifact for test command" )
350347 logger .exception (e )
@@ -356,7 +353,7 @@ def _fetching_results(
356353 )
357354
358355 try :
359- metrics = command_runner .fetch_metrics ()
356+ metrics = runner .fetch_metrics ()
360357 except Exception as e :
361358 logger .exception (f"Could not fetch metrics for test command: { e } " )
362359 metrics = {}
@@ -462,15 +459,15 @@ def run_release_test_anyscale(
462459) -> Result :
463460 old_wd = os .getcwd ()
464461 start_time = time .monotonic ()
465- command_runner = None
462+ runner = None
466463 cluster_manager = None
467464 pipeline_exception = None
468465 # non critical for some tests. So separate it from the general one.
469466 fetch_result_exception = None
470467 try :
471468
472469 buildkite_group (":spiral_note_pad: Loading test configuration" )
473- cluster_manager , command_runner , artifact_path = _load_test_configuration (
470+ cluster_manager , runner , artifact_path = _load_test_configuration (
474471 test ,
475472 anyscale_project ,
476473 result ,
@@ -502,11 +499,10 @@ def run_release_test_anyscale(
502499 test_definition_root ,
503500 )
504501
505- buildkite_group (":bulb: Local environment information" )
506- _local_environment_information (
507- result ,
502+ buildkite_group (":bulb: Building local environment information" )
503+ _build_local_environment_information (
508504 cluster_manager ,
509- command_runner ,
505+ runner ,
510506 build_timeout ,
511507 cluster_timeout ,
512508 cluster_env_id ,
@@ -516,7 +512,7 @@ def run_release_test_anyscale(
516512 buildkite_group (":wrench: Preparing remote environment" )
517513 _prepare_remote_environment (
518514 test ,
519- command_runner ,
515+ runner ,
520516 prepare_cmd ,
521517 prepare_timeout ,
522518 )
@@ -526,32 +522,31 @@ def run_release_test_anyscale(
526522 _running_test_script (
527523 test ,
528524 smoke_test ,
529- command_runner ,
525+ runner ,
530526 command_timeout ,
531527 )
532528
533529 buildkite_group (":floppy_disk: Fetching results" )
534530 metrics , fetch_result_exception = _fetching_results (
535531 result ,
536- command_runner ,
532+ runner ,
537533 artifact_path ,
538534 smoke_test ,
539535 start_time_unix ,
540536 )
537+
538+ # Obtain the cluster info again as it is set after the
539+ # command was run in case of anyscale jobs
540+ result .job_url = runner .job_manager .job_url
541+ result .job_id = runner .job_manager .job_id
542+ result .last_logs = runner .get_last_logs ()
543+
541544 except Exception as e :
542545 logger .exception (e )
543546 buildkite_open_last ()
544547 pipeline_exception = e
545548 metrics = {}
546549
547- # Obtain the cluster info again as it is set after the
548- # command was run in case of anyscale jobs
549- if isinstance (command_runner , AnyscaleJobRunner ):
550- result .job_url = command_runner .job_manager .job_url
551- result .job_id = command_runner .job_manager .job_id
552-
553- result .last_logs = command_runner .get_last_logs () if command_runner else None
554-
555550 reset_signal_handling ()
556551
557552 time_taken = time .monotonic () - start_time
0 commit comments