@@ -421,42 +421,61 @@ def run_release_test_kuberay(
421421 smoke_test : bool = False ,
422422 test_definition_root : Optional [str ] = None ,
423423) -> Result :
424- result .stable = test .get ("stable" , True )
425- result .smoke_test = smoke_test
426- cluster_compute = load_test_cluster_compute (test , test_definition_root )
427- kuberay_compute_config = convert_cluster_compute_to_kuberay_compute_config (
428- cluster_compute
429- )
430- kuberay_autoscaler_version = cluster_compute .get ("autoscaler_version" , None )
431- if kuberay_autoscaler_version :
432- kuberay_autoscaler_config = {"version" : kuberay_autoscaler_version }
433- else :
434- kuberay_autoscaler_config = None
435- working_dir_upload_path = upload_working_dir (get_working_dir (test ))
424+ start_time = time .monotonic ()
425+ pipeline_exception = None
426+ try :
427+ result .stable = test .get ("stable" , True )
428+ result .smoke_test = smoke_test
429+ cluster_compute = load_test_cluster_compute (test , test_definition_root )
430+ kuberay_compute_config = convert_cluster_compute_to_kuberay_compute_config (
431+ cluster_compute
432+ )
433+ kuberay_autoscaler_version = cluster_compute .get ("autoscaler_version" , None )
434+ if kuberay_autoscaler_version :
435+ kuberay_autoscaler_config = {"version" : kuberay_autoscaler_version }
436+ else :
437+ kuberay_autoscaler_config = None
438+ working_dir_upload_path = upload_working_dir (get_working_dir (test ))
439+
440+ command_timeout = int (test ["run" ].get ("timeout" , DEFAULT_COMMAND_TIMEOUT ))
441+ test_name_hash = hashlib .sha256 (test ["name" ].encode ()).hexdigest ()[:10 ]
442+ # random 8 digit suffix
443+ random_suffix = "" .join (random .choices (string .digits , k = 8 ))
444+ base_job_name = f"{ test ['name' ][:20 ]} -{ test_name_hash } -{ random_suffix } "
445+ job_name = base_job_name .replace ("_" , "-" )
446+ logger .info (f"Job name: { job_name } " )
447+ kuberay_job_manager = KubeRayJobManager ()
448+ retcode , duration = kuberay_job_manager .run_and_wait (
449+ job_name = job_name ,
450+ image = test .get_anyscale_byod_image (),
451+ cmd_to_run = test ["run" ]["script" ],
452+ env_vars = test .get_byod_runtime_env (),
453+ working_dir = working_dir_upload_path ,
454+ pip = test .get_byod_pips (),
455+ compute_config = kuberay_compute_config ,
456+ autoscaler_config = kuberay_autoscaler_config ,
457+ timeout = command_timeout ,
458+ )
459+ kuberay_job_manager .fetch_results ()
460+ result .return_code = retcode
461+ result .runtime = duration
462+ except Exception as e :
463+ logger .info (f"Exception: { e } " )
464+ pipeline_exception = e
465+ result .runtime = time .monotonic () - start_time
436466
437- command_timeout = int (test ["run" ].get ("timeout" , DEFAULT_COMMAND_TIMEOUT ))
438- test_name_hash = hashlib .sha256 (test ["name" ].encode ()).hexdigest ()[:10 ]
439- # random 8 digit suffix
440- random_suffix = "" .join (random .choices (string .digits , k = 8 ))
441- job_name = f"{ test ['name' ][:20 ]} -{ test_name_hash } -{ random_suffix } " .replace ("_" , "-" )
442- logger .info (f"Job name: { job_name } " )
443- logger .info (f"KubeRay compute config: { kuberay_compute_config } " )
444- logger .info (f"KubeRay autoscaler config: { kuberay_autoscaler_config } " )
445- kuberay_job_manager = KubeRayJobManager ()
446- retcode , duration = kuberay_job_manager .run_and_wait (
447- job_name = job_name ,
448- image = test .get_anyscale_byod_image (),
449- cmd_to_run = test ["run" ]["script" ],
450- env_vars = test .get_byod_runtime_env (),
451- working_dir = working_dir_upload_path ,
452- pip = test .get_byod_pips (),
453- compute_config = kuberay_compute_config ,
454- autoscaler_config = kuberay_autoscaler_config ,
455- timeout = command_timeout ,
456- )
457- kuberay_job_manager .fetch_results ()
458- result .return_code = retcode
459- result .runtime = duration
467+ if pipeline_exception :
468+ buildkite_group (":rotating_light: Handling errors" )
469+ exit_code , result_status , runtime = handle_exception (
470+ pipeline_exception ,
471+ result .runtime ,
472+ )
473+
474+ result .return_code = exit_code .value
475+ result .status = result_status .value
476+ if runtime is not None :
477+ result .runtime = runtime
478+ raise pipeline_exception
460479 return result
461480
462481
0 commit comments