@@ -77,7 +77,7 @@ def test_managed_jobs_basic(generic_cloud: str):
77
77
# Test the functionality for logging.
78
78
f's=$(sky jobs logs -n { name } -2 --no-follow); echo "$s"; echo "$s" | grep "start counting"' ,
79
79
f's=$(sky jobs logs --controller -n { name } -2 --no-follow); echo "$s"; echo "$s" | grep "Cluster launched:"' ,
80
- f '{ smoke_tests_utils .GET_JOB_QUEUE } | grep { name } -2 | head -n1 | grep "RUNNING\|SUCCEEDED"' ,
80
+ rf '{ smoke_tests_utils .GET_JOB_QUEUE } | grep { name } -2 | head -n1 | grep "RUNNING\|SUCCEEDED"' ,
81
81
],
82
82
# TODO(zhwu): Change to f'sky jobs cancel -y -n {name}-1 -n {name}-2' when
83
83
# canceling multiple job names is supported.
@@ -105,19 +105,19 @@ def test_job_pipeline(generic_cloud: str):
105
105
[
106
106
f'sky jobs launch -n { name } tests/test_yamls/pipeline.yaml --cloud { generic_cloud } -y -d' ,
107
107
'sleep 5' ,
108
- f '{ smoke_tests_utils .GET_JOB_QUEUE } | grep { name } | head -n1 | grep "STARTING\|RUNNING"' ,
108
+ rf '{ smoke_tests_utils .GET_JOB_QUEUE } | grep { name } | head -n1 | grep "STARTING\|RUNNING"' ,
109
109
# `grep -A 4 {name}` finds the job with {name} and the 4 lines
110
110
# after it, i.e. the 4 tasks within the job.
111
111
# `sed -n 2p` gets the second line of the 4 lines, i.e. the first
112
112
# task within the job.
113
- f '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 2p | grep "STARTING\|RUNNING"' ,
113
+ rf '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 2p | grep "STARTING\|RUNNING"' ,
114
114
f'{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 3p | grep "PENDING"' ,
115
115
f'sky jobs cancel -y -n { name } ' ,
116
116
'sleep 5' ,
117
- f '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 2p | grep "CANCELLING\|CANCELLED"' ,
118
- f '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 3p | grep "CANCELLING\|CANCELLED"' ,
119
- f '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 4p | grep "CANCELLING\|CANCELLED"' ,
120
- f '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 5p | grep "CANCELLING\|CANCELLED"' ,
117
+ rf '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 2p | grep "CANCELLING\|CANCELLED"' ,
118
+ rf '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 3p | grep "CANCELLING\|CANCELLED"' ,
119
+ rf '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 4p | grep "CANCELLING\|CANCELLED"' ,
120
+ rf '{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 5p | grep "CANCELLING\|CANCELLED"' ,
121
121
'sleep 200' ,
122
122
f'{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 2p | grep "CANCELLED"' ,
123
123
f'{ smoke_tests_utils .GET_JOB_QUEUE } | grep -A 4 { name } | sed -n 3p | grep "CANCELLED"' ,
@@ -212,7 +212,7 @@ def test_managed_jobs_recovery_aws(aws_config_region):
212
212
test = smoke_tests_utils .Test (
213
213
'managed_jobs_recovery_aws' ,
214
214
[
215
- f 'sky jobs launch --cloud aws --region { region } --use-spot -n { name } "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
215
+ rf 'sky jobs launch --cloud aws --region { region } --use-spot -n { name } "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
216
216
smoke_tests_utils .
217
217
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
218
218
job_name = name ,
@@ -258,7 +258,7 @@ def test_managed_jobs_recovery_gcp():
258
258
test = smoke_tests_utils .Test (
259
259
'managed_jobs_recovery_gcp' ,
260
260
[
261
- f 'sky jobs launch --cloud gcp --zone { zone } -n { name } --use-spot --cpus 2 "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
261
+ rf 'sky jobs launch --cloud gcp --zone { zone } -n { name } --use-spot --cpus 2 "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
262
262
smoke_tests_utils .
263
263
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
264
264
job_name = name ,
@@ -288,14 +288,13 @@ def test_managed_jobs_pipeline_recovery_aws(aws_config_region):
288
288
"""Test managed job recovery for a pipeline."""
289
289
name = smoke_tests_utils .get_cluster_name ()
290
290
user_hash = common_utils .get_user_hash ()
291
- user_hash = user_hash [:common_utils .USER_HASH_LENGTH_IN_CLUSTER_NAME ]
292
291
region = aws_config_region
293
292
if region != 'us-east-2' :
294
293
pytest .skip ('Only run spot pipeline recovery test in us-east-2' )
295
294
test = smoke_tests_utils .Test (
296
295
'managed_jobs_pipeline_recovery_aws' ,
297
296
[
298
- f'sky jobs launch -n { name } tests/test_yamls/pipeline_aws.yaml -y -d' ,
297
+ f'sky jobs launch -n { name } tests/test_yamls/pipeline_aws.yaml -y -d' ,
299
298
smoke_tests_utils .
300
299
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
301
300
job_name = name ,
@@ -342,7 +341,6 @@ def test_managed_jobs_pipeline_recovery_gcp():
342
341
name = smoke_tests_utils .get_cluster_name ()
343
342
zone = 'us-east4-b'
344
343
user_hash = common_utils .get_user_hash ()
345
- user_hash = user_hash [:common_utils .USER_HASH_LENGTH_IN_CLUSTER_NAME ]
346
344
query_cmd = (
347
345
'gcloud compute instances list --filter='
348
346
f'"(labels.ray-cluster-name:*-${{MANAGED_JOB_ID}}-{ user_hash } )" '
@@ -352,7 +350,7 @@ def test_managed_jobs_pipeline_recovery_gcp():
352
350
test = smoke_tests_utils .Test (
353
351
'managed_jobs_pipeline_recovery_gcp' ,
354
352
[
355
- f'sky jobs launch -n { name } tests/test_yamls/pipeline_gcp.yaml -y -d' ,
353
+ f'sky jobs launch -n { name } tests/test_yamls/pipeline_gcp.yaml -y -d' ,
356
354
smoke_tests_utils .
357
355
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
358
356
job_name = name ,
@@ -426,7 +424,7 @@ def test_managed_jobs_recovery_multi_node_aws(aws_config_region):
426
424
test = smoke_tests_utils .Test (
427
425
'managed_jobs_recovery_multi_node_aws' ,
428
426
[
429
- f 'sky jobs launch --cloud aws --region { region } -n { name } --use-spot --num-nodes 2 "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
427
+ rf 'sky jobs launch --cloud aws --region { region } -n { name } --use-spot --num-nodes 2 "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
430
428
smoke_tests_utils .
431
429
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
432
430
job_name = name ,
@@ -473,7 +471,7 @@ def test_managed_jobs_recovery_multi_node_gcp():
473
471
test = smoke_tests_utils .Test (
474
472
'managed_jobs_recovery_multi_node_gcp' ,
475
473
[
476
- f 'sky jobs launch --cloud gcp --zone { zone } -n { name } --use-spot --num-nodes 2 "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
474
+ rf 'sky jobs launch --cloud gcp --zone { zone } -n { name } --use-spot --num-nodes 2 "echo SKYPILOT_TASK_ID: \$SKYPILOT_TASK_ID; sleep 1800" -y -d' ,
477
475
smoke_tests_utils .
478
476
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
479
477
job_name = name ,
@@ -512,7 +510,7 @@ def test_managed_jobs_cancellation_aws(aws_config_region):
512
510
'managed_jobs_cancellation_aws' ,
513
511
[
514
512
# Test cancellation while the spot cluster is being launched.
515
- f'sky jobs launch --cloud aws --region { region } -n { name } --use-spot "sleep 1000" -y -d' ,
513
+ f'sky jobs launch --cloud aws --region { region } -n { name } --use-spot "sleep 1000" -y -d' ,
516
514
smoke_tests_utils .
517
515
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
518
516
job_name = name ,
@@ -532,7 +530,7 @@ def test_managed_jobs_cancellation_aws(aws_config_region):
532
530
'--output text) && echo "$s" && echo; [[ -z "$s" ]] || [[ "$s" = "terminated" ]] || [[ "$s" = "shutting-down" ]]'
533
531
),
534
532
# Test cancelling the spot cluster while the spot job is being set up.
535
- f'sky jobs launch --cloud aws --region { region } -n { name } -2 --use-spot tests/test_yamls/test_long_setup.yaml -y -d' ,
533
+ f'sky jobs launch --cloud aws --region { region } -n { name } -2 --use-spot tests/test_yamls/test_long_setup.yaml -y -d' ,
536
534
# The job is set up in the cluster, so it will be shown as RUNNING.
537
535
smoke_tests_utils .
538
536
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
@@ -551,7 +549,7 @@ def test_managed_jobs_cancellation_aws(aws_config_region):
551
549
'--output text) && echo "$s" && echo; [[ -z "$s" ]] || [[ "$s" = "terminated" ]] || [[ "$s" = "shutting-down" ]]'
552
550
),
553
551
# Test cancellation while the spot job is recovering.
554
- f'sky jobs launch --cloud aws --region { region } -n { name } -3 --use-spot "sleep 1000" -y -d' ,
552
+ f'sky jobs launch --cloud aws --region { region } -n { name } -3 --use-spot "sleep 1000" -y -d' ,
555
553
# The job is running in the cluster, so it will be shown as RUNNING.
556
554
smoke_tests_utils .
557
555
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
@@ -605,7 +603,7 @@ def test_managed_jobs_cancellation_gcp():
605
603
'managed_jobs_cancellation_gcp' ,
606
604
[
607
605
# Test cancellation while the spot cluster is being launched.
608
- f'sky jobs launch --cloud gcp --zone { zone } -n { name } --use-spot "sleep 1000" -y -d' ,
606
+ f'sky jobs launch --cloud gcp --zone { zone } -n { name } --use-spot "sleep 1000" -y -d' ,
609
607
smoke_tests_utils .
610
608
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
611
609
job_name = name ,
@@ -618,7 +616,7 @@ def test_managed_jobs_cancellation_gcp():
618
616
job_status = [sky .ManagedJobStatus .CANCELLED ],
619
617
timeout = 155 ),
620
618
# Test cancelling the spot cluster while the spot job is being set up.
621
- f'sky jobs launch --cloud gcp --zone { zone } -n { name } -2 --use-spot tests/test_yamls/test_long_setup.yaml -y -d' ,
619
+ f'sky jobs launch --cloud gcp --zone { zone } -n { name } -2 --use-spot tests/test_yamls/test_long_setup.yaml -y -d' ,
622
620
# The job is set up in the cluster, so it will be shown as RUNNING.
623
621
smoke_tests_utils .
624
622
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
@@ -632,7 +630,7 @@ def test_managed_jobs_cancellation_gcp():
632
630
job_status = [sky .ManagedJobStatus .CANCELLED ],
633
631
timeout = 155 ),
634
632
# Test cancellation while the spot job is recovering.
635
- f'sky jobs launch --cloud gcp --zone { zone } -n { name } -3 --use-spot "sleep 1000" -y -d' ,
633
+ f'sky jobs launch --cloud gcp --zone { zone } -n { name } -3 --use-spot "sleep 1000" -y -d' ,
636
634
smoke_tests_utils .
637
635
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
638
636
job_name = f'{ name } -3' ,
@@ -885,7 +883,7 @@ def test_managed_jobs_inline_env(generic_cloud: str):
885
883
test = smoke_tests_utils .Test (
886
884
'test-managed-jobs-inline-env' ,
887
885
[
888
- f 'sky jobs launch -n { name } -y --cloud { generic_cloud } --env TEST_ENV="hello world" -- "echo "\\ $TEST_ENV"; ([[ ! -z \\ "\$TEST_ENV\\ " ]] && [[ ! -z \\ "\${ constants .SKYPILOT_NODE_IPS } \\ " ]] && [[ ! -z \\ "\${ constants .SKYPILOT_NODE_RANK } \\ " ]] && [[ ! -z \\ "\${ constants .SKYPILOT_NUM_NODES } \ \ " ]]) || exit 1"' ,
886
+ rf 'sky jobs launch -n { name } -y --cloud { generic_cloud } --env TEST_ENV="hello world" -- "echo "\$TEST_ENV"; ([[ ! -z \"\$TEST_ENV\" ]] && [[ ! -z \"\${ constants .SKYPILOT_NODE_IPS } \" ]] && [[ ! -z \"\${ constants .SKYPILOT_NODE_RANK } \" ]] && [[ ! -z \"\${ constants .SKYPILOT_NUM_NODES } \" ]]) || exit 1"' ,
889
887
smoke_tests_utils .
890
888
get_cmd_wait_until_managed_job_status_contains_matching_job_name (
891
889
job_name = name ,
0 commit comments