@@ -349,20 +349,19 @@ async def test_torque(db, event_loop):
349
349
re .compile (r"ppn=5" ),
350
350
re .compile (r"^#PBS some_option_asdf" , re .M ),
351
351
]
352
+ poll_running = (
353
+ re .compile (r"sudo.*qstat" ),
354
+ f"<job_state>R</job_state><exec_host>{ testhost } /1</exec_host>" ,
355
+ )
352
356
script = [
353
357
(re .compile (r"sudo.*qsub" ), str (testjob )),
354
358
(
355
359
re .compile (r"sudo.*qstat" ),
356
360
"<job_state>Q</job_state><exec_host></exec_host>" ,
357
361
), # pending
358
- (
359
- re .compile (r"sudo.*qstat" ),
360
- f"<job_state>R</job_state><exec_host>{ testhost } /1</exec_host>" ,
361
- ), # running
362
- (
363
- re .compile (r"sudo.*qstat" ),
364
- f"<job_state>R</job_state><exec_host>{ testhost } /1</exec_host>" ,
365
- ), # running
362
+ poll_running ,
363
+ poll_running ,
364
+ poll_running ,
366
365
(re .compile (r"sudo.*qdel" ), "STOP" ),
367
366
(re .compile (r"sudo.*qstat" ), "" ),
368
367
]
@@ -394,17 +393,16 @@ async def test_moab(db, event_loop):
394
393
re .compile (r"ppn=5" ),
395
394
re .compile (r"^#PBS some_option_asdf" , re .M ),
396
395
]
396
+ poll_running = (
397
+ re .compile (r"sudo.*mdiag" ),
398
+ f'State="Running" AllocNodeList="{ testhost } "' ,
399
+ )
397
400
script = [
398
401
(re .compile (r"sudo.*msub" ), str (testjob )),
399
402
(re .compile (r"sudo.*mdiag" ), 'State="Idle"' ), # pending
400
- (
401
- re .compile (r"sudo.*mdiag" ),
402
- f'State="Running" AllocNodeList="{ testhost } "' ,
403
- ), # running
404
- (
405
- re .compile (r"sudo.*mdiag" ),
406
- f'State="Running" AllocNodeList="{ testhost } "' ,
407
- ), # running
403
+ poll_running ,
404
+ poll_running ,
405
+ poll_running ,
408
406
(re .compile (r"sudo.*mjobctl.*-c" ), "STOP" ),
409
407
(re .compile (r"sudo.*mdiag" ), "" ),
410
408
]
@@ -436,17 +434,16 @@ async def test_pbs(db, event_loop):
436
434
re .compile (r"@some_pbs_admin_node" ),
437
435
re .compile (r"^#PBS some_option_asdf" , re .M ),
438
436
]
437
+ poll_running = (
438
+ re .compile (r"sudo.*qstat" ),
439
+ f"job_state = R\n exec_host = { testhost } /2*1" ,
440
+ )
439
441
script = [
440
442
(re .compile (r"sudo.*qsub" ), str (testjob )),
441
443
(re .compile (r"sudo.*qstat" ), "job_state = Q" ), # pending
442
- (
443
- re .compile (r"sudo.*qstat" ),
444
- f"job_state = R\n exec_host = { testhost } /2*1" ,
445
- ), # running
446
- (
447
- re .compile (r"sudo.*qstat" ),
448
- f"job_state = R\n exec_host = { testhost } /2*1" ,
449
- ), # running
444
+ poll_running ,
445
+ poll_running ,
446
+ poll_running ,
450
447
(re .compile (r"sudo.*qdel" ), "STOP" ),
451
448
(re .compile (r"sudo.*qstat" ), "" ),
452
449
]
@@ -504,6 +501,7 @@ async def test_slurm(db, event_loop):
504
501
), # unknown
505
502
(re .compile (r"sudo.*squeue" ), "RUNNING " + testhost ), # running
506
503
(re .compile (r"sudo.*squeue" ), "RUNNING " + testhost ),
504
+ (re .compile (r"sudo.*squeue" ), "RUNNING " + testhost ),
507
505
(re .compile (r"sudo.*scancel" ), "STOP" ),
508
506
(re .compile (r"sudo.*squeue" ), "" ),
509
507
]
@@ -573,6 +571,7 @@ async def test_condor(db, event_loop):
573
571
(re .compile (r"sudo.*condor_q" ), "1," ), # pending
574
572
(re .compile (r"sudo.*condor_q" ), f"2, @{ testhost } " ), # runing
575
573
(re .compile (r"sudo.*condor_q" ), f"2, @{ testhost } " ),
574
+ (re .compile (r"sudo.*condor_q" ), f"2, @{ testhost } " ),
576
575
(re .compile (r"sudo.*condor_rm" ), "STOP" ),
577
576
(re .compile (r"sudo.*condor_q" ), "" ),
578
577
]
@@ -611,6 +610,7 @@ async def test_lfs(db, event_loop):
611
610
(re .compile (r"sudo.*bjobs" ), "PEND " ), # pending
612
611
(re .compile (r"sudo.*bjobs" ), f"RUN { testhost } " ), # running
613
612
(re .compile (r"sudo.*bjobs" ), f"RUN { testhost } " ),
613
+ (re .compile (r"sudo.*bjobs" ), f"RUN { testhost } " ),
614
614
(re .compile (r"sudo.*bkill" ), "STOP" ),
615
615
(re .compile (r"sudo.*bjobs" ), "" ),
616
616
]
@@ -652,3 +652,19 @@ async def test_keepvars(db, event_loop):
652
652
spawner_kwargs = spawner_kwargs ,
653
653
batch_script_re_list = batch_script_re_list ,
654
654
)
655
+
656
+
657
async def test_early_stop(db, event_loop):
    """Expect a ``RuntimeError`` when the Slurm job vanishes during startup.

    The scripted command outputs are, in order: a submission that returns
    the job id, a poll reporting ``PENDING``, a poll that cannot reach the
    controller, and then an empty poll (the job is no longer listed).
    The run is expected to fail with a message matching
    "job has disappeared".
    """
    squeue_cmd = re.compile(r"sudo.*squeue")
    controller_error = "slurm_load_jobs error: Unable to contact slurm controller"
    script = [
        (re.compile(r"sudo.*sbatch"), str(testjob)),
        (squeue_cmd, "PENDING "),  # pending
        (squeue_cmd, controller_error),  # unknown
        # job exits early during start
        (squeue_cmd, ""),
        (re.compile(r"sudo.*scancel"), "STOP"),
    ]
    with pytest.raises(RuntimeError, match="job has disappeared"):
        await run_spawner_script(db, SlurmSpawner, script)
0 commit comments