4949pytestmark = [pytest .mark .cluster , pytest .mark .slow ]
5050
5151
def _make_shared_tmp_dir() -> Path:
    """Create a temporary directory on the shared filesystem.

    The directory is created under ``RESULTS_DIR`` and is given the same
    permission bits as ``RESULTS_DIR`` itself so that cluster jobs can write
    to it.

    Returns the path of the newly created directory. The caller is
    responsible for removing it.
    """
    import stat

    dir_str = tempfile.mkdtemp(dir=RESULTS_DIR)
    # ``st_mode`` also carries the file-type bits; ``S_IMODE`` keeps only the
    # portion that ``os.chmod`` is documented to accept.
    parent_permissions = stat.S_IMODE(os.stat(RESULTS_DIR).st_mode)
    os.chmod(dir_str, parent_permissions)
    return Path(dir_str)
57+
58+
59+ def _cleanup_dir (path : Path ) -> None :
60+ """Try up to 10 times to remove a directory tree.
61+
62+ NOTE: There are times where the directory is not removed even after
63+ several attempts with a rest between them. Typically the dir is empty.
64+ """
65+ for _ in range (10 ):
66+ if not path .exists ():
67+ break
68+ time .sleep (1 )
69+ shutil .rmtree (path , ignore_errors = True )
70+
71+
5272@pytest .fixture
5373def shared_tmp_path () -> Iterator [Path ]:
5474 """Temporary directory on a shared filesystem visible to all cluster nodes.
@@ -59,22 +79,40 @@ def shared_tmp_path() -> Iterator[Path]:
5979 a temporary directory under the user's home directory (which lives
6080 on the shared ``/ihme`` filesystem) and cleans it up after the test.
6181 """
62- results_dir_str = tempfile .mkdtemp (dir = RESULTS_DIR )
63- # give the dir the same permissions as the parent directory so that cluster jobs
64- # can write to it
65- os .chmod (results_dir_str , os .stat (RESULTS_DIR ).st_mode )
66- results_dir = Path (results_dir_str )
82+ results_dir = _make_shared_tmp_dir ()
6783 yield results_dir
84+ _cleanup_dir (results_dir )
6885
69- # Try 10 times to delete the dir.
70- # NOTE: There seems to be times where the directory is not removed (even after
71- # the several attempts with a rest between them). Typically the dir is empty.
72- for _ in range (10 ):
73- if not results_dir .exists ():
74- break # the dir has been removed
75- # Take a quick nap to ensure processes are finished with the directory
76- time .sleep (1 )
77- shutil .rmtree (results_dir )
86+
@pytest.fixture(scope="module")
def completed_sim_output(request: pytest.FixtureRequest) -> Iterator[Path]:
    """Run the basic simulation once for the entire test module.

    This avoids duplicating the expensive ``psimulate run`` for every test
    that needs a completed simulation as a prerequisite (restart, expand, etc.).
    The yielded path is the timestamped output directory and should be treated
    as **read-only** by consumers.

    The temporary directory holding the run is removed on teardown, and also
    when the simulation itself fails during fixture setup.
    """
    slurm_project = str(request.config.getoption("--slurm-project"))
    tmp_path = _make_shared_tmp_dir()
    # pytest skips the post-yield teardown when setup raises before the yield,
    # so the cleanup must sit in a ``finally`` to avoid leaking the directory.
    try:
        _, output_dir = _run_basic_simulation(tmp_path, slurm_project)
        yield output_dir
    finally:
        _cleanup_dir(tmp_path)
101+
102+
@pytest.fixture
def completed_sim_copy(completed_sim_output: Path) -> Iterator[Path]:
    """Provide an isolated deep-copy of a completed simulation run.

    Tests that mutate the output directory (restart deletes files, expand adds
    jobs) should use this fixture instead of ``completed_sim_output`` directly
    so that each test starts from a pristine completed state.

    The copy lives in its own temporary directory, which is removed on
    teardown and also when the copy fails during fixture setup.
    """
    copy_root = _make_shared_tmp_dir()
    # pytest skips the post-yield teardown when setup raises before the yield,
    # so the cleanup must sit in a ``finally`` to avoid leaking the directory.
    try:
        copy_dir = copy_root / "output"
        shutil.copytree(completed_sim_output, copy_dir)
        yield copy_dir
    finally:
        _cleanup_dir(copy_root)
78116
79117
80118@pytest .fixture
@@ -171,9 +209,9 @@ def _run_basic_simulation(
171209class TestPsimulateRun :
172210 """E2E tests for ``psimulate run``."""
173211
174- def test_basic_run (self , shared_tmp_path : Path , slurm_project : str ) -> None :
212+ def test_basic_run (self , completed_sim_output : Path ) -> None :
175213 """Run a minimal simulation and verify output files are created."""
176- proc , output_dir = _run_basic_simulation ( shared_tmp_path , slurm_project )
214+ output_dir = completed_sim_output
177215
178216 # Verify metadata file
179217 metadata = _read_metadata (output_dir )
@@ -253,10 +291,10 @@ class TestPsimulateRestart:
253291 """E2E tests for ``psimulate restart``."""
254292
255293 def test_restart_completes_remaining (
256- self , shared_tmp_path : Path , slurm_project : str
294+ self , completed_sim_copy : Path , slurm_project : str
257295 ) -> None :
258296 """Delete partial outputs, restart, and verify only missing jobs re-run."""
259- _ , output_dir = _run_basic_simulation ( shared_tmp_path , slurm_project )
297+ output_dir = completed_sim_copy
260298
261299 # Verify initial completion
262300 metadata = _read_metadata (output_dir )
@@ -305,10 +343,10 @@ class TestPsimulateExpand:
305343 """E2E tests for ``psimulate expand``."""
306344
307345 def test_expand_adds_draws_and_seeds (
308- self , shared_tmp_path : Path , slurm_project : str
346+ self , completed_sim_copy : Path , slurm_project : str
309347 ) -> None :
310348 """Expand a completed run by adding draws and seeds, verify new jobs complete."""
311- _ , output_dir = _run_basic_simulation ( shared_tmp_path , slurm_project )
349+ output_dir = completed_sim_copy
312350
313351 # Verify initial completion: 2 draws x 2 seeds = 4 jobs
314352 metadata = _read_metadata (output_dir )
@@ -371,10 +409,8 @@ def test_expand_adds_draws_and_seeds(
371409class TestPsimulateLoadTest :
372410 """E2E tests for ``psimulate test``."""
373411
374- # Number of workers to use for the load test (keep small for speed)
412+ # Number of workers to use for the load test
375413 _NUM_WORKERS = 2
376- # large_results_test sleeps for 30s per worker, so allow generous timeout
377- _LOAD_TEST_TIMEOUT = _TIMEOUT
378414
379415 @pytest .mark .xfail (reason = "large_results load test currently failing" )
380416 def test_large_results (self , shared_tmp_path : Path , slurm_project : str ) -> None :
@@ -395,7 +431,7 @@ def test_large_results(self, shared_tmp_path: Path, slurm_project: str) -> None:
395431 "-w" ,
396432 str (self ._NUM_WORKERS ),
397433 ],
398- timeout = self . _LOAD_TEST_TIMEOUT ,
434+ timeout = _TIMEOUT ,
399435 )
400436 assert proc .returncode == 0 , (
401437 f"psimulate test large_results failed.\n "
0 commit comments