test(rlvr): write run cards under tmp_path, not the repo root

greenpolo · claude · greenpolo · commit f29f65fe0f23 · 2026-06-03T14:10:23.000-04:00
train_grpo.main() writes a run card to its output dir. These tests supplied
relative output paths (fake/out, fake/phase_b), so once they ran end-to-end
(after the trl-shim + run-logging fixes) they created ./fake/ in the working
tree. Route the output dirs through pytest's tmp_path fixture so the suite no
longer pollutes the repo.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/tests/test_rlvr_env.py b/tests/test_rlvr_env.py
@@ -559,7 +559,7 @@ def test_grpo_default_config_has_stop_tool_names() -> None:
 # --- adapter resume smoke test (P2(g) + P4) --------------------------------
 
 
-def test_resume_from_adapter_attaches_trainable_peft_adapter() -> None:
+def test_resume_from_adapter_attaches_trainable_peft_adapter(tmp_path: Path) -> None:
     """Phase B: --resume-from-adapter attaches the adapter to the SFT base."""
     import sys  # noqa: PLC0415
 
@@ -594,7 +594,7 @@ def fake_from_pretrained(model_name, **kwargs):
 
     sft_path = Path("fake/sft")
     adapter_path = Path("fake/phase_a")
-    output_path = Path("fake/phase_b")
+    output_path = tmp_path / "phase_b"
 
     # Stub all the heavy training prep so we never hit BrainGlobe / disk.
     fake_grid = MagicMock()
@@ -666,7 +666,7 @@ def fake_from_pretrained(model_name, **kwargs):
     fake_trainer.train.assert_called_once()
 
 
-def test_no_resume_calls_get_peft_model() -> None:
+def test_no_resume_calls_get_peft_model(tmp_path: Path) -> None:
     """Without --resume-from-adapter, the driver must wrap the model in PEFT."""
     import sys  # noqa: PLC0415
 
@@ -725,7 +725,7 @@ def test_no_resume_calls_get_peft_model() -> None:
                 "--sft-model",
                 str(sft_path),
                 "--output-dir",
-                "fake/out",
+                str(tmp_path / "out"),
                 "--curriculum-mode",
                 "none",
                 "--reward-mode",
@@ -823,7 +823,7 @@ def fake_from_pretrained(model_name, **kwargs):
                 "--sft-model",
                 str(sft_path),
                 "--output-dir",
-                "fake/out",
+                str(tmp_path / "out"),
                 "--curriculum-mode",
                 "none",
                 "--reward-mode",
@@ -848,7 +848,7 @@ def fake_from_pretrained(model_name, **kwargs):
     assert fake_trl.GRPOTrainer.call_args.kwargs["model"] is sft_model
 
 
-def test_train_grpo_passes_stop_tool_names_to_grpo_config() -> None:
+def test_train_grpo_passes_stop_tool_names_to_grpo_config(tmp_path: Path) -> None:
     """The GRPOConfig built by main() must carry stop_tool_names from the TOML."""
     import sys  # noqa: PLC0415
 
@@ -918,7 +918,7 @@ def capture_reward(**kwargs):
                 "--sft-model",
                 str(sft_path),
                 "--output-dir",
-                "fake/out",
+                str(tmp_path / "out"),
                 "--curriculum-mode",
                 "none",
                 "--reward-mode",