Skip to content

Commit f29f65f

Browse files
greenpolo and claude committed
test(rlvr): write run cards under tmp_path, not the repo root
train_grpo.main() writes a run card to its output dir. These tests passed relative paths (fake/out, fake/phase_b), so once they ran end-to-end (after the trl-shim + run-logging fixes) they created ./fake/ in the working tree. Route the output dirs through pytest's tmp_path fixture so the suite no longer pollutes the repo.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 29ae5dd commit f29f65f

1 file changed

Lines changed: 7 additions & 7 deletions

File tree

tests/test_rlvr_env.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ def test_grpo_default_config_has_stop_tool_names() -> None:
559559
# --- adapter resume smoke test (P2(g) + P4) --------------------------------
560560

561561

562-
def test_resume_from_adapter_attaches_trainable_peft_adapter() -> None:
562+
def test_resume_from_adapter_attaches_trainable_peft_adapter(tmp_path: Path) -> None:
563563
"""Phase B: --resume-from-adapter attaches the adapter to the SFT base."""
564564
import sys # noqa: PLC0415
565565

@@ -594,7 +594,7 @@ def fake_from_pretrained(model_name, **kwargs):
594594

595595
sft_path = Path("fake/sft")
596596
adapter_path = Path("fake/phase_a")
597-
output_path = Path("fake/phase_b")
597+
output_path = tmp_path / "phase_b"
598598

599599
# Stub all the heavy training prep so we never hit BrainGlobe / disk.
600600
fake_grid = MagicMock()
@@ -666,7 +666,7 @@ def fake_from_pretrained(model_name, **kwargs):
666666
fake_trainer.train.assert_called_once()
667667

668668

669-
def test_no_resume_calls_get_peft_model() -> None:
669+
def test_no_resume_calls_get_peft_model(tmp_path: Path) -> None:
670670
"""Without --resume-from-adapter, the driver must wrap the model in PEFT."""
671671
import sys # noqa: PLC0415
672672

@@ -725,7 +725,7 @@ def test_no_resume_calls_get_peft_model() -> None:
725725
"--sft-model",
726726
str(sft_path),
727727
"--output-dir",
728-
"fake/out",
728+
str(tmp_path / "out"),
729729
"--curriculum-mode",
730730
"none",
731731
"--reward-mode",
@@ -823,7 +823,7 @@ def fake_from_pretrained(model_name, **kwargs):
823823
"--sft-model",
824824
str(sft_path),
825825
"--output-dir",
826-
"fake/out",
826+
str(tmp_path / "out"),
827827
"--curriculum-mode",
828828
"none",
829829
"--reward-mode",
@@ -848,7 +848,7 @@ def fake_from_pretrained(model_name, **kwargs):
848848
assert fake_trl.GRPOTrainer.call_args.kwargs["model"] is sft_model
849849

850850

851-
def test_train_grpo_passes_stop_tool_names_to_grpo_config() -> None:
851+
def test_train_grpo_passes_stop_tool_names_to_grpo_config(tmp_path: Path) -> None:
852852
"""The GRPOConfig built by main() must carry stop_tool_names from the TOML."""
853853
import sys # noqa: PLC0415
854854

@@ -918,7 +918,7 @@ def capture_reward(**kwargs):
918918
"--sft-model",
919919
str(sft_path),
920920
"--output-dir",
921-
"fake/out",
921+
str(tmp_path / "out"),
922922
"--curriculum-mode",
923923
"none",
924924
"--reward-mode",

0 commit comments

Comments
 (0)