Skip to content

Commit edf8b19

Browse files
diag: sample 1/32 completions to job.out so we can see what verl rollout produces
1 parent 4378b8a commit edf8b19

1 file changed

Lines changed: 12 additions & 0 deletions

File tree

src/verifiable_rl_coder/training/grpo_reward.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,18 @@ def compute_reward(
9292

9393
try:
9494
result = _get_verifier().verify(solution_str, task, benchmark)
95+
# DIAGNOSTIC — sample 1/32 completions to job.out so we can see what
96+
# verl's rollout is actually generating. Remove once GRPO produces
97+
# meaningful rewards (i.e. the rollout-shape question is answered).
98+
import random
99+
if random.random() < 1.0 / 32:
100+
print(
101+
f"[compute_reward] task={task_id} "
102+
f"reward={result.reward:.3f} passed={result.passed} "
103+
f"len={len(solution_str)} "
104+
f"first200={solution_str[:200]!r}",
105+
flush=True,
106+
)
95107
return float(result.reward)
96108
except Exception:
97109
return 0.0

0 commit comments

Comments
 (0)