We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 43cb3bd · commit ef192de · Copy full SHA for ef192de
1 file changed
tunix/rl/agentic/agentic_grpo_learner.py
@@ -107,7 +107,7 @@ class GRPOConfig(agentic_rl_learner.AgenticRLConfig):
107
epsilon_high: float | None = None # 0.28 from DAPO.
108
off_policy_steps: int = 0
109
degenerate_group_masking: bool = (
110
- True # Whether to mask out degenerate groups with all-0 advantages.
+ False # Whether to mask out degenerate groups with all-0 advantages.
111
)
112
use_rollout_logps: bool = True
113
# Truncated importance-sampling (TIS) correction for the residual mismatch
0 commit comments