forked from ChenmienTan/RL2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathorz.py
More file actions
24 lines (20 loc) · 612 Bytes
/
orz.py
File metadata and controls
24 lines (20 loc) · 612 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from typing import Dict, Any
from functools import partial
from math_verify import parse, verify
import logging
from RL2.datasets import Sample, base_generate
# Disable the "math_verify.parser" and "math_verify.grader" loggers at import
# time, so records logged directly on these two loggers are dropped.
# NOTE(review): presumably this keeps per-sample parse/grade messages out of
# the training logs -- confirm that nothing relies on them for debugging.
logging.getLogger("math_verify.parser").disabled = True
logging.getLogger("math_verify.grader").disabled = True
async def env_step(sample: Sample) -> Dict[str, Any]:
    """Grade one response against its reference answer and end the episode.

    Both ``sample.sample["answer"]`` and ``sample.action_text`` are passed
    through ``parse``; the parsed reference is then handed to ``verify`` as
    the first argument and the parsed response as the second. The verdict is
    converted to ``float`` and used as the reward.

    Args:
        sample: Object exposing a ``sample`` mapping with an ``"answer"``
            entry and an ``action_text`` attribute holding the response.

    Returns:
        A dict with ``"next_state"`` set to ``None``, ``"done"`` set to
        ``True`` and ``"reward"`` set to the float verdict.
    """
    # Parse the reference first, then the response, matching the order in
    # which ``verify`` receives them.
    reference = parse(sample.sample["answer"])
    prediction = parse(sample.action_text)
    verdict = verify(reference, prediction)

    # The step is always terminal: no follow-up state is produced.
    outcome: Dict[str, Any] = {
        "next_state": None,
        "done": True,
        "reward": float(verdict),
    }
    return outcome
# Module-level entry point: ``base_generate`` with its ``env_step_fn`` keyword
# pre-bound to the ``env_step`` defined in this file. Every other argument is
# supplied by the caller and forwarded unchanged by ``partial``.
generate = partial(base_generate, env_step_fn=env_step)