1414# limitations under the License.
1515
1616import random
17+ import re
1718from logging import getLogger
1819from pathlib import Path
1920
3031logger = getLogger (__name__ )
3132
3233
34+ def normalize_number (s ):
35+ """Normalize a number string to an integer.
36+ Reference https://github.com/openai/gpt-oss/blob/48db88d8e29f48493fe75f084a8c9bd900a2b92f/gpt_oss/evals/aime_eval.py#L20
37+ """
38+ match = re .match (r"\d+" , s ) # match digits from the start
39+ if not match :
40+ return None
41+ return int (match .group (0 ))
42+
43+
3344class AIME25 (
3445 Dataset ,
3546 dataset_id = "aime25" ,
@@ -110,12 +121,15 @@ def generate(
110121
111122 processed_rows = []
112123 for _ , row in df .iterrows ():
113- correct_answer = row ["answer" ]
114-
124+ correct_answer = (
125+ normalize_number (row ["answer" ])
126+ if isinstance (row ["answer" ], str )
127+ else row ["answer" ]
128+ )
115129 # Create processed row
116130 processed_row = {
117131 "question" : row ["question" ], # Original question
118- "answer" : correct_answer ,
132+ "answer" : str ( correct_answer ) ,
119133 }
120134
121135 processed_rows .append (processed_row )
@@ -126,21 +140,6 @@ def generate(
126140 logger .info (f"Saved { len (df )} samples to { dst_path } " )
127141 return df
128142
129- # @classmethod
130- # def generate_aime25_dataset(
131- # cls,
132- # datasets_dir: Path,
133- # max_samples: int | None = None,
134- # force: bool = False,
135- # ) -> pd.DataFrame:
136- # """Generate the AIME25 dataset to a file."""
137- # df = AIME25.generate(
138- # datasets_dir=Path(datasets_dir),
139- # max_samples=max_samples,
140- # force=force,
141- # )
142- # return df
143-
144143
145144class AIME_MLPerf (AIME25 ):
146145 """AIME_MLPerf: AIME 2025 MLPerf Dataset
0 commit comments