@@ -38,32 +38,26 @@ class AIMEEvaluator(Evaluator):
3838
@override
def load_dataset(self) -> Iterable[Action]:
    """Stream the AIME dataset from HuggingFace, one problem at a time.

    The ``train`` split at ``self.dataset_path`` is opened with
    ``streaming=True``. Because this method is a generator, nothing is
    opened until the first item is requested, and the result can be
    iterated only once.

    Rows lacking a ``problem`` or ``answer`` field are skipped with a
    warning. Once the stream is exhausted, a summary of yielded versus
    seen rows is logged.

    Yields:
        Action objects with problem text and ground truth.
    """
    dataset = load_dataset(self.dataset_path, split="train", streaming=True)

    # A streamed dataset has no len(), so running counts are kept for the
    # summary log instead of len(actions) / len(dataset).
    seen = 0
    loaded = 0
    for i, row in enumerate(dataset):
        seen += 1
        problem, answer = row.get("problem"), row.get("answer")
        if problem is None or answer is None:
            logger.warning(f"Row {i}: missing problem/answer, skipped")
            continue
        loaded += 1
        yield Action(
            message=str(problem),
            task_context=TaskContext(
                # Fall back to the row index when the row has no "id".
                id=f"{self.benchmark_name}_{row.get('id', i)}",
                ground_truth=str(answer),
            ),
        )

    # Only reached when the caller consumes the whole stream.
    logger.info(f"[{self.benchmark_name}] Loaded {loaded}/{seen} prompts")
66-
6761
6862@register ("aime-2024" )
6963class AIME2024Evaluator (AIMEEvaluator ):
0 commit comments