19
19
from eureka_ml_insights .data_utils import (
20
20
AddColumnAndData ,
21
21
ColumnRename ,
22
+ CopyColumn ,
22
23
DataReader ,
23
24
RunPythonTransform ,
24
25
SamplerTransform ,
33
34
34
35
from .aime import AIME_PIPELINE
35
36
36
- DEFAULT_N_ITER = 2
37
-
38
-
39
- resume_from_dict = {
40
- 1 : "/home/sayouse/git/eureka-ml-insights/logs/AIME_SEQ_PIPELINE/2025-03-04-21-07-09.687511/student_inference_result_1/inference_result.jsonl" ,
41
- 2 : None ,
42
- }
37
+ DEFAULT_N_ITER = 3
38
+ RESULT_COLS = [
39
+ "attempt_id" ,
40
+ "model_output" ,
41
+ "uid" ,
42
+ "prompt" ,
43
+ "ground_truth" ,
44
+ "Year" ,
45
+ "ID" ,
46
+ "student_extracted_answer" ,
47
+ "verification_result"
48
+ ]
49
+ resume_from_dict = {}
43
50
44
51
45
52
class AIME_SEQ_PIPELINE (AIME_PIPELINE ):
46
53
"""This class specifies the config for running AIME benchmark on any model"""
47
54
48
- def get_result_columns (self , i : int ) -> list [str ]:
49
- """Get the desired result columns to be saved for the given iteration
50
- Args:
51
- i (int): The iteration number
52
- Returns:
53
- list[str]: The list of columns to be saved
54
- """
55
- verification_cols_so_far = [f"verification_result_{ j } " for j in range (1 , i )]
56
- extracted_ans_cols_so_far = [f"student_extracted_answer_{ j } " for j in range (1 , i )]
57
- return (
58
- [
59
- "attempt_id" ,
60
- "model_output" ,
61
- "uid" ,
62
- "prompt" ,
63
- "ground_truth" ,
64
- "Year" ,
65
- "ID" ,
66
- ]
67
- + verification_cols_so_far
68
- + extracted_ans_cols_so_far
69
- )
55
+
56
+
70
57
71
58
def configure_pipeline (
72
59
self , model_config : ModelConfig , resume_from : str = None , ** kwargs : dict [str , Any ]
@@ -77,10 +64,6 @@ def configure_pipeline(
77
64
78
65
n_iter = kwargs .get ("n_iter" , DEFAULT_N_ITER )
79
66
80
- self .data_processing_comp .data_reader_config .init_args ["transform" ].transforms .append (
81
- SamplerTransform (random_seed = 40 , sample_count = 1 )
82
- )
83
-
84
67
component_configs = [self .data_processing_comp ]
85
68
for i in range (1 , n_iter + 1 ):
86
69
# Student inference component, reads prompts from the last prompt processing component
@@ -114,9 +97,12 @@ def configure_pipeline(
114
97
"transform" : SequenceTransform (
115
98
[
116
99
# extract and verify the student answer
117
- AIMEExtractAnswer (f"model_output" , f"student_extracted_answer_ { i } " ),
118
- MetricBasedVerifier (ExactMatch , f"student_extracted_answer_ { i } " ),
100
+ AIMEExtractAnswer (f"model_output" , f"student_extracted_answer " ),
101
+ MetricBasedVerifier (ExactMatch , f"student_extracted_answer " ),
119
102
AddColumnAndData ("attempt_id" , i ),
103
+ CopyColumn (
104
+ column_name_src = "model_output" ,
105
+ column_name_dst = f"student_output" )
120
106
]
121
107
),
122
108
},
@@ -145,7 +131,7 @@ def configure_pipeline(
145
131
"format" : ".jsonl" ,
146
132
},
147
133
),
148
- output_data_columns = self . get_result_columns ( i ) ,
134
+ output_data_columns = RESULT_COLS ,
149
135
output_dir = os .path .join (self .log_dir , f"last_inference_result_join_{ i } " ),
150
136
)
151
137
last_agg_dir = self .last_inference_result_join_comp .output_dir
@@ -176,12 +162,6 @@ def configure_pipeline(
176
162
{
177
163
"path" : os .path .join (self .filtering_comp .output_dir , "transformed_data.jsonl" ),
178
164
"format" : ".jsonl" ,
179
- "transform" : ColumnRename (
180
- name_mapping = {
181
- "verification_result" : f"verification_result_{ i } " ,
182
- "model_output" : f"student_output_{ i } " ,
183
- }
184
- ),
185
165
},
186
166
),
187
167
prompt_template_path = os .path .join (
@@ -228,8 +208,9 @@ def configure_pipeline(
228
208
229
209
# Pass the combined results from all iterations to the eval reporting component
230
210
self .evalreporting_comp .data_reader_config .init_args ["path" ] = os .path .join (
231
- self . last_inference_result_join_comp . output_dir , "transformed_data.jsonl"
211
+ last_agg_dir , "transformed_data.jsonl"
232
212
)
213
+ self .evalreporting_comp .metric_config .init_args ["model_output_col" ] = "student_extracted_answer"
233
214
234
215
component_configs .append (self .evalreporting_comp )
235
216
0 commit comments