fix dataset parsing error (#540)

AlanShao-zy · lewtun · web-flow · commit 1802bec75f43 · 2025-03-28T13:17:04.000+01:00
* fix dataset parsing error

Support a configurable question field to fix errors when a dataset's question field is not named 'problem'.

* add question field config

add script_args: question field

* refactor: datasets prompt column

---------

Co-authored-by: lewtun <lewis.c.tunstall@gmail.com>
diff --git a/.gitignore b/.gitignore
@@ -177,4 +177,5 @@ logs/
 eval_results/
 results/
 
-.vscode/
+.vscode/
+.python-version
diff --git a/src/open_r1/configs.py b/src/open_r1/configs.py
@@ -154,3 +154,7 @@ class GRPOScriptArguments(trl.ScriptArguments):
             "help": "for each generation, evaluate these many test cases in parallel, then check if any of them failed (0 score): if so stop evaluating; otherwise continue with the next batch of test cases. Useful to avoid overloading the eval server + save time on wrong solutions"
         },
     )
+    dataset_prompt_column: str = field(
+        default="prompt",
+        metadata={"help": "Column to use as prompts for training."},
+    )
diff --git a/src/open_r1/grpo.py b/src/open_r1/grpo.py
@@ -84,13 +84,16 @@ def main(script_args, training_args, model_args):
     reward_funcs = get_reward_funcs(script_args)
 
     # Format into conversation
-    def make_conversation(example):
+    def make_conversation(example, prompt_column: str = script_args.dataset_prompt_column):
         prompt = []
 
         if training_args.system_prompt is not None:
             prompt.append({"role": "system", "content": training_args.system_prompt})
 
-        prompt.append({"role": "user", "content": example["problem"]})
+        if prompt_column not in example:
+            raise ValueError(f"Dataset Question Field Error: {prompt_column} is not supported.")
+
+        prompt.append({"role": "user", "content": example[prompt_column]})
         return {"prompt": prompt}
 
     dataset = dataset.map(make_conversation)

Original file line number	Diff line number	Diff line change
`@@ -154,3 +154,7 @@ class GRPOScriptArguments(trl.ScriptArguments):`
`154`	`154`	`"help": "for each generation, evaluate these many test cases in parallel, then check if any of them failed (0 score): if so stop evaluating; otherwise continue with the next batch of test cases. Useful to avoid overloading the eval server + save time on wrong solutions"`
`155`	`155`	`},`
`156`	`156`	`)`
	`157`	`+ dataset_prompt_column: str = field(`
	`158`	`+ default="prompt",`
	`159`	`+ metadata={"help": "Column to use as prompts for training."},`
	`160`	`+ )`