Fix the MBPP `Pair` problem: extract helper definitions from canonical solutions, prepend them at sandbox runtime, and expose them in the prompt

Devesh-Maheshwari · Devesh-Maheshwari · commit 8c7c1aecb402 · 2026-04-24T23:03:45.000-05:00
diff --git a/scripts/build_grpo_dataset.py b/scripts/build_grpo_dataset.py
@@ -99,6 +99,7 @@ def main() -> int:
                     "task_id": t.task_id,
                     "test": t.test,
                     "entry_point": t.entry_point,
+                    "helpers": t.helpers,
                     "benchmark": BENCHMARK,
                 },
             }
diff --git a/src/verifiable_rl_coder/benchmarks/base.py b/src/verifiable_rl_coder/benchmarks/base.py
@@ -32,6 +32,12 @@ class Task:
     canonical_solution: str
     test: str
     entry_point: str
+    # Optional helpers (imports, classes, helper functions) extracted from
+    # canonical_solution. For MBPP tasks where tests reference custom types
+    # like `Pair`, these definitions are prepended to the model's solution
+    # at sandbox-execution time so tests can run regardless of whether the
+    # model copied them into its output.
+    helpers: str = ""
 
 
 def to_sandbox_inputs(
@@ -64,15 +70,22 @@ def to_sandbox_inputs(
         return solution, tests
 
     if benchmark == "mbpp_plus":
-        # MBPP+'s `assertion` field is a block of `assert ...` lines that call
-        # the entry point by name. Indent them into a test_main body.
+        # Prepend helpers (e.g. `class Pair`) so test assertions can reference
+        # custom types/imports without depending on whether the model copied
+        # them into its output.
+        if task.helpers:
+            solution = f"{task.helpers}\n\n{completion}"
+        else:
+            solution = completion
+        # The test module uses `from solution import *`, not just the entry point,
+        # so helpers at solution.py's module level are visible to the assertions.
         lines = [line for line in task.test.splitlines() if line.strip()]
         indented = "\n    ".join(lines)
         tests = (
-            f"from solution import {task.entry_point}\n"
+            "from solution import *  # noqa: F401, F403\n"
             "def test_main() -> None:\n"
             f"    {indented}\n"
         )
-        return completion, tests
+        return solution, tests
 
     raise ValueError(f"unknown benchmark: {benchmark!r}")
diff --git a/src/verifiable_rl_coder/benchmarks/mbpp_plus.py b/src/verifiable_rl_coder/benchmarks/mbpp_plus.py
@@ -18,24 +18,43 @@
 from .base import Task
 
 
+def _extract_mbpp_helpers(canonical_solution: str, entry_point: str) -> str:
+    """Same helper-extraction logic as mbpp_train (kept independent to
+    avoid an import cycle between mbpp_plus and mbpp_train)."""
+    import re
+    pattern = re.compile(
+        rf"^def\s+{re.escape(entry_point)}\s*\(",
+        re.MULTILINE,
+    )
+    match = pattern.search(canonical_solution)
+    if not match:
+        return ""
+    return canonical_solution[: match.start()].rstrip()
+
+
 def load_mbpp_plus() -> list[Task]:
     """Return all MBPP+ tasks in deterministic task_id order.
 
     MBPP+ stores tests under `assertion` (raw `assert ...` lines referencing
     the entry_point by name), not `test` (HumanEval+'s `def check(candidate)`
-    wrapper). We load the assertion block verbatim into `Task.test`; the
-    sandbox executor in Task 5 will wrap it appropriately per benchmark.
+    wrapper). We load the assertion block verbatim into `Task.test`. We also
+    extract any preamble (imports, classes) from `canonical_solution` into
+    `Task.helpers` so the sandbox executor can prepend them at run time.
     """
     raw = cast("dict[str, dict[str, Any]]", get_mbpp_plus())
-    tasks = [
-        Task(
-            task_id=task_id,
-            prompt=item["prompt"],
-            canonical_solution=item["canonical_solution"],
-            test=item["assertion"],
-            entry_point=item["entry_point"],
+    tasks: list[Task] = []
+    for task_id, item in raw.items():
+        canonical = item["canonical_solution"]
+        entry_point = item["entry_point"]
+        tasks.append(
+            Task(
+                task_id=task_id,
+                prompt=item["prompt"],
+                canonical_solution=canonical,
+                test=item["assertion"],
+                entry_point=entry_point,
+                helpers=_extract_mbpp_helpers(canonical, entry_point),
+            )
         )
-        for task_id, item in raw.items()
-    ]
     tasks.sort(key=lambda t: int(t.task_id.split("/")[-1]))
     return tasks
diff --git a/src/verifiable_rl_coder/benchmarks/mbpp_train.py b/src/verifiable_rl_coder/benchmarks/mbpp_train.py
@@ -31,17 +31,48 @@ def infer_entry_point(test_list: list[str]) -> str | None:
     return None
 
 
-def build_mbpp_prompt(text: str, test_list: list[str]) -> str:
-    """Compose the user-turn prompt for an MBPP-style task."""
-    example = test_list[0].strip() if test_list else ""
-    return (
-        f"Task: {text.strip()}\n"
-        "\n"
-        "Your function must satisfy this example:\n"
-        f"    {example}\n"
-        "\n"
-        "Write the complete Python function."
+def extract_helpers(canonical_solution: str, entry_point: str) -> str:
+    """Return everything before the entry_point's `def` line.
+
+    Captures imports, helper classes (e.g. `Pair`), and helper functions
+    that the canonical solution defines before the target function.
+    Returns "" if the entry_point's def isn't found, or the canonical
+    solution starts with the function (no preamble).
+    """
+    pattern = re.compile(
+        rf"^def\s+{re.escape(entry_point)}\s*\(",
+        re.MULTILINE,
     )
+    match = pattern.search(canonical_solution)
+    if not match:
+        return ""
+    return canonical_solution[: match.start()].rstrip()
+
+
+def build_mbpp_prompt(text: str, test_list: list[str], helpers: str = "") -> str:
+    """Compose the user-turn prompt for an MBPP-style task.
+
+    If `helpers` is non-empty (e.g. the task's canonical solution defines
+    a class/import the tests rely on), include it in the prompt so the
+    model knows what types/symbols it can use.
+    """
+    example = test_list[0].strip() if test_list else ""
+    parts: list[str] = [f"Task: {text.strip()}", ""]
+    if helpers:
+        parts.extend([
+            "Supporting definitions (already available — do not redefine):",
+            "```python",
+            helpers,
+            "```",
+            "",
+        ])
+    parts.extend([
+        "Your function must satisfy this example:",
+        f"    {example}",
+        "",
+        "Write the complete Python function.",
+    ])
+    return "\n".join(parts)
 
 
 def load_mbpp_train() -> list[Task]:
@@ -62,13 +93,16 @@ def load_mbpp_train() -> list[Task]:
             entry_point = infer_entry_point(test_list)
             if not entry_point or not test_list:
                 continue
+            canonical = str(item_d["code"])
+            helpers = extract_helpers(canonical, entry_point)
             tasks.append(
                 Task(
                     task_id=f"Mbpp/{task_id_int}",
-                    prompt=build_mbpp_prompt(str(item_d["text"]), test_list),
-                    canonical_solution=str(item_d["code"]),
+                    prompt=build_mbpp_prompt(str(item_d["text"]), test_list, helpers),
+                    canonical_solution=canonical,
                     test="\n".join(test_list),
                     entry_point=entry_point,
+                    helpers=helpers,
                 )
             )
     return tasks
diff --git a/src/verifiable_rl_coder/training/grpo_reward.py b/src/verifiable_rl_coder/training/grpo_reward.py
@@ -82,6 +82,9 @@ def compute_reward(
         canonical_solution="",
         test=test,
         entry_point=entry_point,
+        # Helpers (e.g. `class Pair` for some MBPP tasks) get prepended to
+        # the model's solution at sandbox-execution time.
+        helpers=str(extra_info.get("helpers", "")),
     )
 
     try:
diff --git a/tests/test_mbpp_train.py b/tests/test_mbpp_train.py
@@ -0,0 +1,89 @@
+"""Tests for `extract_helpers` and helper-aware MBPP prompt construction."""
+
+from __future__ import annotations
+
+from verifiable_rl_coder.benchmarks.mbpp_train import (
+    build_mbpp_prompt,
+    extract_helpers,
+    infer_entry_point,
+)
+
+
+# --- entry-point inference -------------------------------------------------
+
+
+def test_infer_entry_point_simple() -> None:
+    assert infer_entry_point(["assert add(1, 2) == 3"]) == "add"
+
+
+def test_infer_entry_point_picks_first() -> None:
+    assert infer_entry_point(["assert max_chain([Pair(1,2)]) == 1"]) == "max_chain"
+
+
+def test_infer_entry_point_returns_none_when_no_assert() -> None:
+    assert infer_entry_point(["# no assertion here"]) is None
+
+
+# --- helper extraction -----------------------------------------------------
+
+
+def test_extract_helpers_returns_class_definition() -> None:
+    canonical = (
+        "class Pair:\n"
+        "    def __init__(self, a, b):\n"
+        "        self.a = a\n"
+        "        self.b = b\n"
+        "\n"
+        "def max_chain_length(arr, n):\n"
+        "    return n\n"
+    )
+    helpers = extract_helpers(canonical, "max_chain_length")
+    assert "class Pair" in helpers
+    assert "def max_chain_length" not in helpers
+    assert helpers.endswith("self.b = b")
+
+
+def test_extract_helpers_returns_imports() -> None:
+    canonical = (
+        "from heapq import nlargest\n"
+        "import math\n"
+        "\n"
+        "def largest_n(arr, n):\n"
+        "    return nlargest(n, arr)\n"
+    )
+    helpers = extract_helpers(canonical, "largest_n")
+    assert "from heapq import nlargest" in helpers
+    assert "import math" in helpers
+    assert "def largest_n" not in helpers
+
+
+def test_extract_helpers_returns_empty_when_no_preamble() -> None:
+    canonical = "def add(a, b):\n    return a + b\n"
+    assert extract_helpers(canonical, "add") == ""
+
+
+def test_extract_helpers_returns_empty_when_def_not_found() -> None:
+    canonical = "    return a + b\n"  # body only, no def line — like HumanEval+
+    assert extract_helpers(canonical, "add") == ""
+
+
+# --- prompt construction ---------------------------------------------------
+
+
+def test_prompt_without_helpers_omits_supporting_section() -> None:
+    prompt = build_mbpp_prompt("Add two numbers.", ["assert add(1, 2) == 3"])
+    assert "Supporting definitions" not in prompt
+    assert "Task: Add two numbers." in prompt
+    assert "assert add(1, 2) == 3" in prompt
+
+
+def test_prompt_with_helpers_includes_supporting_section() -> None:
+    helpers = "class Pair:\n    pass"
+    prompt = build_mbpp_prompt(
+        "Find longest chain.",
+        ["assert max_chain([Pair(1, 2)]) == 1"],
+        helpers=helpers,
+    )
+    assert "Supporting definitions" in prompt
+    assert "class Pair" in prompt
+    assert "do not redefine" in prompt
diff --git a/tests/test_sandbox_inputs.py b/tests/test_sandbox_inputs.py
@@ -8,13 +8,19 @@
 from verifiable_rl_coder.benchmarks.base import Task, to_sandbox_inputs
 
 
-def _task(test: str, entry_point: str = "add", prompt: str = "def add(a, b):\n") -> Task:
+def _task(
+    test: str,
+    entry_point: str = "add",
+    prompt: str = "def add(a, b):\n",
+    helpers: str = "",
+) -> Task:
     return Task(
         task_id="t/0",
         prompt=prompt,
         canonical_solution="    return a + b\n",
         test=test,
         entry_point=entry_point,
+        helpers=helpers,
     )
 
 
@@ -62,3 +68,29 @@ def test_unknown_benchmark_raises() -> None:
     task = _task(test="")
     with pytest.raises(ValueError, match="unknown benchmark"):
         to_sandbox_inputs(task, "def add(a, b): ...", "swe_bench_lite")
+
+
+def test_mbpp_prepends_helpers_to_solution() -> None:
+    helpers = "class Pair:\n    def __init__(self, a, b):\n        self.a = a\n        self.b = b"
+    task = _task(
+        test="assert max_chain([Pair(1, 2)]) == 1\n",
+        entry_point="max_chain",
+        helpers=helpers,
+    )
+    completion = "def max_chain(arr):\n    return len(arr)\n"
+    solution, tests = to_sandbox_inputs(task, completion, "mbpp_plus")
+    assert "class Pair" in solution
+    assert solution.endswith(completion)
+    # Tests must use wildcard import so Pair (defined in solution.py) is in scope.
+    assert "from solution import *" in tests
+
+
+def test_mbpp_no_helpers_passes_completion_through() -> None:
+    task = _task(
+        test="assert add(1, 2) == 3\n",
+        helpers="",
+    )
+    completion = "def add(a, b):\n    return a + b\n"
+    solution, _ = to_sandbox_inputs(task, completion, "mbpp_plus")
+    assert solution == completion  # unchanged when no helpers
+

Original file line number	Diff line number	Diff line change
`@@ -99,6 +99,7 @@ def main() -> int:`
`99`	`99`	`"task_id": t.task_id,`
`100`	`100`	`"test": t.test,`
`101`	`101`	`"entry_point": t.entry_point,`
	`102`	`+ "helpers": t.helpers,`
`102`	`103`	`"benchmark": BENCHMARK,`
`103`	`104`	`},`
`104`	`105`	`}`
Original file line number	Diff line number	Diff line change
`@@ -82,6 +82,9 @@ def compute_reward(`
`82`	`82`	`canonical_solution="",`
`83`	`83`	`test=test,`
`84`	`84`	`entry_point=entry_point,`
	`85`	``+ # Helpers (e.g. `class Pair` for some MBPP tasks) get prepended to``
	`86`	`+ # the model's solution at sandbox-execution time.`
	`87`	`+ helpers=str(extra_info.get("helpers", "")),`
`85`	`88`	`)`
`86`	`89`
`87`	`90`	`try:`