
Commit 82377fc

Fix ignore_eos
1 parent 91fd960 commit 82377fc

File tree

- README.md
- dart_math/eval.py
- nbs/index.ipynb
- pipeline/gen.ipynb
- pipeline/gen.py

5 files changed: +50 −15


README.md

Lines changed: 3 additions & 0 deletions
@@ -328,6 +328,9 @@ To reproduce other inference settings, just refer to the paper and
 modify the `--model_name_or_path` and `--gen_save_path` arguments
 accordingly.
 
+- We observed that Llama-3-8B(-Base) sometimes decodes EoS immediately.
+  Try using `--ignore_eos` as a workaround.
+
 For other general inference settings, please modify the command or
 directly modify the
 [script](https://github.com/hkust-nlp/dart-math/blob/main/pipeline/gen.py).
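
For reference, a minimal invocation with the new flag might look like the following (the model ID and save path are illustrative; see the README for the full command):

    python pipeline/gen.py \
        --model_name_or_path meta-llama/Meta-Llama-3-8B \
        --gen_save_path data/res/llama3-8b-gen.jsonl \
        --ignore_eos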

dart_math/eval.py

Lines changed: 15 additions & 11 deletions
@@ -126,17 +126,21 @@ def eq(self, ref: str, ans: str) -> bool:
 
     def extract_ans(self, resp_str: str) -> str:
         """Extract answer segment from complete `resp`."""
-
-        resp = self.extract_explicit_ans(resp_str)
-        if not self.strict_extract and resp is None:  # use the last number
-            pattern = r"-?\d*\.?\d+"
-            resp = re.findall(pattern, resp_str.replace(",", ""))
-            if len(resp) >= 1:
-                resp = resp[-1]
-            else:
-                resp = ""
-
-        return resp
+        ans = self.extract_explicit_ans(resp_str)
+        if ans is not None:
+            return ans
+        elif not self.strict_extract:
+            # Speculate with the last LaTeX formula
+            matches = re.findall(
+                r"(?:\$|\\\(|\\\[)([^\$]+)(?:\$|\\\)|\\\])", resp_str, re.DOTALL
+            )
+            if len(matches) > 0:
+                return matches[-1]
+            # Speculate with the last number
+            matches = re.findall(r"-?\d*\.?\d+", resp_str.replace(",", ""))
+            if len(matches) > 0:
+                return matches[-1]
+        return ""  # Empty str if no answer is found
 
     def extract_explicit_ans(self, resp_str: str) -> str:
         resp_str = self.clean_trailing(resp_str)
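
To see what the new fallback in `extract_ans` does, here is a standalone sketch applying the same two regexes outside the class (the sample response string is made up):

    import re

    resp_str = "We simplify to get $x = 2$, so the final answer is $7$."

    # 1) Speculate with the last LaTeX formula (content between $ / \( / \[ delimiters)
    matches = re.findall(
        r"(?:\$|\\\(|\\\[)([^\$]+)(?:\$|\\\)|\\\])", resp_str, re.DOTALL
    )
    print(matches[-1] if matches else None)  # -> 7

    # 2) Failing that, speculate with the last number, after stripping commas
    matches = re.findall(r"-?\d*\.?\d+", resp_str.replace(",", ""))
    print(matches[-1] if matches else None)  # -> 7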

nbs/index.ipynb

Lines changed: 2 additions & 0 deletions
@@ -350,6 +350,8 @@
     "\n",
     "To reproduce other inference settings, just refer to the paper and modify the `--model_name_or_path` and `--gen_save_path` arguments accordingly.\n",
     "\n",
+    "- We observed that Llama-3-8B(-Base) sometimes decodes EoS immediately. Try using `--ignore_eos` as a workaround.\n",
+    "\n",
     "For other general inference settings, please modify the command or directly modify the [script](https://github.com/hkust-nlp/dart-math/blob/main/pipeline/gen.py).\n",
     "\n",
     "- To test **base** models, please add the corresponding **ID** to `BASE_MODEL_IDS` from [dart_math.utils](https://github.com/hkust-nlp/dart-math/blob/main/dart_math/utils.py).\n",

pipeline/gen.ipynb

Lines changed: 15 additions & 2 deletions
@@ -113,6 +113,13 @@
     ")\n",
     "\n",
     "parser.add_argument(\n",
+    "    \"--revision\",\n",
+    "    type=str,\n",
+    "    default=None,\n",
+    "    help=\"Model revision.\",\n",
+    ")\n",
+    "\n",
+    "parser.add_argument(\n",
     "    \"--dtype\",\n",
     "    type=str,\n",
     "    default=\"bfloat16\",\n",
@@ -148,6 +155,12 @@
     "    help=\"Maximum number of new tokens.\",\n",
     ")\n",
     "parser.add_argument(\n",
+    "    \"--ignore_eos\",\n",
+    "    action=\"store_true\",\n",
+    "    default=False,\n",
+    "    help=\"Ignore the EOS token in generation. Llama-3-8B(-Base) sometimes decodes EoS immediately; try this flag if you encounter the issue.\",\n",
+    ")\n",
+    "parser.add_argument(\n",
     "    \"--n_shots\",\n",
     "    type=int,\n",
     "    default=-1,\n",
@@ -342,7 +355,7 @@
     "    temperature=args.temperature,\n",
     "    top_p=args.top_p,\n",
     "    max_tokens=args.max_new_toks,\n",
-    "    ignore_eos=True,  # Llama-3-8B(-Base) tends to decode EoS immediately\n",
+    "    ignore_eos=args.ignore_eos,\n",
     "    skip_special_tokens=True,\n",
     "    seed=args.inf_seed,\n",
     ")"
@@ -421,7 +434,7 @@
 "source": [
     "llm = LLM(\n",
     "    model=args.model_name_or_path,\n",
-    "    tokenizer=args.model_name_or_path,\n",
+    "    revision=args.revision,\n",
     "    tensor_parallel_size=torch.cuda.device_count(),\n",
     "    dtype=args.dtype,\n",
     "    seed=args.inf_seed,\n",

pipeline/gen.py

Lines changed: 15 additions & 2 deletions
@@ -60,6 +60,13 @@
     help="HF-style model name or path.",
 )
 
+parser.add_argument(
+    "--revision",
+    type=str,
+    default=None,
+    help="Model revision.",
+)
+
 parser.add_argument(
     "--dtype",
     type=str,
@@ -95,6 +102,12 @@
     default=2048,
     help="Maximum number of new tokens.",
 )
+parser.add_argument(
+    "--ignore_eos",
+    action="store_true",
+    default=False,
+    help="Ignore the EOS token in generation. Llama-3-8B(-Base) sometimes decodes EoS immediately; try this flag if you encounter the issue.",
+)
 parser.add_argument(
     "--n_shots",
     type=int,
@@ -231,7 +244,7 @@
     temperature=args.temperature,
     top_p=args.top_p,
     max_tokens=args.max_new_toks,
-    ignore_eos=True,  # Llama-3-8B(-Base) tends to decode EoS immediately
+    ignore_eos=args.ignore_eos,
     skip_special_tokens=True,
     seed=args.inf_seed,
 )
@@ -244,7 +257,7 @@
 
 llm = LLM(
     model=args.model_name_or_path,
-    tokenizer=args.model_name_or_path,
+    revision=args.revision,
     tensor_parallel_size=torch.cuda.device_count(),
     dtype=args.dtype,
     seed=args.inf_seed,
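
Taken together, the two new CLI arguments reach vLLM roughly as below (a condensed sketch of the script's setup; the model ID and sampling values are placeholders):

    from vllm import LLM, SamplingParams

    # ignore_eos is now user-controlled via --ignore_eos instead of hard-coded
    sampling_params = SamplingParams(
        temperature=0.0,
        max_tokens=2048,
        ignore_eos=False,
        skip_special_tokens=True,
    )

    # --revision pins a specific model revision; None uses the default branch
    llm = LLM(
        model="meta-llama/Meta-Llama-3-8B",
        revision=None,
        dtype="bfloat16",
    )
    outputs = llm.generate(["1 + 1 ="], sampling_params)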
