unslothai · LudWittg · Feb 26, 2025
diff --git a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb
@@ -336,14 +336,14 @@
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"^<reasoning>\\n.*?\\n</reasoning>\\n<answer>\\n.*?\\n</answer>\\n$\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"<reasoning>.*?</reasoning>\\s*<answer>.*?</answer>\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def count_xml(text) -> float:\n",

diff --git a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb
@@ -339,14 +339,14 @@
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"^<reasoning>\\n.*?\\n</reasoning>\\n<answer>\\n.*?\\n</answer>\\n$\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"<reasoning>.*?</reasoning>\\s*<answer>.*?</answer>\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def count_xml(text) -> float:\n",

diff --git a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb
@@ -857,14 +857,14 @@
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"^<reasoning>\\n.*?\\n</reasoning>\\n<answer>\\n.*?\\n</answer>\\n$\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"<reasoning>.*?</reasoning>\\s*<answer>.*?</answer>\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def count_xml(text) -> float:\n",

diff --git a/nb/Llama3.1_(8B)-GRPO.ipynb b/nb/Llama3.1_(8B)-GRPO.ipynb
@@ -336,14 +336,14 @@
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"^<reasoning>\\n.*?\\n</reasoning>\\n<answer>\\n.*?\\n</answer>\\n$\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"<reasoning>.*?</reasoning>\\s*<answer>.*?</answer>\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def count_xml(text) -> float:\n",

diff --git a/nb/Phi_4_(14B)-GRPO.ipynb b/nb/Phi_4_(14B)-GRPO.ipynb
@@ -339,14 +339,14 @@
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"^<reasoning>\\n.*?\\n</reasoning>\\n<answer>\\n.*?\\n</answer>\\n$\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"<reasoning>.*?</reasoning>\\s*<answer>.*?</answer>\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def count_xml(text) -> float:\n",

diff --git a/nb/Qwen2.5_(3B)-GRPO.ipynb b/nb/Qwen2.5_(3B)-GRPO.ipynb
@@ -857,14 +857,14 @@
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"^<reasoning>\\n.*?\\n</reasoning>\\n<answer>\\n.*?\\n</answer>\\n$\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
     "    \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
     "    pattern = r\"<reasoning>.*?</reasoning>\\s*<answer>.*?</answer>\"\n",
     "    responses = [completion[0][\"content\"] for completion in completions]\n",
-    "    matches = [re.match(pattern, r) for r in responses]\n",
+    "    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
     "    return [0.5 if match else 0.0 for match in matches]\n",
     "\n",
     "def count_xml(text) -> float:\n",