fix: chat template for local model (#72)

akristing22 · erinin · finitearth · web-flow · commit 15272b279d12 · 2026-05-23T11:11:02.000+02:00
* fix: chat template for local model

* added regression test

* added regression test

* remove tasks

* remove tasks

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* resolving precommit

* replaced gated models with ungated models

* alles raus was keine miete zahlt

---------

Co-authored-by: erinin <erinin@altara.zitis.lan>
Co-authored-by: finitearth <t.zehle@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,4 @@ poetry.lock
 CLAUDE.md
 **/CLAUDE.local.md
 .mypy_cache/
+token.txt
diff --git a/promptolution/llms/local_llm.py b/promptolution/llms/local_llm.py
@@ -79,7 +79,7 @@ def _get_response(self, prompts: List[str], system_prompts: List[str]) -> List[s
         """
         inputs: List[List[Dict[str, str]]] = []
         for prompt, sys_prompt in zip(prompts, system_prompts):
-            inputs.append([{"role": "system", "prompt": sys_prompt}, {"role": "user", "prompt": prompt}])
+            inputs.append([{"role": "system", "content": sys_prompt}, {"role": "user", "content": prompt}])
 
         with torch.no_grad():
             response = self.pipeline(inputs, pad_token_id=self.eos_token_id)
diff --git a/pyproject.toml b/pyproject.toml
@@ -52,8 +52,9 @@ pytest = ">=8.3.5"
 pytest-cov = ">=6.1.1"
 openai = ">=1.0.0"
 requests = ">=2.31.0"
-vllm = ">=0.13.0"
 transformers = ">=4.48.0"
+vllm = ">=0.13.0"
+torch = ">=2.0.0"
 
 [tool.poetry.group.docs.dependencies]
 mkdocs = ">=1.6.1"
diff --git a/tests/llms/test_local_llm.py b/tests/llms/test_local_llm.py
@@ -1,6 +1,7 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
+from transformers import AutoTokenizer
 
 from promptolution.llms import LocalLLM
 
@@ -67,3 +68,44 @@ def test_local_llm_get_response(mock_local_dependencies):
     assert len(responses) == 2
     assert responses[0] == "Mock response 1"
     assert responses[1] == "Mock response 2"
+
+
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "Qwen/Qwen2.5-0.5B-Instruct",
+        "HuggingFaceTB/SmolLM2-135M-Instruct",
+        "microsoft/Phi-3.5-mini-instruct",
+        "mistralai/Mistral-Nemo-Instruct-2407",
+    ],
+)
+def test_local_llm_chat_template_renders(model_id):
+    """Regression for #71: message dicts must use 'content' key so the
+    tokenizer's chat template renders the system and user text."""
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    with patch("promptolution.llms.local_llm.pipeline") as mock_pipeline_func, patch(
+        "promptolution.llms.local_llm.torch"
+    ):
+        mock_pipeline_obj = MagicMock()
+        mock_pipeline_obj.tokenizer = tokenizer
+        mock_pipeline_func.return_value = mock_pipeline_obj
+
+        def fake_call(inputs, **_):
+            return [
+                [{"generated_text": tokenizer.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)}]
+                for msg in inputs
+            ]
+
+        mock_pipeline_obj.side_effect = fake_call
+
+        local_llm = LocalLLM(model_id=model_id, batch_size=2)
+        prompts = ["What is 2 + 2?", "Name a colour."]
+        sys_prompts = ["You are a math tutor.", "You are concise."]
+
+        responses = local_llm._get_response(prompts, system_prompts=sys_prompts)
+
+        assert len(responses) == 2
+        for response, prompt, sys_prompt in zip(responses, prompts, sys_prompts):
+            assert prompt in response
+            assert sys_prompt in response