Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for issue #19933: langchain_huggingface #25136

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,43 +1,44 @@
"""Hugging Face Chat Wrapper."""

from dataclasses import dataclass
from transformers.pipelines.text_generation import Chat
from typing import (
Any,
Callable,
Dict,
List,
Literal,
Optional,
Sequence,
Type,
Union,
cast,
)

from langchain_core.callbacks.manager import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
AIMessage,
BaseMessage,
ChatMessage,
HumanMessage,
SystemMessage,
ToolMessage,
)
from langchain_core.outputs import ChatGeneration, ChatResult, LLMResult
from langchain_core.pydantic_v1 import root_validator
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool

from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_huggingface.llms.huggingface_pipeline import HuggingFacePipeline

DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful, and honest assistant."""

Check failure on line 41 in libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py

View workflow job for this annotation

GitHub Actions / cd libs/partners/huggingface / make lint #3.12

Ruff (I001)

langchain_huggingface/chat_models/huggingface.py:3:1: I001 Import block is un-sorted or un-formatted


@dataclass
Expand Down Expand Up @@ -356,7 +357,7 @@
def _generate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
stop: Optional[List[Chat]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
Expand Down Expand Up @@ -406,9 +407,7 @@

messages_dicts = [self._to_chatml_format(m) for m in messages]

return self.tokenizer.apply_chat_template(
messages_dicts, tokenize=False, add_generation_prompt=True
)
return messages_dicts

def _to_chatml_format(self, message: BaseMessage) -> dict:
"""Convert LangChain message to ChatML format."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from langchain_core.language_models.llms import BaseLLM
from langchain_core.outputs import Generation, GenerationChunk, LLMResult
from langchain_core.pydantic_v1 import Extra
from transformers.pipelines.text_generation import ReturnType

Check failure on line 11 in libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py

View workflow job for this annotation

GitHub Actions / cd libs/partners/huggingface / make lint #3.12

Ruff (F401)

langchain_huggingface/llms/huggingface_pipeline.py:11:52: F401 `transformers.pipelines.text_generation.ReturnType` imported but unused

DEFAULT_MODEL_ID = "gpt2"
DEFAULT_TASK = "text-generation"
Expand Down Expand Up @@ -253,7 +254,7 @@

def _generate(
self,
prompts: List[str],
prompts: List[List[dict[str, str]]], # List of prompts in the ChatML format, e.g. {"role": "user", "content": "Hello, how are you?"}

Check failure on line 257 in libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py

View workflow job for this annotation

GitHub Actions / cd libs/partners/huggingface / make lint #3.12

Ruff (E501)

langchain_huggingface/llms/huggingface_pipeline.py:257:89: E501 Line too long (138 > 88)
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
Expand All @@ -269,6 +270,7 @@
# Process batch of prompts
responses = self.pipeline(
batch_prompts,
return_full_text=False,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do this in a separate issue / PR?

**pipeline_kwargs,
)

Expand All @@ -294,7 +296,8 @@
if skip_prompt:
text = text[len(batch_prompts[j]) :]
# Append the processed text to results
text_generations.append(text)
# The 'text' variable is in the ChatML format, so we take the last message (the one just generated by the model) and access its text content

Check failure on line 299 in libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py

View workflow job for this annotation

GitHub Actions / cd libs/partners/huggingface / make lint #3.12

Ruff (E501)

langchain_huggingface/llms/huggingface_pipeline.py:299:89: E501 Line too long (146 > 88)
text_generations.append(text)

return LLMResult(
generations=[[Generation(text=text)] for text in text_generations]
Expand Down
Loading