Skip to content

Commit d8b9400

Browse files
azibekmdrxyCopilot
authored
fix(huggingface): pass llm params to ChatHuggingFace (#32368)
This PR fixes #32234 and improves the HuggingFace chat model integration by: (1) ensuring ChatHuggingFace inherits key parameters (temperature, max_tokens, top_p, streaming, etc.) from the underlying LLM when they are not explicitly set; and (2) adding and updating unit tests to verify property inheritance. There are no breaking changes; these updates enhance reliability and maintainability. --------- Co-authored-by: Mason Daugherty <mason@langchain.dev> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Mason Daugherty <github@mdrxy.com>
1 parent cf595dc commit d8b9400

2 files changed

Lines changed: 165 additions & 0 deletions

File tree

libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,57 @@ class GetPopulation(BaseModel):
513513

514514
def __init__(self, **kwargs: Any):
    """Initialize the chat model, then backfill settings from the wrapped LLM.

    After the base initialization, properties the caller did not set are
    inherited from ``self.llm`` (see ``_inherit_llm_properties``), and the
    model id is resolved last.
    """
    super().__init__(**kwargs)
    # Inherit properties from the LLM if they weren't explicitly set
    self._inherit_llm_properties()
    self._resolve_model_id()

522+
def _inherit_llm_properties(self) -> None:
    """Inherit properties from the wrapped LLM instance if not explicitly set.

    Backfills generation parameters (temperature, max_tokens, top_p, seed,
    streaming, stop) on this chat model from ``self.llm`` so that callers
    who configured only the underlying LLM get consistent behavior.
    """
    # Nothing to inherit from: attribute absent or no LLM attached.
    if not hasattr(self, "llm") or self.llm is None:
        return

    # Map of ChatHuggingFace properties to LLM properties
    property_mappings = {
        "temperature": "temperature",
        "max_tokens": "max_new_tokens",  # Different naming convention
        "top_p": "top_p",
        "seed": "seed",
        "streaming": "streaming",
        "stop": "stop_sequences",
    }

    # Inherit properties from LLM and not explicitly set here.
    # NOTE(review): this loop uses truthiness, so an explicit falsy setting
    # on the chat model (e.g. temperature=0.0 or streaming=False) is
    # overwritten by the LLM's value, and a falsy LLM value is never
    # propagated. The endpoint branch below uses `is None` checks instead —
    # confirm the asymmetry is intended.
    for chat_prop, llm_prop in property_mappings.items():
        if hasattr(self.llm, llm_prop):
            llm_value = getattr(self.llm, llm_prop)
            chat_value = getattr(self, chat_prop, None)
            if not chat_value and llm_value:
                setattr(self, chat_prop, llm_value)

    # Handle special cases for HuggingFaceEndpoint
    if _is_huggingface_endpoint(self.llm):
        # Inherit additional HuggingFaceEndpoint specific properties
        endpoint_mappings = {
            "frequency_penalty": "repetition_penalty",
        }

        for chat_prop, llm_prop in endpoint_mappings.items():
            if hasattr(self.llm, llm_prop):
                llm_value = getattr(self.llm, llm_prop)
                chat_value = getattr(self, chat_prop, None)
                # Strict None-checks: only fill values that are truly unset.
                if chat_value is None and llm_value is not None:
                    setattr(self, chat_prop, llm_value)

    # Inherit model_kwargs if not explicitly set; .copy() so later mutation
    # of one object's kwargs does not leak into the other.
    if (
        not self.model_kwargs
        and hasattr(self.llm, "model_kwargs")
        and isinstance(self.llm.model_kwargs, dict)
    ):
        self.model_kwargs = self.llm.model_kwargs.copy()
518567
@model_validator(mode="after")
519568
def validate_llm(self) -> Self:
520569
if (

libs/partners/huggingface/tests/unit_tests/test_chat_models.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,17 @@
2323
def mock_llm() -> Mock:
    """Build a HuggingFaceEndpoint mock carrying every inheritable parameter.

    The attribute values are the ones the inheritance tests assert against.
    """
    llm = Mock(spec=HuggingFaceEndpoint)
    attrs = {
        "inference_server_url": "test endpoint url",
        "temperature": 0.7,
        "max_new_tokens": 512,
        "top_p": 0.9,
        "seed": 42,
        "streaming": True,
        "repetition_penalty": 1.1,
        "stop_sequences": ["</s>", "<|end|>"],
        "model_kwargs": {"do_sample": True, "top_k": 50},
        "server_kwargs": {"timeout": 120},
        "repo_id": "test/model",
        "model": "test/model",
    }
    for name, value in attrs.items():
        setattr(llm, name, value)
    return llm

2839

@@ -209,3 +220,108 @@ def test_bind_tools(chat_hugging_face: Any) -> None:
209220
_, kwargs = mock_super_bind.call_args
210221
assert kwargs["tools"] == tools
211222
assert kwargs["tool_choice"] == "auto"
223+
224+
225+
def test_property_inheritance_integration(chat_hugging_face: Any) -> None:
    """ChatHuggingFace exposes the generation params of its wrapped LLM."""
    inherited = {
        "temperature": 0.7,
        "max_tokens": 512,
        "top_p": 0.9,
    }
    for name, expected in inherited.items():
        assert getattr(chat_hugging_face, name, None) == expected
    # streaming must be the actual True object, not merely truthy
    assert getattr(chat_hugging_face, "streaming", None) is True
233+
def test_default_params_includes_inherited_values(chat_hugging_face: Any) -> None:
    """_default_params reflects values inherited from the wrapped LLM."""
    defaults = chat_hugging_face._default_params
    # The LLM's max_new_tokens surfaces here under the max_tokens key.
    assert defaults["max_tokens"] == 512
    assert defaults["temperature"] == 0.7
    # The LLM's streaming flag maps onto the OpenAI-style "stream" key.
    assert defaults["stream"] is True
241+
def test_create_message_dicts_includes_inherited_params(chat_hugging_face: Any) -> None:
    """Inherited parameters flow into the payload built for the API call."""
    history = [HumanMessage(content="test message")]
    message_dicts, params = chat_hugging_face._create_message_dicts(history, None)

    # Inherited generation parameters appear in the request params.
    assert params["max_tokens"] == 512
    assert params["temperature"] == 0.7
    assert params["stream"] is True

    # The single human message converts to one OpenAI-style user dict.
    assert len(message_dicts) == 1
    first = message_dicts[0]
    assert first["role"] == "user"
    assert first["content"] == "test message"
257+
def test_model_kwargs_inheritance(mock_llm: Any) -> None:
    """model_kwargs are copied from the LLM when not set on the chat model."""
    resolver = patch(
        "langchain_huggingface.chat_models.huggingface.ChatHuggingFace._resolve_model_id"
    )
    with resolver:
        chat = ChatHuggingFace(llm=mock_llm)
        assert chat.model_kwargs == {"do_sample": True, "top_k": 50}
266+
def test_huggingface_endpoint_specific_inheritance(mock_llm: Any) -> None:
    """Endpoint-only parameters map onto their chat-model equivalents."""
    resolver = patch(
        "langchain_huggingface.chat_models.huggingface.ChatHuggingFace._resolve_model_id"
    )
    endpoint_check = patch(
        "langchain_huggingface.chat_models.huggingface._is_huggingface_endpoint",
        return_value=True,
    )
    with resolver, endpoint_check:
        chat = ChatHuggingFace(llm=mock_llm)
        # repetition_penalty=1.1 on the endpoint surfaces as frequency_penalty.
        assert getattr(chat, "frequency_penalty", None) == 1.1
283+
def test_parameter_precedence_explicit_over_inherited(mock_llm: Any) -> None:
    """Constructor arguments win over values carried by the wrapped LLM."""
    resolver = patch(
        "langchain_huggingface.chat_models.huggingface.ChatHuggingFace._resolve_model_id"
    )
    with resolver:
        chat = ChatHuggingFace(llm=mock_llm, max_tokens=256, temperature=0.5)
        # Explicit 256 / 0.5 beat the LLM's 512 / 0.7.
        assert chat.max_tokens == 256
        assert chat.temperature == 0.5
294+
def test_inheritance_with_no_llm_properties(mock_llm: Any) -> None:
    """Missing LLM attributes are skipped; the available ones still inherit."""
    # Strip two attributes so the mock no longer advertises them.
    del mock_llm.temperature
    del mock_llm.top_p

    resolver = patch(
        "langchain_huggingface.chat_models.huggingface.ChatHuggingFace._resolve_model_id"
    )
    with resolver:
        chat = ChatHuggingFace(llm=mock_llm)
        # max_new_tokens survived on the mock, so max_tokens still inherits.
        assert chat.max_tokens == 512
        # The deleted attributes stay at their None/default values.
        assert getattr(chat, "temperature", None) is None
        assert getattr(chat, "top_p", None) is None
311+
def test_inheritance_with_empty_llm() -> None:
    """An LLM without generation attributes leaves the defaults untouched."""
    resolver = patch(
        "langchain_huggingface.chat_models.huggingface.ChatHuggingFace._resolve_model_id"
    )
    with resolver:
        # Minimal mock: passes validation but carries no inheritable params
        # (a spec'd Mock has no concrete attribute values by default).
        bare_llm = Mock(spec=HuggingFaceEndpoint)
        bare_llm.repo_id = "test/model"
        bare_llm.model = "test/model"

        chat = ChatHuggingFace(llm=bare_llm)
        # With nothing to inherit, the chat model keeps its defaults.
        assert chat.max_tokens is None
        assert chat.temperature is None

0 commit comments

Comments
 (0)