Model features: native async #110


Open · wants to merge 19 commits into main
34 changes: 32 additions & 2 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -19,7 +19,9 @@
)
from urllib.parse import urlparse, urlunparse

import httpx
import requests
from httpx import AsyncClient
from pydantic import (
BaseModel,
ConfigDict,
@@ -76,6 +78,7 @@ class _NVIDIAClient(BaseModel):
description="Path for polling after HTTP 202 responses",
)
get_session_fn: Callable = Field(requests.Session)
get_asession_fn: Callable = Field(AsyncClient)

api_key: Optional[SecretStr] = Field(
default_factory=lambda: SecretStr(
@@ -101,7 +104,7 @@
default={}, description="Last inputs sent over to the server"
)
last_response: Optional[Response] = Field(
Collaborator: why make this optional?

Collaborator (Author): Because the default value is None, which makes the field Optional (this actually fixes a linting error).

- None, description="Last response sent from the server"
+ Response(), description="Last response sent from the server"
)
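
As an aside on the typing point raised in the thread above, a minimal pydantic sketch (illustrative only, not part of this PR; the class and field below are stand-ins):

```python
from typing import Optional

from pydantic import BaseModel, Field
from requests import Response


class Example(BaseModel):
    # requests.Response is not a pydantic-native type, so arbitrary types must be allowed.
    model_config = {"arbitrary_types_allowed": True}

    # An Optional[...] annotation with a None default means the field may be left unset.
    last_response: Optional[Response] = Field(
        None, description="Last response sent from the server"
    )


print(Example().last_response)  # None
```
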
headers_tmpl: dict = Field(
{
@@ -369,6 +372,26 @@ def _post(
self._try_raise(response)
return response, session

async def _apost(
self,
invoke_url: str,
payload: Optional[dict] = {},
extra_headers: dict = {},
) -> Tuple[httpx.Response, Any]:
"""Async Method for posting to the AI Foundation Model Function API."""
self.last_inputs = {
"url": invoke_url,
"headers": {
**self.headers_tmpl["call"],
**extra_headers,
},
"json": payload,
}
async with self.get_asession_fn() as session:
response = await session.post(**self.__add_authorization(self.last_inputs))
self._try_raise(response)
return response, session

def _get(
self,
invoke_url: str,
@@ -475,9 +498,16 @@ def get_req(
)
return self._wait(response, session)

async def aget_req(
self,
payload: dict = {},
extra_headers: dict = {},
) -> Tuple[httpx.Response, Any]:
return await self._apost(self.infer_url, payload, extra_headers=extra_headers)

def postprocess(
self,
- response: Union[str, Response],
+ response: Union[str, Response, httpx.Response],
) -> Tuple[dict, bool]:
"""Parses a response from the AI Foundation Model Function API.
Strongly assumes that the API will return a single response.
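
For context, a rough sketch (not part of the PR) of how the new async request path in `_NVIDIAClient` might be driven; the helper below is hypothetical and the payload shape is an assumption:

```python
import asyncio


async def ainvoke_raw(client, payload: dict) -> dict:
    """Exercise the async path added above.

    aget_req() awaits _apost(), which opens an httpx.AsyncClient, POSTs the
    payload with the authorization headers applied, and raises on HTTP errors.
    """
    response, _ = await client.aget_req(payload=payload)
    parsed, _ = client.postprocess(response)  # (parsed dict, streaming flag)
    return parsed


# Usage (client would be a configured _NVIDIAClient; the payload keys are assumed):
# asyncio.run(ainvoke_raw(client, {"messages": [{"role": "user", "content": "Hi"}]}))
```
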
27 changes: 27 additions & 0 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
@@ -389,6 +389,33 @@ def _generate(
generation = ChatGeneration(message=AIMessage(**parsed_response))
return ChatResult(generations=[generation], llm_output=responses)

async def _agenerate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
inputs = [
message
for message in [convert_message_to_dict(message) for message in messages]
]
inputs, extra_headers = _process_for_vlm(inputs, self._client.model)
payload = self._get_payload(inputs=inputs, stop=stop, stream=False, **kwargs)
if payload.get("stream", False) is True:
payload = {**payload, "stream": False}
response, _ = await self._client.aget_req(
payload=payload, extra_headers=extra_headers
)
responses, _ = self._client.postprocess(response)
self._set_callback_out(responses, run_manager)
parsed_response = self._custom_postprocess(responses, streaming=False)
# for pre 0.2 compatibility w/ ChatMessage
# ChatMessage had a role property that was not present in AIMessage
parsed_response.update({"role": "assistant"})
generation = ChatGeneration(message=AIMessage(**parsed_response))
return ChatResult(generations=[generation], llm_output=responses)

def _stream(
self,
messages: List[BaseMessage],
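
At the public API level, a minimal usage sketch (not from the PR) of what native async enables; the model name is illustrative and an `NVIDIA_API_KEY` is assumed to be configured:

```python
import asyncio

from langchain_nvidia_ai_endpoints import ChatNVIDIA


async def main() -> None:
    # ainvoke() routes through _agenerate(), which awaits aget_req()/httpx
    # instead of falling back to LangChain's default thread-executor path.
    llm = ChatNVIDIA(model="meta/llama3-8b-instruct")  # model name is an assumption
    result = await llm.ainvoke("Write one sentence about asyncio.")
    print(result.content)


asyncio.run(main())
```
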