Model features: native async #110


Open · wants to merge 19 commits into main
34 changes: 32 additions & 2 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -19,7 +19,9 @@
)
from urllib.parse import urlparse, urlunparse

import httpx
import requests
from httpx import AsyncClient
from pydantic import (
BaseModel,
ConfigDict,
@@ -76,6 +78,7 @@ class _NVIDIAClient(BaseModel):
description="Path for polling after HTTP 202 responses",
)
get_session_fn: Callable = Field(requests.Session)
get_asession_fn: Callable = Field(AsyncClient)

api_key: Optional[SecretStr] = Field(
default_factory=lambda: SecretStr(
@@ -101,7 +104,7 @@
default={}, description="Last inputs sent over to the server"
)
last_response: Optional[Response] = Field(
Collaborator: why make this optional?

Collaborator (Author): Because the default value is None, which makes the field Optional (this actually fixes a linting error).

- None, description="Last response sent from the server"
+ Response(), description="Last response sent from the server"
)
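
As an aside on the typing point raised in the thread above, a minimal pydantic sketch (illustrative only, not part of this PR; the class and field below are stand-ins):

```python
from typing import Optional

from pydantic import BaseModel, Field
from requests import Response


class Example(BaseModel):
    # requests.Response is not a pydantic-native type, so arbitrary types must be allowed.
    model_config = {"arbitrary_types_allowed": True}

    # An Optional[...] annotation with a None default means the field may be left unset.
    last_response: Optional[Response] = Field(
        None, description="Last response sent from the server"
    )


print(Example().last_response)  # None
```
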
headers_tmpl: dict = Field(
{
@@ -369,6 +372,26 @@ def _post(
self._try_raise(response)
return response, session

async def _apost(
self,
invoke_url: str,
payload: Optional[dict] = {},
extra_headers: dict = {},
) -> Tuple[httpx.Response, Any]:
"""Async Method for posting to the AI Foundation Model Function API."""
self.last_inputs = {
"url": invoke_url,
"headers": {
**self.headers_tmpl["call"],
**extra_headers,
},
"json": payload,
}
async with self.get_asession_fn() as session:
response = await session.post(**self.__add_authorization(self.last_inputs))
self._try_raise(response)
return response, session

def _get(
self,
invoke_url: str,
@@ -475,9 +498,16 @@ def get_req(
)
return self._wait(response, session)

async def aget_req(
self,
payload: dict = {},
extra_headers: dict = {},
) -> Tuple[httpx.Response, Any]:
return await self._apost(self.infer_url, payload, extra_headers=extra_headers)

def postprocess(
self,
- response: Union[str, Response],
+ response: Union[str, Response, httpx.Response],
) -> Tuple[dict, bool]:
"""Parses a response from the AI Foundation Model Function API.
Strongly assumes that the API will return a single response.
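
For context, a rough sketch (not part of the PR) of how the new async request path in `_NVIDIAClient` might be driven; the helper below is hypothetical and the payload shape is an assumption:

```python
import asyncio


async def ainvoke_raw(client, payload: dict) -> dict:
    """Exercise the async path added above.

    aget_req() awaits _apost(), which opens an httpx.AsyncClient, POSTs the
    payload with the authorization headers applied, and raises on HTTP errors.
    """
    response, _ = await client.aget_req(payload=payload)
    parsed, _ = client.postprocess(response)  # (parsed dict, streaming flag)
    return parsed


# Usage (client would be a configured _NVIDIAClient; the payload keys are assumed):
# asyncio.run(ainvoke_raw(client, {"messages": [{"role": "user", "content": "Hi"}]}))
```
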
27 changes: 27 additions & 0 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
@@ -389,6 +389,33 @@ def _generate(
generation = ChatGeneration(message=AIMessage(**parsed_response))
return ChatResult(generations=[generation], llm_output=responses)

async def _agenerate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
inputs = [
message
for message in [convert_message_to_dict(message) for message in messages]
]
inputs, extra_headers = _process_for_vlm(inputs, self._client.model)
payload = self._get_payload(inputs=inputs, stop=stop, stream=False, **kwargs)
if payload.get("stream", False) is True:
payload = {**payload, "stream": False}
response, _ = await self._client.aget_req(
payload=payload, extra_headers=extra_headers
)
responses, _ = self._client.postprocess(response)
self._set_callback_out(responses, run_manager)
parsed_response = self._custom_postprocess(responses, streaming=False)
# for pre 0.2 compatibility w/ ChatMessage
# ChatMessage had a role property that was not present in AIMessage
parsed_response.update({"role": "assistant"})
generation = ChatGeneration(message=AIMessage(**parsed_response))
return ChatResult(generations=[generation], llm_output=responses)

def _stream(
self,
messages: List[BaseMessage],
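
At the public API level, a minimal usage sketch (not from the PR) of what native async enables; the model name is illustrative and an `NVIDIA_API_KEY` is assumed to be configured:

```python
import asyncio

from langchain_nvidia_ai_endpoints import ChatNVIDIA


async def main() -> None:
    # ainvoke() routes through _agenerate(), which awaits aget_req()/httpx
    # instead of falling back to LangChain's default thread-executor path.
    llm = ChatNVIDIA(model="meta/llama3-8b-instruct")  # model name is an assumption
    result = await llm.ainvoke("Write one sentence about asyncio.")
    print(result.content)


asyncio.run(main())
```
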