Model features: native async #110

Status: Open · wants to merge 19 commits into base: main · Changes from 2 commits
31 changes: 31 additions & 0 deletions libs/ai-endpoints/tests/integration_tests/test_chat_models.py
@@ -11,6 +11,7 @@
HumanMessage,
SystemMessage,
)
from langchain_core.outputs import ChatGeneration, LLMResult

from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA

@@ -441,3 +442,33 @@ def test_stop(
assert isinstance(token.content, str)
result += f"{token.content}|"
assert all(target not in result for target in targets)


def test_generate(chat_model: str, mode: dict) -> None:
"""Test generate method of anthropic."""
chat = ChatNVIDIA(model=chat_model, **mode) # type: ignore[call-arg]
chat_messages: List[List[BaseMessage]] = [
[HumanMessage(content="How many toes do dogs have?")]
]
messages_copy = [messages.copy() for messages in chat_messages]
result: LLMResult = chat.generate(chat_messages)
assert isinstance(result, LLMResult)
for response in result.generations[0]:
assert isinstance(response, ChatGeneration)
assert isinstance(response.text, str)
assert response.text == response.message.content
assert chat_messages == messages_copy


# @pytest.mark.scheduled
Collaborator:
should this be commented out or not?
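
For context: `scheduled` appears to be a custom pytest mark. If the decorator is restored, here is a sketch of how it could be registered so pytest does not warn about an unknown mark (assuming marks are registered in a `conftest.py`, which this diff does not confirm):

    # conftest.py (illustrative; the repo may already register marks elsewhere)
    def pytest_configure(config) -> None:
        # Register the custom "scheduled" mark so pytest does not emit
        # PytestUnknownMarkWarning once @pytest.mark.scheduled is restored.
        config.addinivalue_line("markers", "scheduled: run on a schedule")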

async def test_async_generate(chat_model: str, mode: dict) -> None:
Collaborator:
This will pass even if `agenerate()` is implemented without truly async communication with the server.

Add a unit test that checks that async generation requests are interleaved. For inspiration:

    import asyncio
    import time

    import httpx
    import pytest


    # Assumes pytest-asyncio and pytest-httpx (which provides the httpx_mock fixture).
    @pytest.mark.asyncio
    async def test_requests_are_interleaved(httpx_mock) -> None:
        async def afetch_data(url: str) -> str:
            async with httpx.AsyncClient() as client:
                return (await client.get(url)).text

        async def amock_response(request: httpx.Request) -> httpx.Response:
            await asyncio.sleep(1)  # each mocked request takes ~1 second
            return httpx.Response(200, text="Hello world!")

        httpx_mock.add_callback(amock_response, is_reusable=True)

        start_time = time.time()
        task1, task2 = afetch_data("http://example.com"), afetch_data("http://example.com")
        _, _ = await asyncio.gather(task1, task2)

        # Two 1-second requests completing in under 2 seconds means they overlapped.
        assert (time.time() - start_time) < 2, "Tasks did not run concurrently"

"""Test async generation."""
llm = ChatNVIDIA(model=chat_model, **mode)
message = HumanMessage(content="Hello")
response = await llm.agenerate([[message]])
assert isinstance(response, LLMResult)
for generations in response.generations:
for generation in generations:
assert isinstance(generation, ChatGeneration)
assert isinstance(generation.text, str)
assert generation.text == generation.message.content
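
Following up on the review comment above, a minimal sketch of what an interleaving unit test for `agenerate` might look like. `DummyChat`, the 1-second delay, and the pytest-asyncio setup are illustrative assumptions; a real unit test would exercise `ChatNVIDIA` against a mocked endpoint rather than a subclass stub:

    import asyncio
    import time

    import pytest
    from langchain_core.language_models.chat_models import BaseChatModel
    from langchain_core.messages import AIMessage, HumanMessage
    from langchain_core.outputs import ChatGeneration, ChatResult


    class DummyChat(BaseChatModel):
        """Stand-in model whose every async call takes ~1 second."""

        @property
        def _llm_type(self) -> str:
            return "dummy-chat"

        def _generate(self, messages, stop=None, run_manager=None, **kwargs) -> ChatResult:
            return ChatResult(
                generations=[ChatGeneration(message=AIMessage(content="hi"))]
            )

        async def _agenerate(self, messages, stop=None, run_manager=None, **kwargs) -> ChatResult:
            await asyncio.sleep(1)  # simulate a slow server round trip
            return ChatResult(
                generations=[ChatGeneration(message=AIMessage(content="hi"))]
            )


    @pytest.mark.asyncio
    async def test_agenerate_is_interleaved() -> None:
        llm = DummyChat()
        message = HumanMessage(content="Hello")
        start = time.monotonic()
        # Two ~1s calls finishing in under 2s proves they ran concurrently.
        await asyncio.gather(llm.agenerate([[message]]), llm.agenerate([[message]]))
        assert time.monotonic() - start < 2, "agenerate calls did not overlap"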