Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
name: Bug Report
about: Submit a report to help us improve.
labels: 'bug, needs triage'
---

**Describe the bug:**
A clear and concise description of what the bug is.

**Describe how to reproduce:**
Steps to reproduce the behavior. Ideally attach a minimal code sample to reproduce the described issue.

**Describe the expected behavior:**
A clear and concise description of what you expected to happen.

**Environment:**
At the very least, specify the versions of matrix, PyTorch, Python, and CUDA along with your operating system and, if relevant, GPU model.

**Additional Context:**
Add any other context about the bug here.
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
name: Feature Request
about: Submit a request for a new feature.
labels: 'enhancement, needs triage'
---

**Is your feature request related to a problem? Please describe:**
A clear and concise description of what the problem is.

**Describe the solution you would like:**
A clear and concise description of what you want to happen.

**Describe the alternatives you have considered:**
A clear and concise description of any alternative solutions or features you have considered.

**Additional Context:**
Add any other context about the feature request here.
7 changes: 7 additions & 0 deletions .github/ISSUE_TEMPLATE/question.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
name: Question
about: Ask the users and contributors a question.
labels: 'question, needs triage'
---

Please make sure that you first search existing issues and documentation before asking a question. If you cannot find an answer, be clear and concise. Ideally attach a minimal code sample if it is relevant to your question.
7 changes: 7 additions & 0 deletions .github/ISSUE_TEMPLATE/typo_doc_issue.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
name: Typo or Documentation Issue
about: Report a typo or an issue related to documentation.
labels: 'documentation, needs triage'
---

For typos, please go ahead; fix the typo and submit a PR. For documentation issues, please describe the issue here and wait for approval before submitting a PR.
File renamed without changes.
48 changes: 48 additions & 0 deletions matrix/app_server/llm/query_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,54 @@ async def make_request(
}


def batch_requests(
    url: tp.Union[str, tp.Callable[[], tp.Awaitable[str]]],
    model: str,
    requests: tp.List[tp.Dict[str, tp.Any]],
    **kwargs,
) -> tp.List[tp.Dict[str, tp.Any]]:
    """Run ``make_request`` for every entry in *requests* concurrently.

    Works whether the caller is synchronous or already inside a running
    event loop.

    Args:
        url: Endpoint URL, or an async callable resolving to one; passed
            through to ``make_request``.
        model: Model name passed through to ``make_request``.
        requests: One payload dict per request.
        **kwargs: Extra keyword arguments forwarded to ``make_request``.

    Returns:
        The responses, in the same order as *requests* (empty list for
        empty input).
    """

    async def _process_requests():
        """Gather all per-request coroutines concurrently, preserving order."""
        return await asyncio.gather(
            *[make_request(url, model, request, **kwargs) for request in requests]
        )

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # Synchronous caller: no loop is running on this thread, so
        # asyncio.run can create, drive, and fully clean up a fresh loop
        # (including async-generator shutdown).
        return asyncio.run(_process_requests())

    # A loop is already running (async caller), so run_until_complete is
    # illegal here. Drive the work on a private loop in a worker thread and
    # block this (nested) call until it finishes.
    import concurrent.futures

    def _run_in_new_loop():
        # asyncio.run gives this worker thread its own event loop and
        # guarantees it is closed afterwards.
        return asyncio.run(_process_requests())

    # One worker is enough: we submit exactly one job and wait for it.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(_run_in_new_loop).result()


async def main(
url: tp.Union[str, tp.Callable[[], tp.Awaitable[str]]],
output_file: str,
Expand Down
16 changes: 7 additions & 9 deletions matrix/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,15 +357,13 @@ def check_health(
else:
if not use_chat:
data_payload = {"prompt": prompt}
response = asyncio.run(
query_llm.make_request(
metadata["endpoints"]["head"],
metadata["model_name"],
data_payload,
app_name=app_name,
**kwargs,
)
)
response = query_llm.batch_requests(
metadata["endpoints"]["head"],
metadata["model_name"],
[data_payload],
app_name=app_name,
**kwargs,
)[0]
print(response)
return "error" not in response["response"]

Expand Down
6 changes: 6 additions & 0 deletions matrix/cluster/ray_dashboard_job.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
import shutil
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ classifiers=[
dev = [
# Test
"pytest>=4.3.0",
"pytest-asyncio>=0.26.0",
"coverage[toml]>=5.1",
# Format
"black==24.10.0",
Expand Down
78 changes: 78 additions & 0 deletions tests/unit/query/test_batch_requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import asyncio
from unittest.mock import MagicMock, patch

import pytest

import matrix
from matrix.app_server.llm import query_llm


def test_batch_requests_from_async_run():
    """batch_requests must return a plain list even with a loop running."""
    prefix = "mocked_response"

    async def fake_make_request(_url, _model, payload):
        return f"{prefix}_{payload}"

    async def scenario():
        with patch(
            "matrix.app_server.llm.query_llm.make_request",
            side_effect=fake_make_request,
        ):
            # Called from inside a running loop: batch_requests has to
            # detect the async context and still hand back a list, not
            # an awaitable.
            out = query_llm.batch_requests("", "", [1, 2, 3])

        assert isinstance(out, list)
        assert len(out) == 3
        assert out == [f"{prefix}_{n}" for n in (1, 2, 3)]

    # Drive the whole scenario under asyncio.run so a loop is active.
    asyncio.run(scenario())


def test_batch_requests_in_sync_context():
    """batch_requests should work when no event loop is running."""
    prefix = "mocked_response"

    async def fake_make_request(_url, _model, payload):
        return f"{prefix}_{payload}"

    with patch(
        "matrix.app_server.llm.query_llm.make_request",
        side_effect=fake_make_request,
    ):
        # Plain synchronous call site — batch_requests manages the loop.
        out = query_llm.batch_requests("", "", [1, 2, 3])

    assert len(out) == 3
    assert out == [f"{prefix}_{n}" for n in (1, 2, 3)]


def test_batch_requests_empty_list():
    """An empty request list yields an empty result without any calls."""
    with patch("matrix.app_server.llm.query_llm.make_request") as mocked:
        assert query_llm.batch_requests("", "", []) == []
    # No payloads means make_request must never have been invoked.
    mocked.assert_not_called()