Skip to content

Commit d123713

Browse files
authored
Fix GPU status request in sync flow (#4318)
* Fix GPU status request in sync flow
* tweak
* Fix test
* Fix more tests
1 parent 775c847 commit d123713

File tree

5 files changed

+22
-8
lines changed

5 files changed

+22
-8
lines changed

backend/onyx/chat/answer.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from onyx.tools.tool_implementations.search.search_tool import QUERY_FIELD
3131
from onyx.tools.tool_implementations.search.search_tool import SearchTool
3232
from onyx.tools.utils import explicit_tool_calling_supported
33-
from onyx.utils.gpu_utils import gpu_status_request
33+
from onyx.utils.gpu_utils import fast_gpu_status_request
3434
from onyx.utils.logger import setup_logger
3535

3636
logger = setup_logger()
@@ -88,7 +88,9 @@ def __init__(
8888
rerank_settings is not None
8989
and rerank_settings.rerank_provider_type is not None
9090
)
91-
allow_agent_reranking = gpu_status_request() or using_cloud_reranking
91+
allow_agent_reranking = (
92+
fast_gpu_status_request(indexing=False) or using_cloud_reranking
93+
)
9294

9395
# TODO: this is a hack to force the query to be used for the search tool
9496
# this should be removed once we fully unify graph inputs (i.e.

backend/onyx/setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ def update_default_multipass_indexing(db_session: Session) -> None:
324324
logger.info(
325325
"No existing docs or connectors found. Checking GPU availability for multipass indexing."
326326
)
327-
gpu_available = gpu_status_request()
327+
gpu_available = gpu_status_request(indexing=True)
328328
logger.info(f"GPU available: {gpu_available}")
329329

330330
current_settings = get_current_search_settings(db_session)

backend/onyx/utils/gpu_utils.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from functools import lru_cache
2+
13
import requests
24
from retry import retry
35

@@ -10,8 +12,7 @@
1012
logger = setup_logger()
1113

1214

13-
@retry(tries=5, delay=5)
14-
def gpu_status_request(indexing: bool = True) -> bool:
15+
def _get_gpu_status_from_model_server(indexing: bool) -> bool:
1516
if indexing:
1617
model_server_url = f"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}"
1718
else:
@@ -28,3 +29,14 @@ def gpu_status_request(indexing: bool = True) -> bool:
2829
except requests.RequestException as e:
2930
logger.error(f"Error: Unable to fetch GPU status. Error: {str(e)}")
3031
raise # Re-raise exception to trigger a retry
32+
33+
34+
@retry(tries=5, delay=5)
def gpu_status_request(indexing: bool) -> bool:
    """Ask the model server for its GPU status, retrying on failure.

    Delegates to ``_get_gpu_status_from_model_server``. That helper re-raises
    request errors, so the ``retry`` decorator re-invokes this function: up
    to 5 tries in total, 5 seconds apart.

    Args:
        indexing: Forwarded to the helper; when true, the indexing model
            server is the one queried.

    Returns:
        The GPU status reported by the helper.
    """
    gpu_available = _get_gpu_status_from_model_server(indexing)
    return gpu_available
37+
38+
39+
# `indexing` is a bool, so there are exactly two possible cache keys. Two
# slots keep the indexing and non-indexing answers from evicting each other.
@lru_cache(maxsize=2)
def fast_gpu_status_request(indexing: bool) -> bool:
    """Return the cached GPU status for use in sync flows.

    Sync flows must not block on retries, so this calls the un-retried
    helper directly rather than ``gpu_status_request``, which sleeps between
    attempts. The first successful answer per ``indexing`` value is cached
    for the lifetime of the process.

    Args:
        indexing: Forwarded to the helper; when true, the indexing model
            server is the one queried.

    Returns:
        The GPU status reported by the model server.

    Raises:
        requests.RequestException: Re-raised from the helper when the status
            request fails. ``lru_cache`` does not cache raised exceptions,
            so the next call queries the model server again.
    """
    return _get_gpu_status_from_model_server(indexing)

backend/tests/unit/onyx/chat/test_answer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def answer_instance(
5050
mocker: MockerFixture,
5151
) -> Answer:
5252
mocker.patch(
53-
"onyx.chat.answer.gpu_status_request",
53+
"onyx.chat.answer.fast_gpu_status_request",
5454
return_value=True,
5555
)
5656
return _answer_fixture_impl(mock_llm, answer_style_config, prompt_config)
@@ -400,7 +400,7 @@ def test_no_slow_reranking(
400400
mocker: MockerFixture,
401401
) -> None:
402402
mocker.patch(
403-
"onyx.chat.answer.gpu_status_request",
403+
"onyx.chat.answer.fast_gpu_status_request",
404404
return_value=gpu_enabled,
405405
)
406406
rerank_settings = (

backend/tests/unit/onyx/chat/test_skip_gen_ai.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def test_skip_gen_ai_answer_generation_flag(
3939
mocker: MockerFixture,
4040
) -> None:
4141
mocker.patch(
42-
"onyx.chat.answer.gpu_status_request",
42+
"onyx.chat.answer.fast_gpu_status_request",
4343
return_value=True,
4444
)
4545
question = config["question"]

0 commit comments

Comments
 (0)