Fix GPU status request in sync flow (#4318)

Weves · web-flow · commit d123713c006f · 2025-03-21T11:11:00.000-07:00
* Fix GPU status request in sync flow

* tweak

* Fix test

* Fix more tests
diff --git a/backend/onyx/chat/answer.py b/backend/onyx/chat/answer.py
@@ -30,7 +30,7 @@
 from onyx.tools.tool_implementations.search.search_tool import QUERY_FIELD
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.tools.utils import explicit_tool_calling_supported
-from onyx.utils.gpu_utils import gpu_status_request
+from onyx.utils.gpu_utils import fast_gpu_status_request
 from onyx.utils.logger import setup_logger
 
 logger = setup_logger()
@@ -88,7 +88,9 @@ def __init__(
             rerank_settings is not None
             and rerank_settings.rerank_provider_type is not None
         )
-        allow_agent_reranking = gpu_status_request() or using_cloud_reranking
+        allow_agent_reranking = (
+            fast_gpu_status_request(indexing=False) or using_cloud_reranking
+        )
 
         # TODO: this is a hack to force the query to be used for the search tool
         #       this should be removed once we fully unify graph inputs (i.e.
diff --git a/backend/onyx/setup.py b/backend/onyx/setup.py
@@ -324,7 +324,7 @@ def update_default_multipass_indexing(db_session: Session) -> None:
         logger.info(
             "No existing docs or connectors found. Checking GPU availability for multipass indexing."
         )
-        gpu_available = gpu_status_request()
+        gpu_available = gpu_status_request(indexing=True)
         logger.info(f"GPU available: {gpu_available}")
 
         current_settings = get_current_search_settings(db_session)
diff --git a/backend/onyx/utils/gpu_utils.py b/backend/onyx/utils/gpu_utils.py
@@ -1,3 +1,5 @@
+from functools import lru_cache
+
 import requests
 from retry import retry
 
@@ -10,8 +12,7 @@
 logger = setup_logger()
 
 
-@retry(tries=5, delay=5)
-def gpu_status_request(indexing: bool = True) -> bool:
+def _get_gpu_status_from_model_server(indexing: bool) -> bool:
     if indexing:
         model_server_url = f"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}"
     else:
@@ -28,3 +29,14 @@ def gpu_status_request(indexing: bool = True) -> bool:
     except requests.RequestException as e:
         logger.error(f"Error: Unable to fetch GPU status. Error: {str(e)}")
         raise  # Re-raise exception to trigger a retry
+
+
+@retry(tries=5, delay=5)  # retried when the helper re-raises a request error
+def gpu_status_request(indexing: bool) -> bool:
+    return _get_gpu_status_from_model_server(indexing)
+
+
+@lru_cache(maxsize=2)  # one cache slot per value of the boolean `indexing` key
+def fast_gpu_status_request(indexing: bool) -> bool:
+    """Cached, single-attempt GPU check for sync flows; request errors propagate."""
+    return _get_gpu_status_from_model_server(indexing)  # bypass the @retry wrapper
diff --git a/backend/tests/unit/onyx/chat/test_answer.py b/backend/tests/unit/onyx/chat/test_answer.py
@@ -50,7 +50,7 @@ def answer_instance(
     mocker: MockerFixture,
 ) -> Answer:
     mocker.patch(
-        "onyx.chat.answer.gpu_status_request",
+        "onyx.chat.answer.fast_gpu_status_request",
         return_value=True,
     )
     return _answer_fixture_impl(mock_llm, answer_style_config, prompt_config)
@@ -400,7 +400,7 @@ def test_no_slow_reranking(
     mocker: MockerFixture,
 ) -> None:
     mocker.patch(
-        "onyx.chat.answer.gpu_status_request",
+        "onyx.chat.answer.fast_gpu_status_request",
         return_value=gpu_enabled,
     )
     rerank_settings = (
diff --git a/backend/tests/unit/onyx/chat/test_skip_gen_ai.py b/backend/tests/unit/onyx/chat/test_skip_gen_ai.py
@@ -39,7 +39,7 @@ def test_skip_gen_ai_answer_generation_flag(
     mocker: MockerFixture,
 ) -> None:
     mocker.patch(
-        "onyx.chat.answer.gpu_status_request",
+        "onyx.chat.answer.fast_gpu_status_request",
         return_value=True,
     )
     question = config["question"]

Original file line number	Diff line number	Diff line change
`@@ -324,7 +324,7 @@ def update_default_multipass_indexing(db_session: Session) -> None:`
`324`	`324`	`logger.info(`
`325`	`325`	`"No existing docs or connectors found. Checking GPU availability for multipass indexing."`
`326`	`326`	`)`
`327`		`- gpu_available = gpu_status_request()`
	`327`	`+ gpu_available = gpu_status_request(indexing=True)`
`328`	`328`	`logger.info(f"GPU available: {gpu_available}")`
`329`	`329`
`330`	`330`	`current_settings = get_current_search_settings(db_session)`
Original file line number	Diff line number	Diff line change
`@@ -39,7 +39,7 @@ def test_skip_gen_ai_answer_generation_flag(`
`39`	`39`	`mocker: MockerFixture,`
`40`	`40`	`) -> None:`
`41`	`41`	`mocker.patch(`
`42`		`- "onyx.chat.answer.gpu_status_request",`
	`42`	`+ "onyx.chat.answer.fast_gpu_status_request",`
`43`	`43`	`return_value=True,`
`44`	`44`	`)`
`45`	`45`	`question = config["question"]`