File tree 5 files changed +22
-8
lines changed
5 files changed +22
-8
lines changed Original file line number Diff line number Diff line change 30
30
from onyx .tools .tool_implementations .search .search_tool import QUERY_FIELD
31
31
from onyx .tools .tool_implementations .search .search_tool import SearchTool
32
32
from onyx .tools .utils import explicit_tool_calling_supported
33
- from onyx .utils .gpu_utils import gpu_status_request
33
+ from onyx .utils .gpu_utils import fast_gpu_status_request
34
34
from onyx .utils .logger import setup_logger
35
35
36
36
logger = setup_logger ()
@@ -88,7 +88,9 @@ def __init__(
88
88
rerank_settings is not None
89
89
and rerank_settings .rerank_provider_type is not None
90
90
)
91
- allow_agent_reranking = gpu_status_request () or using_cloud_reranking
91
+ allow_agent_reranking = (
92
+ fast_gpu_status_request (indexing = False ) or using_cloud_reranking
93
+ )
92
94
93
95
# TODO: this is a hack to force the query to be used for the search tool
94
96
# this should be removed once we fully unify graph inputs (i.e.
Original file line number Diff line number Diff line change @@ -324,7 +324,7 @@ def update_default_multipass_indexing(db_session: Session) -> None:
324
324
logger .info (
325
325
"No existing docs or connectors found. Checking GPU availability for multipass indexing."
326
326
)
327
- gpu_available = gpu_status_request ()
327
+ gpu_available = gpu_status_request (indexing = True )
328
328
logger .info (f"GPU available: { gpu_available } " )
329
329
330
330
current_settings = get_current_search_settings (db_session )
Original file line number Diff line number Diff line change
1
+ from functools import lru_cache
2
+
1
3
import requests
2
4
from retry import retry
3
5
10
12
logger = setup_logger ()
11
13
12
14
13
- @retry (tries = 5 , delay = 5 )
14
- def gpu_status_request (indexing : bool = True ) -> bool :
15
+ def _get_gpu_status_from_model_server (indexing : bool ) -> bool :
15
16
if indexing :
16
17
model_server_url = f"{ INDEXING_MODEL_SERVER_HOST } :{ INDEXING_MODEL_SERVER_PORT } "
17
18
else :
@@ -28,3 +29,14 @@ def gpu_status_request(indexing: bool = True) -> bool:
28
29
except requests .RequestException as e :
29
30
logger .error (f"Error: Unable to fetch GPU status. Error: { str (e )} " )
30
31
raise # Re-raise exception to trigger a retry
32
+
33
+
34
@retry(tries=5, delay=5)
def gpu_status_request(indexing: bool) -> bool:
    """Return the model server's GPU status, retrying when the lookup fails.

    The actual query is delegated to ``_get_gpu_status_from_model_server``.
    Any exception it raises triggers the ``@retry`` decorator, which makes up
    to 5 attempts with a 5 second delay between them before giving up.

    Args:
        indexing: Selects which model server is queried; ``True`` targets the
            indexing model server.

    Returns:
        The GPU status reported by the selected model server.
    """
    return _get_gpu_status_from_model_server(indexing=indexing)
37
+
38
+
39
@lru_cache(maxsize=2)
def fast_gpu_status_request(indexing: bool) -> bool:
    """For use in sync flows, where we don't want to retry / we want to cache this.

    The model server is queried once per distinct ``indexing`` value and the
    answer is then served from the cache. Exceptions are not cached by
    ``lru_cache``, so a failed lookup is attempted again on the next call.

    Args:
        indexing: Selects which model server is queried; ``True`` targets the
            indexing model server.

    Returns:
        The GPU status reported by the selected model server.

    Raises:
        requests.RequestException: If the status request fails; no retry is
            attempted here.
    """
    # Deliberately bypass gpu_status_request: its @retry(tries=5, delay=5)
    # wrapper would stall a synchronous caller through repeated 5 second
    # sleeps when the model server is unreachable.
    #
    # maxsize=2 leaves room for both indexing=True and indexing=False so that
    # alternating callers do not evict each other's cached result.
    return _get_gpu_status_from_model_server(indexing)
Original file line number Diff line number Diff line change @@ -50,7 +50,7 @@ def answer_instance(
50
50
mocker : MockerFixture ,
51
51
) -> Answer :
52
52
mocker .patch (
53
- "onyx.chat.answer.gpu_status_request " ,
53
+ "onyx.chat.answer.fast_gpu_status_request " ,
54
54
return_value = True ,
55
55
)
56
56
return _answer_fixture_impl (mock_llm , answer_style_config , prompt_config )
@@ -400,7 +400,7 @@ def test_no_slow_reranking(
400
400
mocker : MockerFixture ,
401
401
) -> None :
402
402
mocker .patch (
403
- "onyx.chat.answer.gpu_status_request " ,
403
+ "onyx.chat.answer.fast_gpu_status_request " ,
404
404
return_value = gpu_enabled ,
405
405
)
406
406
rerank_settings = (
Original file line number Diff line number Diff line change @@ -39,7 +39,7 @@ def test_skip_gen_ai_answer_generation_flag(
39
39
mocker : MockerFixture ,
40
40
) -> None :
41
41
mocker .patch (
42
- "onyx.chat.answer.gpu_status_request " ,
42
+ "onyx.chat.answer.fast_gpu_status_request " ,
43
43
return_value = True ,
44
44
)
45
45
question = config ["question" ]
You can’t perform that action at this time.
0 commit comments