13
13
from dendrite_sdk .async_api ._core .models .api_config import APIConfig
14
14
15
15
16
+ CACHE_TIMEOUT = 5
17
+
18
+
16
19
class GetElementMixin (DendritePageProtocol ):
17
20
@overload
18
21
async def get_elements (
@@ -28,7 +31,9 @@ async def get_elements(
28
31
Args:
29
32
prompt_or_elements (str): The prompt describing the elements to be retrieved.
30
33
use_cache (bool, optional): Whether to use cached results. Defaults to True.
31
- timeout (int, optional): The total timeout (in milliseconds) until the last request is sent to the API. Defaults to 15000 (15 seconds).
34
+ timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True,
35
+ up to 5000ms will be spent attempting to use cached selectors before falling back to the
36
+ find element agent for the remaining time. Defaults to 15000 (15 seconds).
32
37
context (str, optional): Additional context for the retrieval. Defaults to an empty string.
33
38
34
39
Returns:
@@ -49,7 +54,9 @@ async def get_elements(
49
54
Args:
50
55
prompt_or_elements (Dict[str, str]): A dictionary where keys are field names and values are prompts describing the elements to be retrieved.
51
56
use_cache (bool, optional): Whether to use cached results. Defaults to True.
52
- timeout (int, optional): The total timeout (in milliseconds) until the last request is sent to the API. Defaults to 3000.
57
+ timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True,
58
+ up to 5000ms will be spent attempting to use cached selectors before falling back to the
59
+ find element agent for the remaining time. Defaults to 15000 (15 seconds).
53
60
context (str, optional): Additional context for the retrieval. Defaults to an empty string.
54
61
55
62
Returns:
@@ -72,7 +79,9 @@ async def get_elements(
72
79
Args:
73
80
prompt_or_elements (Union[str, Dict[str, str]]): The prompt or dictionary of prompts for element retrieval.
74
81
use_cache (bool, optional): Whether to use cached results. Defaults to True.
75
- timeout (int, optional): The total timeout (in milliseconds) until the last request is sent to the API. Defaults to 3000.
82
+ timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True,
83
+ up to 5000ms will be spent attempting to use cached selectors before falling back to the
84
+ find element agent for the remaining time. Defaults to 15000 (15 seconds).
76
85
context (str, optional): Additional context for the retrieval. Defaults to an empty string.
77
86
78
87
Returns:
@@ -86,11 +95,9 @@ async def get_elements(
86
95
prompt_or_elements ,
87
96
only_one = False ,
88
97
use_cache = use_cache ,
89
- timeout = timeout ,
98
+ timeout = timeout / 1000 ,
90
99
)
91
100
92
- raise ValueError ("Prompt must be either a string prompt or a dictionary" )
93
-
94
101
async def get_element (
95
102
self ,
96
103
prompt : str ,
@@ -103,7 +110,9 @@ async def get_element(
103
110
Args:
104
111
prompt (str): The prompt describing the element to be retrieved.
105
112
use_cache (bool, optional): Whether to use cached results. Defaults to True.
106
- timeout (int, optional): The total timeout (in milliseconds) until the last request is sent to the API. Defaults to 15000 (15 seconds).
113
+ timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True,
114
+ up to 5000ms will be spent attempting to use cached selectors before falling back to the
115
+ find element agent for the remaining time. Defaults to 15000 (15 seconds).
107
116
108
117
Returns:
109
118
AsyncElement: The retrieved element.
@@ -112,7 +121,7 @@ async def get_element(
112
121
prompt ,
113
122
only_one = True ,
114
123
use_cache = use_cache ,
115
- timeout = timeout ,
124
+ timeout = timeout / 1000 ,
116
125
)
117
126
118
127
@overload
@@ -130,7 +139,9 @@ async def _get_element(
130
139
prompt (Union[str, Dict[str, str]]): The prompt describing the element to be retrieved.
131
140
only_one (Literal[True]): Indicates that only one element should be retrieved.
132
141
use_cache (bool): Whether to use cached results.
133
- timeout: The total timeout (in milliseconds) until the last request is sent to the API.
142
+ timeout (float): Maximum time in seconds for the entire operation. If use_cache=True,
143
+ up to 5 seconds (CACHE_TIMEOUT) will be spent attempting to use cached selectors before falling back to the
144
+ find element agent for the remaining time. The public get_element/get_elements wrappers pass their millisecond timeout divided by 1000.
134
145
135
146
Returns:
136
147
AsyncElement: The retrieved element.
@@ -151,7 +162,9 @@ async def _get_element(
151
162
prompt (str): The prompt describing the elements to be retrieved.
152
163
only_one (Literal[False]): Indicates that multiple elements should be retrieved.
153
164
use_cache (bool): Whether to use cached results.
154
- timeout: The total timeout (in milliseconds) until the last request is sent to the API.
165
+ timeout (float): Maximum time in seconds for the entire operation. If use_cache=True,
166
+ up to 5 seconds (CACHE_TIMEOUT) will be spent attempting to use cached selectors before falling back to the
167
+ find element agent for the remaining time. The public get_element/get_elements wrappers pass their millisecond timeout divided by 1000.
155
168
156
169
Returns:
157
170
List[AsyncElement]: A list of retrieved elements.
@@ -177,7 +190,9 @@ async def _get_element(
177
190
prompt_or_elements (Union[str, Dict[str, str]]): The prompt or dictionary of prompts for element retrieval.
178
191
only_one (bool): Whether to retrieve only one element or a list of elements.
179
192
use_cache (bool): Whether to use cached results.
180
- timeout (float): The total timeout (in seconds) for the entire operation.
193
+ timeout (float): Maximum time in seconds for the entire operation. If use_cache=True,
194
+ up to 5 seconds (CACHE_TIMEOUT) will be spent attempting to use cached selectors before falling back to the
195
+ find element agent for the remaining time. The public get_element/get_elements wrappers pass their millisecond timeout divided by 1000.
181
196
182
197
Returns:
183
198
Union[AsyncElement, List[AsyncElement], AsyncElementsResponse]: The retrieved element, list of elements, or response object.
@@ -187,7 +202,7 @@ async def _get_element(
187
202
start_time = time .time ()
188
203
189
204
# First, let's check if there is a cached selector
190
- cache_available = await test_if_cache_available (self ,prompt_or_elements )
205
+ cache_available = await test_if_cache_available (self , prompt_or_elements )
191
206
192
207
# If we have cached elements, attempt to use them with exponential backoff
193
208
if cache_available and use_cache == True :
@@ -197,8 +212,8 @@ async def _get_element(
197
212
prompt_or_elements ,
198
213
only_one ,
199
214
api_config ,
215
+ remaining_timeout = CACHE_TIMEOUT ,
200
216
only_use_cache = True ,
201
- remaining_timeout = timeout - (time .time () - start_time ),
202
217
)
203
218
if res :
204
219
return res
@@ -216,8 +231,8 @@ async def _get_element(
216
231
prompt_or_elements ,
217
232
only_one ,
218
233
api_config ,
219
- only_use_cache = False ,
220
234
remaining_timeout = timeout - (time .time () - start_time ),
235
+ only_use_cache = False ,
221
236
)
222
237
if res :
223
238
return res
@@ -227,6 +242,7 @@ async def _get_element(
227
242
)
228
243
return None
229
244
245
+
230
246
async def test_if_cache_available (
231
247
obj : DendritePageProtocol ,
232
248
prompt_or_elements : Union [str , Dict [str , str ]],
@@ -240,13 +256,14 @@ async def test_if_cache_available(
240
256
cache_available = await obj ._get_browser_api_client ().check_selector_cache (dto )
241
257
return cache_available .exists
242
258
259
+
243
260
async def attempt_with_backoff (
244
261
obj : DendritePageProtocol ,
245
262
prompt_or_elements : Union [str , Dict [str , str ]],
246
263
only_one : bool ,
247
264
api_config : APIConfig ,
265
+ remaining_timeout : float ,
248
266
only_use_cache : bool = False ,
249
- remaining_timeout : float = 15.0 ,
250
267
) -> Union [Optional [AsyncElement ], List [AsyncElement ], AsyncElementsResponse ]:
251
268
TIMEOUT_INTERVAL : List [float ] = [0.15 , 0.45 , 1.0 , 2.0 , 4.0 , 8.0 ]
252
269
total_elapsed_time = 0
@@ -294,6 +311,7 @@ async def attempt_with_backoff(
294
311
logger .error (f"All attempts failed after { total_elapsed_time :.2f} seconds" )
295
312
return None
296
313
314
+
297
315
async def get_elements_from_selectors (
298
316
obj : DendritePageProtocol , res : GetElementResponse , only_one : bool
299
317
) -> Union [Optional [AsyncElement ], List [AsyncElement ], AsyncElementsResponse ]:
@@ -302,9 +320,7 @@ async def get_elements_from_selectors(
302
320
for key , selectors in res .selectors .items ():
303
321
for selector in selectors :
304
322
page = await obj ._get_page ()
305
- dendrite_elements = await page ._get_all_elements_from_selector (
306
- selector
307
- )
323
+ dendrite_elements = await page ._get_all_elements_from_selector (selector )
308
324
if len (dendrite_elements ) > 0 :
309
325
result [key ] = dendrite_elements [0 ]
310
326
break
0 commit comments