6
6
import requests
7
7
from unstructured_client import UnstructuredClient
8
8
from unstructured_client .models import operations , shared
9
+ from unstructured_client .utils import retries
9
10
10
11
from unstructured .documents .elements import Element
11
12
from unstructured .logger import logger
12
13
from unstructured .partition .common .common import exactly_one
13
14
from unstructured .staging .base import elements_from_dicts , elements_from_json
14
15
16
+ # Default retry configuration taken from the client code
17
+ DEFAULT_RETRIES_INITIAL_INTERVAL_SEC = 3000
18
+ DEFAULT_RETRIES_MAX_INTERVAL_SEC = 720000
19
+ DEFAULT_RETRIES_EXPONENT = 1.5
20
+ DEFAULT_RETRIES_MAX_ELAPSED_TIME_SEC = 1800000
21
+ DEFAULT_RETRIES_CONNECTION_ERRORS = True
22
+
15
23
16
24
def partition_via_api (
17
25
filename : Optional [str ] = None ,
@@ -21,6 +29,11 @@ def partition_via_api(
21
29
api_url : str = "https://api.unstructured.io/general/v0/general" ,
22
30
api_key : str = "" ,
23
31
metadata_filename : Optional [str ] = None ,
32
+ retries_initial_interval : [int ] = None ,
33
+ retries_max_interval : Optional [int ] = None ,
34
+ retries_exponent : Optional [float ] = None ,
35
+ retries_max_elapsed_time : Optional [int ] = None ,
36
+ retries_connection_errors : Optional [bool ] = None ,
24
37
** request_kwargs : Any ,
25
38
) -> list [Element ]:
26
39
"""Partitions a document using the Unstructured REST API. This is equivalent to
@@ -44,6 +57,21 @@ def partition_via_api(
44
57
The URL for the Unstructured API. Defaults to the hosted Unstructured API.
45
58
api_key
46
59
The API key to pass to the Unstructured API.
60
+ retries_initial_interval
61
+ Defines the time interval (in seconds) to wait before the first retry in case of a request
62
+ failure. Defaults to 3000. If set should be > 0.
63
+ retries_max_interval
64
+ Defines the maximum time interval (in seconds) to wait between retries (the interval
65
+ between retries is increased using an exponential backoff algorithm
66
+ - this setting limits it). Defaults to 720000. If set should be > 0.
67
+ retries_exponent
68
+ Defines the exponential factor to increase the interval between retries. Defaults to 1.5.
69
+ If set should be > 0.0.
70
+ retries_max_elapsed_time
71
+ Defines the maximum time (in seconds) to wait for retries. If exceeded, the original
72
+ exception is raised. Defaults to 1800000. If set should be > 0.
73
+ retries_connection_errors
74
+ Defines whether to retry on connection errors. Defaults to True.
47
75
request_kwargs
48
76
Additional parameters to pass to the data field of the request to the Unstructured API.
49
77
For example the `strategy` parameter.
@@ -87,7 +115,19 @@ def partition_via_api(
87
115
partition_parameters = shared .PartitionParameters (files = files , ** request_kwargs )
88
116
)
89
117
90
- response = sdk .general .partition (request = req )
118
+ retries_config = get_retries_config (
119
+ retries_connection_errors = retries_connection_errors ,
120
+ retries_exponent = retries_exponent ,
121
+ retries_initial_interval = retries_initial_interval ,
122
+ retries_max_elapsed_time = retries_max_elapsed_time ,
123
+ retries_max_interval = retries_max_interval ,
124
+ sdk = sdk ,
125
+ )
126
+
127
+ response = sdk .general .partition (
128
+ request = req ,
129
+ retries = retries_config ,
130
+ )
91
131
92
132
if response .status_code == 200 :
93
133
return elements_from_json (text = response .raw_response .text )
@@ -97,6 +137,97 @@ def partition_via_api(
97
137
)
98
138
99
139
140
def get_retries_config(
    retries_connection_errors: Optional[bool],
    retries_exponent: Optional[float],
    retries_initial_interval: Optional[int],
    retries_max_elapsed_time: Optional[int],
    retries_max_interval: Optional[int],
    sdk: UnstructuredClient,
) -> Optional[retries.RetryConfig]:
    """Build a RetryConfig from the caller-supplied retry settings.

    If every retry setting is None, None is returned so that the SDK-managed defaults are used
    within the client. Otherwise a "backoff" RetryConfig is constructed; each setting that was
    not provided is taken from the SDK's own retry configuration when the SDK has one, and from
    the module-level DEFAULT_RETRIES_* constants otherwise.

    The solution is not perfect as the RetryConfig object does not include the defaults by
    itself, so the object may have to be assembled from our own defaults.

    Parameters
    ----------
    retries_connection_errors
        Defines whether to retry on connection errors. If None, the SDK-configured value is used
        when available, otherwise the DEFAULT_RETRIES_CONNECTION_ERRORS constant.
    retries_exponent
        Defines the exponential factor to increase the interval between retries.
        If set, should be > 0.0 (otherwise the SDK value or DEFAULT_RETRIES_EXPONENT is used).
    retries_initial_interval
        Defines the time interval to wait before the first retry in case of a request failure.
        If set, should be > 0 (otherwise the SDK value or DEFAULT_RETRIES_INITIAL_INTERVAL_SEC
        is used).
    retries_max_elapsed_time
        Defines the maximum time to wait for retries. If exceeded, the original exception is
        raised. If set, should be > 0 (otherwise the SDK value or
        DEFAULT_RETRIES_MAX_ELAPSED_TIME_SEC is used).
    retries_max_interval
        Defines the maximum time interval to wait between retries. If set, should be > 0
        (otherwise the SDK value or DEFAULT_RETRIES_MAX_INTERVAL_SEC is used).
    sdk
        The UnstructuredClient object to take the default values from.

    Returns
    -------
    A RetryConfig using the "backoff" strategy, or None when no retry setting was provided.
    """
    sdk_default_retries_config = sdk.sdk_configuration.retry_config

    requested_settings = (
        retries_initial_interval,
        retries_max_interval,
        retries_exponent,
        retries_max_elapsed_time,
        retries_connection_errors,
    )
    if all(setting is None for setting in requested_settings):
        # Nothing was customized - let the client apply its own retry defaults.
        return None

    def get_backoff_default(setting_name: str, default_value: Any) -> Any:
        """Return the SDK-configured backoff setting if present and truthy, else the default."""
        if sdk_default_retries_config:
            # A default of None keeps a missing backoff object or attribute from raising.
            sdk_backoff = getattr(sdk_default_retries_config, "backoff", None)
            if setting_value := getattr(sdk_backoff, setting_name, None):
                return setting_value
        return default_value

    # The SDK may have no retry configuration at all; guard before reading its attributes.
    if (
        sdk_default_retries_config
        and sdk_default_retries_config.retry_connection_errors is not None
    ):
        default_retries_connection_errors = sdk_default_retries_config.retry_connection_errors
    else:
        default_retries_connection_errors = DEFAULT_RETRIES_CONNECTION_ERRORS

    # `or` is deliberate: falsy values (None, 0, 0.0) fall back to the defaults, which matches
    # the documented "should be > 0" contract of the numeric settings.
    backoff_strategy = retries.BackoffStrategy(
        initial_interval=(
            retries_initial_interval
            or get_backoff_default("initial_interval", DEFAULT_RETRIES_INITIAL_INTERVAL_SEC)
        ),
        max_interval=(
            retries_max_interval
            or get_backoff_default("max_interval", DEFAULT_RETRIES_MAX_INTERVAL_SEC)
        ),
        exponent=(
            retries_exponent or get_backoff_default("exponent", DEFAULT_RETRIES_EXPONENT)
        ),
        max_elapsed_time=(
            retries_max_elapsed_time
            or get_backoff_default("max_elapsed_time", DEFAULT_RETRIES_MAX_ELAPSED_TIME_SEC)
        ),
    )
    return retries.RetryConfig(
        strategy="backoff",
        backoff=backoff_strategy,
        # False is a meaningful explicit choice here, so test against None rather than truthiness.
        retry_connection_errors=(
            retries_connection_errors
            if retries_connection_errors is not None
            else default_retries_connection_errors
        ),
    )
229
+
230
+
100
231
def partition_multiple_via_api (
101
232
filenames : Optional [list [str ]] = None ,
102
233
content_types : Optional [list [str ]] = None ,
0 commit comments