Skip to content

Commit 1d15529

Browse files
ds-filipknefelFilip Knefel
and
Filip Knefel
authored
fix: remove false success log & improve retry log (#111)
In the PR introducing more logging two things turned out lacking: - there was a false success log in split-page after error hook - the retry logs were only for 5XX responses where speakeasy also has retry set for ConnectionError exceptions This PR removes the faulty success log, moves success/failure logs to the `LoggerHook` and introduces logging for `ConnectionError` retries. --------- Co-authored-by: Filip Knefel <[email protected]>
1 parent 4675ee3 commit 1d15529

File tree

4 files changed

+62
-12
lines changed

4 files changed

+62
-12
lines changed

Diff for: _test_unstructured_client/unit/test_custom_hooks.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def test_unit_retry_with_backoff_does_retry(caplog):
4343

4444

4545
@pytest.mark.parametrize("status_code", [500, 503])
46-
def test_unit_backoff_strategy_logs_retries(status_code: int, caplog):
46+
def test_unit_backoff_strategy_logs_retries_5XX(status_code: int, caplog):
4747
caplog.set_level(logging.INFO)
4848
filename = "README.md"
4949
backoff_strategy = BackoffStrategy(
@@ -64,11 +64,38 @@ def test_unit_backoff_strategy_logs_retries(status_code: int, caplog):
6464
req = shared.PartitionParameters(files=files)
6565
with pytest.raises(Exception):
6666
session.general.partition(req, retries=retries)
67+
6768
pattern = re.compile(f"Failed to process a request due to API server error with status code {status_code}. "
6869
"Attempting retry number 1 after sleep.")
6970
assert bool(pattern.search(caplog.text))
7071

7172

73+
def test_unit_backoff_strategy_logs_retries_connection_error(caplog):
74+
caplog.set_level(logging.INFO)
75+
filename = "README.md"
76+
backoff_strategy = BackoffStrategy(
77+
initial_interval=10, max_interval=100, exponent=1.5, max_elapsed_time=300
78+
)
79+
retries = RetryConfig(
80+
strategy="backoff", backoff=backoff_strategy, retry_connection_errors=True
81+
)
82+
with requests_mock.Mocker() as mock:
83+
# mock a connection error response to POST request
84+
mock.post("https://api.unstructured.io/general/v0/general", exc=requests.exceptions.ConnectionError)
85+
session = UnstructuredClient(api_key_auth=FAKE_KEY)
86+
87+
with open(filename, "rb") as f:
88+
files = shared.Files(content=f.read(), file_name=filename)
89+
90+
req = shared.PartitionParameters(files=files)
91+
with pytest.raises(Exception):
92+
session.general.partition(req, retries=retries)
93+
94+
pattern = re.compile(f"Failed to process a request due to connection error .*? "
95+
"Attempting retry number 1 after sleep.")
96+
assert bool(pattern.search(caplog.text))
97+
98+
7299
@pytest.mark.parametrize(
73100
"server_url",
74101
[

Diff for: src/unstructured_client/_hooks/custom/logger_hook.py

+29-4
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,20 @@
99
AfterErrorContext,
1010
AfterErrorHook,
1111
SDKInitHook,
12+
AfterSuccessHook,
1213
)
1314
from collections import defaultdict
1415

1516
logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
1617

1718

18-
class LoggerHook(AfterErrorHook, SDKInitHook):
19+
class LoggerHook(AfterErrorHook, AfterSuccessHook, SDKInitHook):
1920
"""Hook providing custom logging"""
2021

2122
def __init__(self) -> None:
2223
self.retries_counter: DefaultDict[str, int] = defaultdict(int)
2324

24-
def log_retries(self, response: Optional[requests.Response], operation_id: str):
25+
def log_retries(self, response: Optional[requests.Response], error: Optional[Exception], operation_id: str,):
2526
"""Log retries to give users visibility into requests."""
2627

2728
if response is not None and response.status_code // 100 == 5:
@@ -33,6 +34,15 @@ def log_retries(self, response: Optional[requests.Response], operation_id: str):
3334
)
3435
if response.text:
3536
logger.info("Server message - %s", response.text)
37+
38+
elif error is not None and isinstance(error, requests.exceptions.ConnectionError):
39+
logger.info(
40+
"Failed to process a request due to connection error - %s. "
41+
"Attempting retry number %d after sleep.",
42+
error,
43+
self.retries_counter[operation_id],
44+
)
45+
3646

3747
def sdk_init(
3848
self, base_url: str, client: requests.Session
@@ -43,7 +53,9 @@ def sdk_init(
4353
def after_success(
4454
self, hook_ctx: AfterSuccessContext, response: requests.Response
4555
) -> Union[requests.Response, Exception]:
46-
del self.retries_counter[hook_ctx.operation_id]
56+
self.retries_counter.pop(hook_ctx.operation_id, None)
57+
# NOTE: In case of split page partition this means - at least one of the splits was partitioned successfully
58+
logger.info("Successfully partitioned the document.")
4759
return response
4860

4961
def after_error(
@@ -54,5 +66,18 @@ def after_error(
5466
) -> Union[Tuple[Optional[requests.Response], Optional[Exception]], Exception]:
5567
"""Concrete implementation for AfterErrorHook."""
5668
self.retries_counter[hook_ctx.operation_id] += 1
57-
self.log_retries(response, hook_ctx.operation_id)
69+
self.log_retries(response, error, hook_ctx.operation_id)
70+
71+
if response and response.status_code == 200:
72+
# NOTE: Even though this is an after_error method, due to split_pdf_hook logic we may get
73+
# a success here when one of the split requests was partitioned successfully
74+
logger.info("Successfully partitioned the document.")
75+
76+
else:
77+
logger.error("Failed to partition the document.")
78+
if response:
79+
logger.error("Server responded with %d - %s", response.status_code, response.text)
80+
if error is not None:
81+
logger.error("Following error occurred - %s", error)
82+
5883
return response, error

Diff for: src/unstructured_client/_hooks/custom/split_pdf_hook.py

-6
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,6 @@ def after_success(
302302

303303
updated_response = request_utils.create_response(response, elements)
304304
self._clear_operation(operation_id)
305-
logger.info("Successfully processed the request.")
306305
return updated_response
307306

308307
def after_error(
@@ -335,21 +334,16 @@ def after_error(
335334
successful_responses = self.api_successful_responses.get(operation_id)
336335

337336
if elements is None or successful_responses is None:
338-
logger.info("Successfully processed the request.")
339337
return (response, error)
340338

341339
if len(successful_responses) == 0:
342-
logger.error("Failed to process the request.")
343-
if error is not None:
344-
logger.error(error)
345340
self._clear_operation(operation_id)
346341
return (response, error)
347342

348343
updated_response = request_utils.create_response(
349344
successful_responses[0], elements
350345
)
351346
self._clear_operation(operation_id)
352-
logger.info("Successfully processed the request.")
353347
return (updated_response, None)
354348

355349
def _clear_operation(self, operation_id: str) -> None:

Diff for: src/unstructured_client/_hooks/registration.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ def init_hooks(hooks: Hooks):
2727
logger_hook = LoggerHook()
2828
split_pdf_hook = SplitPdfHook()
2929

30+
# NOTE: logger_hook should stay registered last as logs the status of
31+
# request and whether it will be retried which can be changed by e.g. split_pdf_hook
32+
3033
# Register SDK Init hooks
3134
hooks.register_sdk_init_hook(clean_server_url_hook)
3235
hooks.register_sdk_init_hook(logger_hook)
@@ -37,8 +40,9 @@ def init_hooks(hooks: Hooks):
3740

3841
# Register After Error hooks
3942
hooks.register_after_success_hook(split_pdf_hook)
43+
hooks.register_after_success_hook(logger_hook)
4044

4145
# Register After Error hooks
4246
hooks.register_after_error_hook(suggest_defining_url_hook)
4347
hooks.register_after_error_hook(split_pdf_hook)
44-
hooks.register_after_error_hook(logger_hook)
48+
hooks.register_after_error_hook(logger_hook)

0 commit comments

Comments
 (0)