Skip to content

Commit c0a0f13

Browse files
committed
feat(python-sdk): add opt-in retries for REST transport errors (GET/DELETE)
1 parent 0ed9cc1 commit c0a0f13

File tree

3 files changed

+248
-5
lines changed

3 files changed

+248
-5
lines changed

sdks/python/hatchet_sdk/clients/rest/tenacity_utils.py

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,42 @@
1+
from __future__ import annotations
2+
3+
import re
14
from collections.abc import Callable
2-
from typing import ParamSpec, TypeVar
5+
from typing import TYPE_CHECKING, ParamSpec, TypeVar
36

47
import grpc
58
import tenacity
69

7-
from hatchet_sdk.clients.rest.exceptions import NotFoundException, ServiceException
8-
from hatchet_sdk.config import TenacityConfig
10+
from hatchet_sdk.clients.rest.exceptions import (
11+
NotFoundException,
12+
RestTransportError,
13+
ServiceException,
14+
)
915
from hatchet_sdk.logger import logger
1016

17+
if TYPE_CHECKING:
18+
from hatchet_sdk.config import TenacityConfig
19+
1120
P = ParamSpec("P")
1221
R = TypeVar("R")
1322

23+
# Pattern to extract HTTP method from exception reason
24+
_METHOD_PATTERN = re.compile(r"method=(\w+)", re.IGNORECASE)
25+
1426

1527
def tenacity_retry(func: Callable[P, R], config: TenacityConfig) -> Callable[P, R]:
1628
if config.max_attempts <= 0:
1729
return func
1830

31+
def should_retry(ex: BaseException) -> bool:
32+
return tenacity_should_retry(ex, config)
33+
1934
return tenacity.retry(
2035
reraise=True,
2136
wait=tenacity.wait_exponential_jitter(),
2237
stop=tenacity.stop_after_attempt(config.max_attempts),
2338
before_sleep=tenacity_alert_retry,
24-
retry=tenacity.retry_if_exception(tenacity_should_retry),
39+
retry=tenacity.retry_if_exception(should_retry),
2540
)(func)
2641

2742

@@ -33,10 +48,23 @@ def tenacity_alert_retry(retry_state: tenacity.RetryCallState) -> None:
3348
)
3449

3550

36-
def tenacity_should_retry(ex: BaseException) -> bool:
51+
def tenacity_should_retry(
52+
ex: BaseException, config: TenacityConfig | None = None
53+
) -> bool:
54+
"""Determine if an exception should trigger a retry.
55+
56+
Args:
57+
ex: The exception to evaluate.
58+
config: Optional tenacity config for transport error settings.
59+
60+
Returns:
61+
True if the exception should be retried, False otherwise.
62+
"""
63+
# HTTP errors: ServiceException (5xx) and NotFoundException (404) are retried
3764
if isinstance(ex, ServiceException | NotFoundException):
3865
return True
3966

67+
# gRPC errors: retry most, except specific permanent failure codes
4068
if isinstance(ex, grpc.aio.AioRpcError | grpc.RpcError):
4169
return ex.code() not in [
4270
grpc.StatusCode.UNIMPLEMENTED,
@@ -47,4 +75,24 @@ def tenacity_should_retry(ex: BaseException) -> bool:
4775
grpc.StatusCode.PERMISSION_DENIED,
4876
]
4977

78+
# REST transport errors: opt-in retry for configured HTTP methods
79+
if isinstance(ex, RestTransportError):
80+
if config is not None and config.retry_transport_errors:
81+
method = _extract_method_from_reason(ex.reason)
82+
if method is not None:
83+
allowed_methods = {m.upper() for m in config.retry_transport_methods}
84+
return method.upper() in allowed_methods
85+
return False
86+
5087
return False
88+
89+
90+
def _extract_method_from_reason(reason: str | None) -> str | None:
91+
"""Extract HTTP method from exception reason string.
92+
93+
The reason string contains 'method=GET' or similar from rest.py exception handling.
94+
"""
95+
if not reason:
96+
return None
97+
match = _METHOD_PATTERN.search(reason)
98+
return match.group(1) if match else None

sdks/python/hatchet_sdk/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,15 @@ class TenacityConfig(BaseSettings):
9494

9595
max_attempts: int = 5
9696

97+
retry_transport_errors: bool = Field(
98+
default=False,
99+
description="Enable retries for REST transport errors (timeout, connection, TLS). Default: off.",
100+
)
101+
retry_transport_methods: list[str] = Field(
102+
default_factory=lambda: ["GET", "DELETE"],
103+
description="HTTP methods to retry on transport errors when retry_transport_errors is enabled.",
104+
)
105+
97106

98107
DEFAULT_HOST_PORT = "localhost:7070"
99108

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
"""Unit tests for tenacity transport error retry behavior.
2+
3+
Tests verify:
4+
1. Default behavior: RestTransportError is NOT retried (even for GET)
5+
2. Opt-in behavior: RestTransportError retried for configured methods only
6+
3. Existing HTTP error retry behavior unchanged
7+
"""
8+
9+
import pytest
10+
11+
from hatchet_sdk.clients.rest.exceptions import (
12+
NotFoundException,
13+
RestConnectionError,
14+
RestProtocolError,
15+
RestTimeoutError,
16+
RestTLSError,
17+
RestTransportError,
18+
ServiceException,
19+
)
20+
from hatchet_sdk.clients.rest.tenacity_utils import tenacity_should_retry
21+
from hatchet_sdk.config import TenacityConfig
22+
23+
# --- Default behavior tests (transport errors NOT retried) ---
24+
25+
26+
def test_default__rest_transport_error_not_retried() -> None:
27+
"""By default, RestTransportError should NOT be retried."""
28+
exc = RestTransportError(status=0, reason="method=GET\nurl=http://test")
29+
config = TenacityConfig()
30+
assert tenacity_should_retry(exc, config) is False
31+
32+
33+
def test_default__rest_timeout_error_not_retried() -> None:
34+
"""By default, RestTimeoutError should NOT be retried."""
35+
exc = RestTimeoutError(status=0, reason="method=GET\nurl=http://test")
36+
config = TenacityConfig()
37+
assert tenacity_should_retry(exc, config) is False
38+
39+
40+
def test_default__rest_connection_error_not_retried() -> None:
41+
"""By default, RestConnectionError should NOT be retried."""
42+
exc = RestConnectionError(status=0, reason="method=GET\nurl=http://test")
43+
config = TenacityConfig()
44+
assert tenacity_should_retry(exc, config) is False
45+
46+
47+
def test_default__rest_tls_error_not_retried() -> None:
48+
"""By default, RestTLSError should NOT be retried."""
49+
exc = RestTLSError(status=0, reason="method=GET\nurl=http://test")
50+
config = TenacityConfig()
51+
assert tenacity_should_retry(exc, config) is False
52+
53+
54+
def test_default__rest_protocol_error_not_retried() -> None:
55+
"""By default, RestProtocolError should NOT be retried."""
56+
exc = RestProtocolError(status=0, reason="method=GET\nurl=http://test")
57+
config = TenacityConfig()
58+
assert tenacity_should_retry(exc, config) is False
59+
60+
61+
# --- Opt-in behavior tests (transport errors retried for allowed methods) ---
62+
63+
64+
def test_optin__get_request_retried() -> None:
65+
"""When enabled, GET requests with transport errors should be retried."""
66+
exc = RestTimeoutError(status=0, reason="method=GET\nurl=http://test")
67+
config = TenacityConfig(retry_transport_errors=True)
68+
assert tenacity_should_retry(exc, config) is True
69+
70+
71+
def test_optin__delete_request_retried() -> None:
72+
"""When enabled, DELETE requests with transport errors should be retried."""
73+
exc = RestConnectionError(status=0, reason="method=DELETE\nurl=http://test")
74+
config = TenacityConfig(retry_transport_errors=True)
75+
assert tenacity_should_retry(exc, config) is True
76+
77+
78+
def test_optin__post_request_not_retried() -> None:
79+
"""POST requests should NOT be retried even when transport retry is enabled."""
80+
exc = RestTimeoutError(status=0, reason="method=POST\nurl=http://test")
81+
config = TenacityConfig(retry_transport_errors=True)
82+
assert tenacity_should_retry(exc, config) is False
83+
84+
85+
def test_optin__put_request_not_retried() -> None:
86+
"""PUT requests should NOT be retried even when transport retry is enabled."""
87+
exc = RestConnectionError(status=0, reason="method=PUT\nurl=http://test")
88+
config = TenacityConfig(retry_transport_errors=True)
89+
assert tenacity_should_retry(exc, config) is False
90+
91+
92+
def test_optin__patch_request_not_retried() -> None:
93+
"""PATCH requests should NOT be retried even when transport retry is enabled."""
94+
exc = RestProtocolError(status=0, reason="method=PATCH\nurl=http://test")
95+
config = TenacityConfig(retry_transport_errors=True)
96+
assert tenacity_should_retry(exc, config) is False
97+
98+
99+
def test_optin__custom_methods_list() -> None:
100+
"""Custom retry_transport_methods should be honored."""
101+
exc = RestTimeoutError(status=0, reason="method=POST\nurl=http://test")
102+
config = TenacityConfig(
103+
retry_transport_errors=True,
104+
retry_transport_methods=["POST"], # allow POST explicitly
105+
)
106+
assert tenacity_should_retry(exc, config) is True
107+
108+
109+
def test_optin__custom_methods_excludes_get() -> None:
110+
"""Custom retry_transport_methods can exclude GET."""
111+
exc = RestTimeoutError(status=0, reason="method=GET\nurl=http://test")
112+
config = TenacityConfig(
113+
retry_transport_errors=True,
114+
retry_transport_methods=["DELETE"], # only DELETE, not GET
115+
)
116+
assert tenacity_should_retry(exc, config) is False
117+
118+
119+
# --- Regression tests: existing HTTP error retry behavior unchanged ---
120+
121+
122+
def test_regression__service_exception_still_retried() -> None:
123+
"""ServiceException (5xx) should still be retried."""
124+
exc = ServiceException(status=500, reason="Internal Server Error")
125+
config = TenacityConfig()
126+
assert tenacity_should_retry(exc, config) is True
127+
128+
129+
def test_regression__not_found_exception_still_retried() -> None:
130+
"""NotFoundException (404) should still be retried."""
131+
exc = NotFoundException(status=404, reason="Not Found")
132+
config = TenacityConfig()
133+
assert tenacity_should_retry(exc, config) is True
134+
135+
136+
def test_regression__service_exception_retried_without_config() -> None:
137+
"""ServiceException should be retried even without config (backward compat)."""
138+
exc = ServiceException(status=500, reason="Internal Server Error")
139+
# Call without config to test backward compatibility
140+
assert tenacity_should_retry(exc) is True
141+
142+
143+
# --- Edge cases ---
144+
145+
146+
def test_edge__missing_method_in_reason_not_retried() -> None:
147+
"""If method cannot be extracted from reason, should not retry."""
148+
exc = RestTimeoutError(status=0, reason="some error without method")
149+
config = TenacityConfig(retry_transport_errors=True)
150+
assert tenacity_should_retry(exc, config) is False
151+
152+
153+
def test_edge__empty_reason_not_retried() -> None:
154+
"""If reason is empty, should not retry."""
155+
exc = RestConnectionError(status=0, reason="")
156+
config = TenacityConfig(retry_transport_errors=True)
157+
assert tenacity_should_retry(exc, config) is False
158+
159+
160+
def test_edge__none_reason_not_retried() -> None:
161+
"""If reason is None, should not retry."""
162+
exc = RestTLSError(status=0, reason=None)
163+
config = TenacityConfig(retry_transport_errors=True)
164+
assert tenacity_should_retry(exc, config) is False
165+
166+
167+
def test_edge__case_insensitive_method_matching() -> None:
168+
"""Method matching should be case-insensitive."""
169+
exc = RestTimeoutError(status=0, reason="method=get\nurl=http://test")
170+
config = TenacityConfig(retry_transport_errors=True)
171+
assert tenacity_should_retry(exc, config) is True
172+
173+
174+
# --- Config defaults tests ---
175+
176+
177+
def test_config__default_retry_transport_errors_is_false() -> None:
178+
"""retry_transport_errors should default to False."""
179+
config = TenacityConfig()
180+
assert config.retry_transport_errors is False
181+
182+
183+
def test_config__default_retry_transport_methods() -> None:
184+
"""retry_transport_methods should default to GET and DELETE."""
185+
config = TenacityConfig()
186+
assert set(config.retry_transport_methods) == {"GET", "DELETE"}

0 commit comments

Comments
 (0)