Skip to content

Commit b13aabb

Browse files
committed
Fix: connection test should not require an exact 200 status code
1 parent 5c7deab commit b13aabb

File tree

1 file changed

+50
-9
lines changed

1 file changed

+50
-9
lines changed

src/operator/utils/node_validation_test/connection_validator.py

Lines changed: 50 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,15 @@ class URLTestConfig(pydantic.BaseModel):
3737
method: str = pydantic.Field(default='GET', description='HTTP method to use')
3838
timeout: int = pydantic.Field(
3939
default=30, description='Timeout in seconds for the connection test')
40-
expected_status_code: int = pydantic.Field(
41-
default=200, description='Expected HTTP status code')
40+
expected_status_code: Optional[int] = pydantic.Field(
41+
default=None, description='Expected HTTP status code (None means any non-5xx is success)')
4242
condition_name: Optional[str] = pydantic.Field(
4343
default='ServiceConnectionTestFailure', description='Custom condition name for this URL')
44+
retriable_status_codes: List[int] = pydantic.Field(
45+
default=[429, 503], description='Status codes that should trigger retry')
46+
failure_status_codes: List[int] = pydantic.Field(
47+
default=[500, 501, 502, 504, 505, 506, 507, 508, 510, 511],
48+
description='Status codes indicating actual service failure (5xx except retriable)')
4449

4550

4651
class ConnectionTestConfig(test_base.NodeTestConfig):
@@ -119,6 +124,13 @@ def _connection_test(self, url_config: URLTestConfig) -> test_base.NodeCondition
119124
120125
Returns:
121126
NodeCondition on success, None on failure (to trigger retry/backoff).
127+
128+
Status code handling:
129+
- If expected_status_code is set, only that code is considered success
130+
- If expected_status_code is None (default):
131+
- Retriable codes (429, 503) trigger retry
132+
- Failure codes (5xx except retriable) indicate service is down
133+
- All other codes (2xx, 3xx, 4xx) indicate service is reachable
122134
"""
123135
try:
124136
logging.info('Testing URL: %s', url_config.url)
@@ -128,21 +140,50 @@ def _connection_test(self, url_config: URLTestConfig) -> test_base.NodeCondition
128140
timeout=url_config.timeout,
129141
)
130142

131-
if response.status_code != url_config.expected_status_code:
132-
logging.error(
133-
'Unexpected status code from %s: %s != %s',
143+
status_code = response.status_code
144+
145+
# If expected_status_code is explicitly set, use strict matching
146+
if url_config.expected_status_code is not None:
147+
if status_code != url_config.expected_status_code:
148+
logging.error(
149+
'Unexpected status code from %s: %s != %s',
150+
url_config.url,
151+
status_code,
152+
url_config.expected_status_code,
153+
)
154+
return None
155+
else:
156+
# Check if status code is retriable (e.g., 429 rate limiting, 503 unavailable)
157+
if status_code in url_config.retriable_status_codes:
158+
logging.warning(
159+
'Retriable status code from %s: %s, will retry',
160+
url_config.url,
161+
status_code,
162+
)
163+
return None
164+
165+
# Check if status code indicates actual service failure (5xx errors)
166+
if status_code in url_config.failure_status_codes or status_code >= 500:
167+
logging.error(
168+
'Service failure status code from %s: %s',
169+
url_config.url,
170+
status_code,
171+
)
172+
return None
173+
174+
# Any other status code (2xx, 3xx, 4xx) means service is reachable
175+
logging.info(
176+
'Service reachable at %s with status code %s',
134177
url_config.url,
135-
response.status_code,
136-
url_config.expected_status_code,
178+
status_code,
137179
)
138-
return None
139180

140181
logging.info('URL test passed: %s (%s)', url_config.url, url_config.condition_name)
141182
return test_base.NodeCondition(
142183
type=url_config.condition_name or self.config.condition_name,
143184
status='False',
144185
reason='ServiceConnectionSuccess',
145-
message=f'Connection test passed: {url_config.url}',
186+
message=f'Connection test passed: {url_config.url} (status: {status_code})',
146187
)
147188
except requests.RequestException as e:
148189
logging.error('Connection test failed for %s: %s', url_config.url, str(e))

0 commit comments

Comments
 (0)