Skip to content

Commit c91e82f

Browse files
committed
Pre-warm DNS for endpoint latency
Change-Id: I9c8db07791446ffea7d8a5467cc1ebbcf8b65b71
1 parent 40f9a21 commit c91e82f

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

app/check_http_endpoints.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
from pydantic import BaseModel, Field
77
from typing import List
88
from prometheus_client import Counter, Histogram
9+
import socket
10+
from urllib.parse import urlparse
911

1012
log = logging.getLogger('check.http_endpoints')
1113

@@ -22,7 +24,7 @@
2224
HTTP_ENDPOINT_LATENCY_SECONDS = Histogram(
2325
'http_endpoint_latency_seconds',
2426
'Latency of HTTP endpoint checks in seconds',
25-
['endpoint_name']
27+
['endpoint_name', 'status']
2628
)
2729

2830
class Endpoint(BaseModel):
@@ -48,20 +50,31 @@ def is_healthy(self):
4850
return True
4951

5052
def check_endpoint(self, endpoint: Endpoint):
53+
# Pre-resolve DNS to warm up the cache for timing purposes.
54+
try:
55+
parsed_url = urlparse(endpoint.url)
56+
hostname = parsed_url.hostname
57+
port = parsed_url.port or {'http': 80, 'https': 443}.get(parsed_url.scheme, 80)
58+
if hostname:
59+
socket.getaddrinfo(hostname, port)
60+
except (socket.gaierror, TypeError) as e:
61+
# Log the pre-resolution failure, but proceed. The actual request will handle the error.
62+
log.warning(f"DNS pre-resolution failed for {hostname}: {e}")
63+
5164
start_time = time.time()
5265
try:
5366
response = requests.request(endpoint.method, endpoint.url, timeout=endpoint.timeout)
5467
if not response.ok:
5568
log.error(f"HTTP endpoint {endpoint.name} ({endpoint.url}) returned status code {response.status_code}")
5669
HTTP_ENDPOINT_FAILURE_TOTAL.labels(endpoint_name=endpoint.name).inc()
70+
HTTP_ENDPOINT_LATENCY_SECONDS.labels(endpoint_name=endpoint.name, status='failure').observe(response.elapsed.total_seconds())
5771
return False
5872
except requests.exceptions.RequestException as e:
5973
log.error(f"Failed to connect to HTTP endpoint {endpoint.name} ({endpoint.url}): {e}")
6074
HTTP_ENDPOINT_FAILURE_TOTAL.labels(endpoint_name=endpoint.name).inc()
75+
HTTP_ENDPOINT_LATENCY_SECONDS.labels(endpoint_name=endpoint.name, status='failure').observe(time.time() - start_time)
6176
return False
62-
finally:
63-
latency = time.time() - start_time
64-
HTTP_ENDPOINT_LATENCY_SECONDS.labels(endpoint_name=endpoint.name).observe(latency)
6577

6678
HTTP_ENDPOINT_SUCCESS_TOTAL.labels(endpoint_name=endpoint.name).inc()
79+
HTTP_ENDPOINT_LATENCY_SECONDS.labels(endpoint_name=endpoint.name, status='success').observe(response.elapsed.total_seconds())
6780
return True

app/test_check_http_endpoints.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def test_check_endpoint_success(self, mock_success, mock_failure, mock_latency,
2626
mock_success.labels.assert_called_once_with(endpoint_name='example')
2727
mock_success.labels.return_value.inc.assert_called_once()
2828
mock_failure.labels.assert_not_called()
29-
mock_latency.labels.assert_called_once_with(endpoint_name='example')
29+
mock_latency.labels.assert_called_once_with(endpoint_name='example', status='success')
3030
mock_latency.labels.return_value.observe.assert_called_once()
3131

3232
@patch('check_http_endpoints.requests.request')
@@ -48,7 +48,7 @@ def test_check_endpoint_http_error(self, mock_success, mock_failure, mock_latenc
4848
mock_failure.labels.assert_called_once_with(endpoint_name='example')
4949
mock_failure.labels.return_value.inc.assert_called_once()
5050
mock_success.labels.assert_not_called()
51-
mock_latency.labels.assert_called_once_with(endpoint_name='example')
51+
mock_latency.labels.assert_called_once_with(endpoint_name='example', status='failure')
5252
mock_latency.labels.return_value.observe.assert_called_once()
5353

5454
@patch('check_http_endpoints.requests.request')
@@ -67,7 +67,7 @@ def test_check_endpoint_request_exception(self, mock_success, mock_failure, mock
6767
mock_failure.labels.assert_called_once_with(endpoint_name='example')
6868
mock_failure.labels.return_value.inc.assert_called_once()
6969
mock_success.labels.assert_not_called()
70-
mock_latency.labels.assert_called_once_with(endpoint_name='example')
70+
mock_latency.labels.assert_called_once_with(endpoint_name='example', status='failure')
7171
mock_latency.labels.return_value.observe.assert_called_once()
7272

7373
@patch('check_http_endpoints.time.time')
@@ -79,6 +79,7 @@ def test_latency_metric(self, mock_success, mock_failure, mock_latency, mock_req
7979
"""Test that latency is recorded for endpoint checks."""
8080
mock_response = MagicMock()
8181
mock_response.ok = True
82+
mock_response.elapsed.total_seconds.return_value = 0.5
8283
mock_request.return_value = mock_response
8384
mock_time.side_effect = [100, 100.5]
8485

@@ -87,7 +88,7 @@ def test_latency_metric(self, mock_success, mock_failure, mock_latency, mock_req
8788
endpoint = Endpoint(name="example", url="http://example.com")
8889

8990
checker.check_endpoint(endpoint)
90-
mock_latency.labels.assert_called_once_with(endpoint_name='example')
91+
mock_latency.labels.assert_called_once_with(endpoint_name='example', status='success')
9192
mock_latency.labels.return_value.observe.assert_called_once_with(0.5)
9293

9394
def test_init_invalid_parameters(self):

0 commit comments

Comments
 (0)