|  | 
|  | 1 | + | 
|  | 2 | +import logging | 
|  | 3 | +import time | 
|  | 4 | +import requests | 
|  | 5 | +import concurrent.futures | 
|  | 6 | +from pydantic import BaseModel, Field | 
|  | 7 | +from typing import List | 
|  | 8 | +from prometheus_client import Counter, Histogram | 
|  | 9 | +import socket | 
|  | 10 | +from urllib.parse import urlparse | 
|  | 11 | + | 
# Module logger; the name is fixed (not __name__) so log routing keyed on
# 'check.http_endpoints' keeps working regardless of where the module lives.
log = logging.getLogger('check.http_endpoints')

# Per-endpoint count of probes that returned an OK status.
HTTP_ENDPOINT_SUCCESS_TOTAL = Counter(
    name='http_endpoint_success_total',
    documentation='Total number of successful HTTP endpoint checks',
    labelnames=('endpoint_name',),
)

# Per-endpoint count of probes that returned an error status or raised.
HTTP_ENDPOINT_FAILURE_TOTAL = Counter(
    name='http_endpoint_failure_total',
    documentation='Total number of failed HTTP endpoint checks',
    labelnames=('endpoint_name',),
)

# Probe latency, split by endpoint and by outcome ('success' / 'failure').
HTTP_ENDPOINT_LATENCY_SECONDS = Histogram(
    name='http_endpoint_latency_seconds',
    documentation='Latency of HTTP endpoint checks in seconds',
    labelnames=('endpoint_name', 'status'),
)
|  | 29 | + | 
class Endpoint(BaseModel):
    """Configuration of a single HTTP endpoint to probe."""

    # Human-readable identifier; used in log messages and as the
    # ``endpoint_name`` label on the Prometheus metrics.
    name: str
    # URL that is requested by the check.
    url: str
    # Request timeout in seconds, passed straight to ``requests``. It must be
    # positive: a zero or negative timeout is rejected by the HTTP stack with
    # a plain ValueError at probe time, so it is refused here at validation
    # time instead.
    timeout: int = Field(10, gt=0)
    # HTTP method used for the probe request.
    method: str = 'GET'
|  | 35 | + | 
class HttpEndpointsParameters(BaseModel):
    """Parameters accepted by the HTTP endpoints check."""

    # Endpoints to probe. The field is required and must hold at least one
    # entry; an empty list fails validation.
    endpoints: List[Endpoint] = Field(default=..., min_items=1)
|  | 38 | + | 
class CheckHttpEndpoints:
    """Health check that probes a configured list of HTTP endpoints.

    All endpoints are requested concurrently. The check is healthy only when
    every endpoint answers with an OK status (``response.ok``). Each probe
    updates the module-level success/failure counters and the latency
    histogram.
    """

    def __init__(self, parameters: dict):
        """Validate *parameters* and store them on ``self.params``.

        Args:
            parameters: Raw check configuration, expanded as keyword
                arguments into ``HttpEndpointsParameters``.
        """
        self.params = HttpEndpointsParameters(**parameters)

    def is_healthy(self):
        """Probe every configured endpoint in parallel.

        Returns:
            bool: ``True`` when all endpoint probes succeeded, ``False`` as
            soon as one completed probe reports failure.
        """
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(self.check_endpoint, endpoint)
                for endpoint in self.params.endpoints
            ]
            for future in concurrent.futures.as_completed(futures):
                if not future.result():
                    # Leaving the ``with`` block still waits for the probes
                    # that are in flight, so their metrics get recorded.
                    return False
        log.info("Check http endpoints passed")
        return True

    @staticmethod
    def _pre_resolve_dns(url: str) -> None:
        """Best-effort DNS lookup for the host in *url*.

        Failures are logged and swallowed; the real request is expected to
        surface any genuine problem with the URL or the host.
        """
        hostname = None
        try:
            parsed_url = urlparse(url)
            hostname = parsed_url.hostname
            if not hostname:
                return
            port = parsed_url.port or {'http': 80, 'https': 443}.get(parsed_url.scheme, 80)
            socket.getaddrinfo(hostname, port)
        except (socket.gaierror, ValueError, TypeError) as e:
            # ValueError covers malformed URLs / ports and, via UnicodeError,
            # host names that cannot be IDNA-encoded.
            log.warning("DNS pre-resolution failed for %s: %s", hostname or url, e)

    def check_endpoint(self, endpoint: Endpoint):
        """Probe one endpoint and record the outcome in the metrics.

        Args:
            endpoint: The endpoint to request.

        Returns:
            bool: ``True`` if the response status is OK, ``False`` if the
            endpoint answered with an error status or the request raised a
            ``requests`` exception.
        """
        # Pre-resolve DNS to warm up the cache for timing purposes.
        self._pre_resolve_dns(endpoint.url)

        # Monotonic clock: the fallback latency must not be affected by
        # wall-clock adjustments while the request is in flight.
        start_time = time.monotonic()
        try:
            response = requests.request(endpoint.method, endpoint.url, timeout=endpoint.timeout)
        except requests.exceptions.RequestException as e:
            log.error("Failed to connect to HTTP endpoint %s (%s): %s", endpoint.name, endpoint.url, e)
            HTTP_ENDPOINT_FAILURE_TOTAL.labels(endpoint_name=endpoint.name).inc()
            # No response object exists here, so fall back to our own timer.
            HTTP_ENDPOINT_LATENCY_SECONDS.labels(
                endpoint_name=endpoint.name, status='failure'
            ).observe(time.monotonic() - start_time)
            return False

        latency = response.elapsed.total_seconds()
        if not response.ok:
            log.error(
                "HTTP endpoint %s (%s) returned status code %s",
                endpoint.name, endpoint.url, response.status_code,
            )
            HTTP_ENDPOINT_FAILURE_TOTAL.labels(endpoint_name=endpoint.name).inc()
            HTTP_ENDPOINT_LATENCY_SECONDS.labels(
                endpoint_name=endpoint.name, status='failure'
            ).observe(latency)
            return False

        HTTP_ENDPOINT_SUCCESS_TOTAL.labels(endpoint_name=endpoint.name).inc()
        HTTP_ENDPOINT_LATENCY_SECONDS.labels(
            endpoint_name=endpoint.name, status='success'
        ).observe(latency)
        return True
0 commit comments