|
8 | 8 | import os |
9 | 9 | import shutil |
10 | 10 | import sys |
| 11 | +import time |
11 | 12 | from collections import defaultdict |
12 | 13 | from importlib.resources import files |
13 | 14 | from pathlib import Path |
14 | 15 | from textwrap import dedent |
15 | | -from typing import TYPE_CHECKING, cast |
| 16 | +from typing import TYPE_CHECKING, Any, cast |
16 | 17 |
|
17 | 18 | import httpx |
18 | 19 | import jsonschema |
|
22 | 23 | from ._logging import log, setup_logging |
23 | 24 |
|
24 | 25 | if TYPE_CHECKING: |
25 | | - from collections.abc import Iterable, Mapping, Sequence |
| 26 | + from collections.abc import Callable, Iterable, Mapping, Sequence |
26 | 27 | from importlib.resources.abc import Traversable |
27 | 28 |
|
28 | 29 | from .schema import ScverseEcosystemPackages # pyright: ignore[reportMissingModuleSource] |
|
33 | 34 | IMAGE_SIZE = 512 |
34 | 35 |
|
35 | 36 |
|
def _retry_with_backoff(
    function: Callable[..., httpx.Response], *, wait_time: int = 5, attempts: int = 3, **kwargs: Any
) -> httpx.Response:
    """
    Attempt a http request up to `attempts` times with exponential backoff.

    Use `function` to make the request. `kwargs` are passed to `function` unchanged.
    Wait time is multiplied by 2 after every failed attempt.

    Parameters
    ----------
    function
        Callable performing the request (e.g. `httpx.head`); called as `function(**kwargs)`.
    wait_time
        Seconds to sleep before the first retry.
    attempts
        Maximum number of calls to `function`. At least one call is always made.
    kwargs
        Keyword arguments forwarded to `function` on every attempt.

    Returns
    -------
    The first response with status 200 or a 4xx status (client errors are not retried),
    otherwise the response of the final attempt.

    Raises
    ------
    httpx.TimeoutException, httpx.NetworkError
        If the final attempt fails with one of these exceptions.
    """
    # Retry state lives in local variables only, so no bookkeeping argument
    # can leak into `kwargs` and reach `function`.
    remaining_attempts = attempts
    while True:
        try:
            response = function(**kwargs)
        except (httpx.TimeoutException, httpx.NetworkError, httpx.ConnectError):
            remaining_attempts -= 1
            if remaining_attempts <= 0:
                # Out of attempts: surface the last transport error to the caller.
                raise
        else:
            # Success - return immediately
            # Don't retry on client errors (4xx) as they won't resolve with retries
            if response.status_code == httpx.codes.OK or 400 <= response.status_code < 500:  # noqa: PLR2004 (allow numeric constants in condition)
                return response
            remaining_attempts -= 1
            if remaining_attempts <= 0:
                # Out of attempts: hand back the last response so the caller can inspect its status.
                return response

        log.info(f"Retry after {wait_time}s. Remaining attempts: {remaining_attempts}")
        time.sleep(wait_time)
        wait_time *= 2
| 68 | + |
| 69 | + |
class ValidationError(Exception):
    """
    Error describing a failed validation check.

    The checks visible in this module build an instance from a message string
    and return it to the caller instead of raising it.
    """
38 | 72 |
|
@@ -67,7 +101,12 @@ def check_and_register(self, url: str, context: str) -> None | ValidationError: |
67 | 101 | msg = f"{context}: Duplicate link: {url}" |
68 | 102 | return ValidationError(msg) |
69 | 103 |
|
70 | | - response = httpx.head(url, follow_redirects=True) |
| 104 | + try: |
| 105 | + response = _retry_with_backoff(httpx.head, url=url, follow_redirects=True, timeout=30.0) |
| 106 | + except Exception as e: |
| 107 | + msg = f"URL {url} is not reachable: {e}" |
| 108 | + return ValidationError(msg) |
| 109 | + |
71 | 110 | if response.status_code != httpx.codes.OK: |
72 | 111 | msg = f"URL {url} is not reachable (error {response.status_code}). " |
73 | 112 | return ValidationError(msg) |
@@ -102,7 +141,18 @@ def validate_usernames(self, usernames: Sequence[str], context: str) -> None | V |
102 | 141 | headers["Authorization"] = f"token {self.github_token}" |
103 | 142 |
|
104 | 143 | q = "\n".join(f"user{i}: user(login: {json.dumps(name)}) {{ login }}" for i, name in enumerate(unvalidated)) |
105 | | - response = httpx.post("https://api.github.com/graphql", headers=headers, json={"query": f"query {{ {q} }}"}) |
| 144 | + |
| 145 | + try: |
| 146 | + response = _retry_with_backoff( |
| 147 | + httpx.post, |
| 148 | + url="https://api.github.com/graphql", |
| 149 | + headers=headers, |
| 150 | + json={"query": f"query {{ {q} }}"}, |
| 151 | + timeout=30.0, |
| 152 | + ) |
| 153 | + except Exception as e: |
| 154 | + msg = f"{context}: Failed to validate GitHub users {unvalidated!r}: {e}" |
| 155 | + return ValidationError(msg) |
106 | 156 |
|
107 | 157 | if response.status_code != httpx.codes.OK: |
108 | 158 | msg = f"{context}: Failed to validate GitHub users {unvalidated!r} (error {response.status_code})" |
@@ -138,7 +188,13 @@ def validate_package(self, package_name: str, context: str) -> None | Validation |
138 | 188 | if package_name in self.validated_packages: |
139 | 189 | return None |
140 | 190 |
|
141 | | - response = httpx.head(f"https://pypi.org/pypi/{package_name}/json", follow_redirects=True) |
| 191 | + try: |
| 192 | + response = _retry_with_backoff( |
| 193 | + httpx.head, url=f"https://pypi.org/pypi/{package_name}/json", follow_redirects=True, timeout=30.0 |
| 194 | + ) |
| 195 | + except Exception as e: |
| 196 | + msg = f"{context}: Failed to validate PyPI package {package_name!r}: {e}" |
| 197 | + return ValidationError(msg) |
142 | 198 |
|
143 | 199 | if response.status_code == httpx.codes.NOT_FOUND: |
144 | 200 | msg = f"{context}: PyPI package {package_name!r} does not exist" |
@@ -179,10 +235,16 @@ def validate_package(self, package_spec: str, context: str) -> None | Validation |
179 | 235 | channel, package_name = package_spec.split("::", 1) |
180 | 236 |
|
181 | 237 | # Check package exists on the channel |
182 | | - response = httpx.head( |
183 | | - f"https://api.anaconda.org/package/{channel}/{package_name}", |
184 | | - follow_redirects=True, |
185 | | - ) |
| 238 | + try: |
| 239 | + response = _retry_with_backoff( |
| 240 | + httpx.head, |
| 241 | + url=f"https://api.anaconda.org/package/{channel}/{package_name}", |
| 242 | + follow_redirects=True, |
| 243 | + timeout=30.0, |
| 244 | + ) |
| 245 | + except Exception as e: |
| 246 | + msg = f"{context}: Failed to validate Conda package '{package_spec}': {e}" |
| 247 | + return ValidationError(msg) |
186 | 248 |
|
187 | 249 | if response.status_code == httpx.codes.NOT_FOUND: |
188 | 250 | msg = f"{context}: Conda package '{package_spec}' does not exist" |
@@ -216,10 +278,13 @@ def validate_package(self, package_name: str, context: str) -> None | Validation |
216 | 278 | return None |
217 | 279 |
|
218 | 280 | # CRAN packages can be checked via the packages database |
219 | | - response = httpx.head( |
220 | | - f"https://crandb.r-pkg.org/{package_name}", |
221 | | - follow_redirects=True, |
222 | | - ) |
| 281 | + try: |
| 282 | + response = _retry_with_backoff( |
| 283 | + httpx.head, url=f"https://crandb.r-pkg.org/{package_name}", follow_redirects=True, timeout=30.0 |
| 284 | + ) |
| 285 | + except Exception as e: |
| 286 | + msg = f"{context}: Failed to validate CRAN package '{package_name}': {e}" |
| 287 | + return ValidationError(msg) |
223 | 288 |
|
224 | 289 | if response.status_code == httpx.codes.NOT_FOUND: |
225 | 290 | msg = f"{context}: CRAN package '{package_name}' does not exist" |
|
0 commit comments