1717import httpx
1818import jsonschema
1919import yaml
20+ from httpx_retries import Retry , RetryTransport
2021from PIL import Image
2122
2223from ._logging import log , setup_logging
@@ -50,8 +51,9 @@ def append(self, obj: Exception | None) -> None:
5051class LinkChecker :
5152 """Track known links and validate URLs."""
5253
53- def __init__ (self ) -> None :
54+ def __init__ (self , client : httpx . Client ) -> None :
5455 self .known_links : set [str ] = set ()
56+ self .client = client
5557
5658 def check_and_register (self , url : str , context : str ) -> None | ValidationError :
5759 """Check if URL is duplicate, validate it exists, and register it.
@@ -67,7 +69,12 @@ def check_and_register(self, url: str, context: str) -> None | ValidationError:
6769 msg = f"{ context } : Duplicate link: { url } "
6870 return ValidationError (msg )
6971
70- response = httpx .head (url , follow_redirects = True )
72+ try :
73+ response = self .client .head (url )
74+ except Exception as e :
75+ msg = f"URL { url } is not reachable: { e } "
76+ return ValidationError (msg )
77+
7178 if response .status_code != httpx .codes .OK :
7279 msg = f"URL { url } is not reachable (error { response .status_code } ). "
7380 return ValidationError (msg )
@@ -79,7 +86,8 @@ def check_and_register(self, url: str, context: str) -> None | ValidationError:
7986class GitHubUserValidator :
8087 """Validate GitHub usernames using the GitHub API."""
8188
82- def __init__ (self , github_token : str | None = None ) -> None :
89+ def __init__ (self , client : httpx .Client , github_token : str | None = None ) -> None :
90+ self .client = client
8391 self .github_token = github_token
8492 self .validated_users : set [str ] = set ()
8593
@@ -102,7 +110,14 @@ def validate_usernames(self, usernames: Sequence[str], context: str) -> None | V
102110 headers ["Authorization" ] = f"token { self .github_token } "
103111
104112 q = "\n " .join (f"user{ i } : user(login: { json .dumps (name )} ) {{ login }}" for i , name in enumerate (unvalidated ))
105- response = httpx .post ("https://api.github.com/graphql" , headers = headers , json = {"query" : f"query {{ { q } }}" })
113+
114+ try :
115+ response = self .client .post (
116+ "https://api.github.com/graphql" , headers = headers , json = {"query" : f"query {{ { q } }}" }
117+ )
118+ except Exception as e :
119+ msg = f"{ context } : Failed to validate GitHub users { unvalidated !r} : { e } "
120+ return ValidationError (msg )
106121
107122 if response .status_code != httpx .codes .OK :
108123 msg = f"{ context } : Failed to validate GitHub users { unvalidated !r} (error { response .status_code } )"
@@ -122,7 +137,8 @@ def validate_usernames(self, usernames: Sequence[str], context: str) -> None | V
122137class PyPIValidator :
123138 """Validate PyPI package names against the PyPI API."""
124139
125- def __init__ (self ) -> None :
140+ def __init__ (self , client : httpx .Client ) -> None :
141+ self .client = client
126142 self .validated_packages : set [str ] = set ()
127143
128144 def validate_package (self , package_name : str , context : str ) -> None | ValidationError :
@@ -138,7 +154,11 @@ def validate_package(self, package_name: str, context: str) -> None | Validation
138154 if package_name in self .validated_packages :
139155 return None
140156
141- response = httpx .head (f"https://pypi.org/pypi/{ package_name } /json" , follow_redirects = True )
157+ try :
158+ response = self .client .head (f"https://pypi.org/pypi/{ package_name } /json" )
159+ except Exception as e :
160+ msg = f"{ context } : Failed to validate PyPI package { package_name !r} : { e } "
161+ return ValidationError (msg )
142162
143163 if response .status_code == httpx .codes .NOT_FOUND :
144164 msg = f"{ context } : PyPI package { package_name !r} does not exist"
@@ -155,7 +175,8 @@ def validate_package(self, package_name: str, context: str) -> None | Validation
155175class CondaValidator :
156176 """Validate Conda package identifiers using the Anaconda API."""
157177
158- def __init__ (self ) -> None :
178+ def __init__ (self , client : httpx .Client ) -> None :
179+ self .client = client
159180 self .validated_packages : set [str ] = set ()
160181
161182 def validate_package (self , package_spec : str , context : str ) -> None | ValidationError :
@@ -179,10 +200,11 @@ def validate_package(self, package_spec: str, context: str) -> None | Validation
179200 channel , package_name = package_spec .split ("::" , 1 )
180201
181202 # Check package exists on the channel
182- response = httpx .head (
183- f"https://api.anaconda.org/package/{ channel } /{ package_name } " ,
184- follow_redirects = True ,
185- )
203+ try :
204+ response = self .client .head (f"https://api.anaconda.org/package/{ channel } /{ package_name } " )
205+ except Exception as e :
206+ msg = f"{ context } : Failed to validate Conda package '{ package_spec } ': { e } "
207+ return ValidationError (msg )
186208
187209 if response .status_code == httpx .codes .NOT_FOUND :
188210 msg = f"{ context } : Conda package '{ package_spec } ' does not exist"
@@ -199,7 +221,8 @@ def validate_package(self, package_spec: str, context: str) -> None | Validation
199221class CRANValidator :
200222 """Validate CRAN package names using the CRAN API."""
201223
202- def __init__ (self ) -> None :
224+ def __init__ (self , client : httpx .Client ) -> None :
225+ self .client = client
203226 self .validated_packages : set [str ] = set ()
204227
205228 def validate_package (self , package_name : str , context : str ) -> None | ValidationError :
@@ -216,10 +239,11 @@ def validate_package(self, package_name: str, context: str) -> None | Validation
216239 return None
217240
218241 # CRAN packages can be checked via the packages database
219- response = httpx .head (
220- f"https://crandb.r-pkg.org/{ package_name } " ,
221- follow_redirects = True ,
222- )
242+ try :
243+ response = self .client .head (f"https://crandb.r-pkg.org/{ package_name } " )
244+ except Exception as e :
245+ msg = f"{ context } : Failed to validate CRAN package '{ package_name } ': { e } "
246+ return ValidationError (msg )
223247
224248 if response .status_code == httpx .codes .NOT_FOUND :
225249 msg = f"{ context } : CRAN package '{ package_name } ' does not exist"
@@ -260,16 +284,20 @@ def validate_packages(
260284 """Find all package `meta.yaml` files in the registry dir and yield package records."""
261285 schema = json .loads (schema_file .read_bytes ())
262286
287+ # Create HTTP client with retry configuration using httpx_retries transport
288+ retry_transport = RetryTransport (retry = Retry (total = 3 , backoff_factor = 2 ))
289+ retry_client = httpx .Client (follow_redirects = True , timeout = 30.0 , transport = retry_transport )
290+
263291 # using different link checkers,
264292 # because each of them may point to the same URL and this wouldn't qualify as duplicate
265- link_checker_home = LinkChecker ()
266- link_checker_docs = LinkChecker ()
267- link_checker_tutorials = LinkChecker ()
268-
269- github_validator = GitHubUserValidator (github_token )
270- pypi_validator = PyPIValidator ()
271- conda_validator = CondaValidator ()
272- cran_validator = CRANValidator ()
293+ link_checker_home = LinkChecker (retry_client )
294+ link_checker_docs = LinkChecker (retry_client )
295+ link_checker_tutorials = LinkChecker (retry_client )
296+
297+ github_validator = GitHubUserValidator (retry_client , github_token )
298+ pypi_validator = PyPIValidator (retry_client )
299+ conda_validator = CondaValidator (retry_client )
300+ cran_validator = CRANValidator (retry_client )
273301
274302 errors : defaultdict [str , ErrorList ] = defaultdict (ErrorList )
275303 package_metadata : list [ScverseEcosystemPackages ] = []
0 commit comments