Skip to content

Commit 619a10e

Browse files
authored
intersphinx: Simplify _fetch_inventory() (sphinx-doc#13209)
The streams-based interfaces in intersphinx and ``sphinx.util.inventory`` are clever, but also complex and prevent using compression methods that don't support incrememntal decoding. This change refactors ``_fetch_inventory()`` to read all inventory content from disk or an HTTP request at once.
1 parent e17ed74 commit 619a10e

File tree

3 files changed

+61
-70
lines changed

3 files changed

+61
-70
lines changed

sphinx/ext/intersphinx/_cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import sys
6+
from pathlib import Path
67

78
from sphinx.ext.intersphinx._load import _fetch_inventory
89

@@ -29,7 +30,7 @@ class MockConfig:
2930
target_uri='',
3031
inv_location=filename,
3132
config=MockConfig(), # type: ignore[arg-type]
32-
srcdir='', # type: ignore[arg-type]
33+
srcdir=Path(''),
3334
)
3435
for key in sorted(inv_data or {}):
3536
print(key)

sphinx/ext/intersphinx/_load.py

Lines changed: 51 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import concurrent.futures
6+
import io
67
import os.path
78
import posixpath
89
import time
@@ -20,8 +21,6 @@
2021
if TYPE_CHECKING:
2122
from pathlib import Path
2223

23-
from urllib3.response import HTTPResponse
24-
2524
from sphinx.application import Sphinx
2625
from sphinx.config import Config
2726
from sphinx.ext.intersphinx._shared import (
@@ -31,7 +30,7 @@
3130
InventoryName,
3231
InventoryURI,
3332
)
34-
from sphinx.util.typing import Inventory, _ReadableStream
33+
from sphinx.util.typing import Inventory
3534

3635

3736
def validate_intersphinx_mapping(app: Sphinx, config: Config) -> None:
@@ -297,13 +296,38 @@ def _fetch_inventory(
297296
# and *inv_location* (actual location of the inventory file)
298297
# can be local or remote URIs
299298
if '://' in target_uri:
300-
# case: inv URI points to remote resource; strip any existing auth
299+
# inv URI points to remote resource; strip any existing auth
301300
target_uri = _strip_basic_auth(target_uri)
301+
if '://' in inv_location:
302+
raw_data, target_uri = _fetch_inventory_url(
303+
target_uri=target_uri, inv_location=inv_location, config=config
304+
)
305+
else:
306+
raw_data = _fetch_inventory_file(inv_location=inv_location, srcdir=srcdir)
307+
308+
stream = io.BytesIO(raw_data)
302309
try:
303-
if '://' in inv_location:
304-
f: _ReadableStream[bytes] = _read_from_url(inv_location, config=config)
305-
else:
306-
f = open(os.path.join(srcdir, inv_location), 'rb') # NoQA: SIM115
310+
invdata = InventoryFile.load(stream, target_uri, posixpath.join)
311+
except ValueError as exc:
312+
msg = f'unknown or unsupported inventory version: {exc!r}'
313+
raise ValueError(msg) from exc
314+
return invdata
315+
316+
317+
def _fetch_inventory_url(
318+
*, target_uri: InventoryURI, inv_location: str, config: Config
319+
) -> tuple[bytes, str]:
320+
try:
321+
with requests.get(
322+
inv_location,
323+
stream=True,
324+
timeout=config.intersphinx_timeout,
325+
_user_agent=config.user_agent,
326+
_tls_info=(config.tls_verify, config.tls_cacerts),
327+
) as r:
328+
r.raise_for_status()
329+
raw_data = r.content
330+
new_inv_location = r.url
307331
except Exception as err:
308332
err.args = (
309333
'intersphinx inventory %r not fetchable due to %s: %s',
@@ -312,25 +336,25 @@ def _fetch_inventory(
312336
str(err),
313337
)
314338
raise
339+
340+
if inv_location != new_inv_location:
341+
msg = __('intersphinx inventory has moved: %s -> %s')
342+
LOGGER.info(msg, inv_location, new_inv_location)
343+
344+
if target_uri in {
345+
inv_location,
346+
os.path.dirname(inv_location),
347+
os.path.dirname(inv_location) + '/',
348+
}:
349+
target_uri = os.path.dirname(new_inv_location)
350+
351+
return raw_data, target_uri
352+
353+
354+
def _fetch_inventory_file(*, inv_location: str, srcdir: Path) -> bytes:
315355
try:
316-
if hasattr(f, 'url'):
317-
new_inv_location = f.url
318-
if inv_location != new_inv_location:
319-
msg = __('intersphinx inventory has moved: %s -> %s')
320-
LOGGER.info(msg, inv_location, new_inv_location)
321-
322-
if target_uri in {
323-
inv_location,
324-
os.path.dirname(inv_location),
325-
os.path.dirname(inv_location) + '/',
326-
}:
327-
target_uri = os.path.dirname(new_inv_location)
328-
with f:
329-
try:
330-
invdata = InventoryFile.load(f, target_uri, posixpath.join)
331-
except ValueError as exc:
332-
msg = f'unknown or unsupported inventory version: {exc!r}'
333-
raise ValueError(msg) from exc
356+
with open(srcdir / inv_location, 'rb') as f:
357+
raw_data = f.read()
334358
except Exception as err:
335359
err.args = (
336360
'intersphinx inventory %r not readable due to %s: %s',
@@ -339,8 +363,7 @@ def _fetch_inventory(
339363
str(err),
340364
)
341365
raise
342-
else:
343-
return invdata
366+
return raw_data
344367

345368

346369
def _get_safe_url(url: str) -> str:
@@ -387,37 +410,3 @@ def _strip_basic_auth(url: str) -> str:
387410
if '@' in frags[1]:
388411
frags[1] = frags[1].split('@')[1]
389412
return urlunsplit(frags)
390-
391-
392-
def _read_from_url(url: str, *, config: Config) -> HTTPResponse:
393-
"""Reads data from *url* with an HTTP *GET*.
394-
395-
This function supports fetching from resources which use basic HTTP auth as
396-
laid out by RFC1738 § 3.1. See § 5 for grammar definitions for URLs.
397-
398-
.. seealso:
399-
400-
https://www.ietf.org/rfc/rfc1738.txt
401-
402-
:param url: URL of an HTTP resource
403-
:type url: ``str``
404-
405-
:return: data read from resource described by *url*
406-
:rtype: ``file``-like object
407-
"""
408-
r = requests.get(
409-
url,
410-
stream=True,
411-
timeout=config.intersphinx_timeout,
412-
_user_agent=config.user_agent,
413-
_tls_info=(config.tls_verify, config.tls_cacerts),
414-
)
415-
r.raise_for_status()
416-
417-
# For inv_location / new_inv_location
418-
r.raw.url = r.url # type: ignore[union-attr]
419-
420-
# Decode content-body based on the header.
421-
# xref: https://github.com/psf/requests/issues/2155
422-
r.raw.decode_content = True
423-
return r.raw

tests/test_extensions/test_ext_intersphinx.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,15 @@ def set_config(app, mapping):
7070

7171

7272
@mock.patch('sphinx.ext.intersphinx._load.InventoryFile')
73-
@mock.patch('sphinx.ext.intersphinx._load._read_from_url')
73+
@mock.patch('sphinx.ext.intersphinx._load.requests.get')
7474
@pytest.mark.sphinx('html', testroot='root')
75-
def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQA: PT019
75+
def test_fetch_inventory_redirection(get_request, InventoryFile, app):
76+
mocked_get = get_request.return_value.__enter__.return_value
7677
intersphinx_setup(app)
77-
_read_from_url().readline.return_value = b'# Sphinx inventory version 2'
78+
mocked_get.content = b'# Sphinx inventory version 2'
7879

7980
# same uri and inv, not redirected
80-
_read_from_url().url = 'https://hostname/' + INVENTORY_FILENAME
81+
mocked_get.url = 'https://hostname/' + INVENTORY_FILENAME
8182
_fetch_inventory(
8283
target_uri='https://hostname/',
8384
inv_location='https://hostname/' + INVENTORY_FILENAME,
@@ -90,7 +91,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
9091
# same uri and inv, redirected
9192
app.status.seek(0)
9293
app.status.truncate(0)
93-
_read_from_url().url = 'https://hostname/new/' + INVENTORY_FILENAME
94+
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
9495

9596
_fetch_inventory(
9697
target_uri='https://hostname/',
@@ -108,7 +109,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
108109
# different uri and inv, not redirected
109110
app.status.seek(0)
110111
app.status.truncate(0)
111-
_read_from_url().url = 'https://hostname/new/' + INVENTORY_FILENAME
112+
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
112113

113114
_fetch_inventory(
114115
target_uri='https://hostname/',
@@ -122,7 +123,7 @@ def test_fetch_inventory_redirection(_read_from_url, InventoryFile, app): # NoQ
122123
# different uri and inv, redirected
123124
app.status.seek(0)
124125
app.status.truncate(0)
125-
_read_from_url().url = 'https://hostname/other/' + INVENTORY_FILENAME
126+
mocked_get.url = 'https://hostname/other/' + INVENTORY_FILENAME
126127

127128
_fetch_inventory(
128129
target_uri='https://hostname/',

0 commit comments

Comments
 (0)