From d0813e8a39736f1a376d4176a308bb1886244b75 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 11 May 2022 04:32:43 -0400
Subject: [PATCH 01/18] create LinkHash and check out dist-info-metadata (PEP
658)
- use PEP 658 dist-info-metadata when --use-feature=fast-deps is on
- add testing
---
src/pip/_internal/index/collector.py | 131 ++------
src/pip/_internal/metadata/__init__.py | 21 ++
src/pip/_internal/metadata/base.py | 15 +
.../_internal/metadata/importlib/_dists.py | 11 +
src/pip/_internal/metadata/pkg_resources.py | 19 ++
src/pip/_internal/models/link.py | 239 ++++++++++++--
src/pip/_internal/operations/prepare.py | 52 ++-
tests/functional/test_download.py | 310 +++++++++++++++++-
tests/lib/server.py | 8 -
tests/unit/test_collector.py | 79 ++++-
10 files changed, 723 insertions(+), 162 deletions(-)
diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py
index f4e6e221f5d..9ae213197dd 100644
--- a/src/pip/_internal/index/collector.py
+++ b/src/pip/_internal/index/collector.py
@@ -9,10 +9,8 @@
import json
import logging
import os
-import re
import urllib.parse
import urllib.request
-import xml.etree.ElementTree
from html.parser import HTMLParser
from optparse import Values
from typing import (
@@ -34,12 +32,12 @@
from pip._vendor.requests.exceptions import RetryError, SSLError
from pip._internal.exceptions import NetworkConnectionError
-from pip._internal.models.link import Link
+from pip._internal.models.link import HTMLElement, Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.filetypes import is_archive_file
-from pip._internal.utils.misc import pairwise, redact_auth_from_url
+from pip._internal.utils.misc import redact_auth_from_url
from pip._internal.vcs import vcs
from .sources import CandidatesFromPage, LinkSource, build_source
@@ -51,7 +49,6 @@
logger = logging.getLogger(__name__)
-HTMLElement = xml.etree.ElementTree.Element
ResponseHeaders = MutableMapping[str, str]
@@ -191,92 +188,25 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
return None
-def _clean_url_path_part(part: str) -> str:
- """
- Clean a "part" of a URL path (i.e. after splitting on "@" characters).
- """
- # We unquote prior to quoting to make sure nothing is double quoted.
- return urllib.parse.quote(urllib.parse.unquote(part))
-
-
-def _clean_file_url_path(part: str) -> str:
- """
- Clean the first part of a URL path that corresponds to a local
- filesystem path (i.e. the first part after splitting on "@" characters).
- """
- # We unquote prior to quoting to make sure nothing is double quoted.
- # Also, on Windows the path part might contain a drive letter which
- # should not be quoted. On Linux where drive letters do not
- # exist, the colon should be quoted. We rely on urllib.request
- # to do the right thing here.
- return urllib.request.pathname2url(urllib.request.url2pathname(part))
-
+def _determine_base_url(document: HTMLElement, page_url: str) -> str:
+ """Determine the HTML document's base URL.
-# percent-encoded: /
-_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
+ This looks for a ``<base>`` tag in the HTML document. If present, its href
+ attribute denotes the base URL of anchor tags in the document. If there is
+ no such tag (or if it does not have a valid href attribute), the HTML
+ file's URL is used as the base URL.
+ :param document: An HTML document representation. The current
+ implementation expects the result of ``html5lib.parse()``.
+ :param page_url: The URL of the HTML document.
-def _clean_url_path(path: str, is_local_path: bool) -> str:
+ TODO: Remove when `html5lib` is dropped.
"""
- Clean the path portion of a URL.
- """
- if is_local_path:
- clean_func = _clean_file_url_path
- else:
- clean_func = _clean_url_path_part
-
- # Split on the reserved characters prior to cleaning so that
- # revision strings in VCS URLs are properly preserved.
- parts = _reserved_chars_re.split(path)
-
- cleaned_parts = []
- for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
- cleaned_parts.append(clean_func(to_clean))
- # Normalize %xx escapes (e.g. %2f -> %2F)
- cleaned_parts.append(reserved.upper())
-
- return "".join(cleaned_parts)
-
-
-def _clean_link(url: str) -> str:
- """
- Make sure a link is fully quoted.
- For example, if ' ' occurs in the URL, it will be replaced with "%20",
- and without double-quoting other characters.
- """
- # Split the URL into parts according to the general structure
- # `scheme://netloc/path;parameters?query#fragment`.
- result = urllib.parse.urlparse(url)
- # If the netloc is empty, then the URL refers to a local filesystem path.
- is_local_path = not result.netloc
- path = _clean_url_path(result.path, is_local_path=is_local_path)
- return urllib.parse.urlunparse(result._replace(path=path))
-
-
-def _create_link_from_element(
- element_attribs: Dict[str, Optional[str]],
- page_url: str,
- base_url: str,
-) -> Optional[Link]:
- """
- Convert an anchor element's attributes in a simple repository page to a Link.
- """
- href = element_attribs.get("href")
- if not href:
- return None
-
- url = _clean_link(urllib.parse.urljoin(base_url, href))
- pyrequire = element_attribs.get("data-requires-python")
- yanked_reason = element_attribs.get("data-yanked")
-
- link = Link(
- url,
- comes_from=page_url,
- requires_python=pyrequire,
- yanked_reason=yanked_reason,
- )
-
- return link
+ for base in document.findall(".//base"):
+ href = base.get("href")
+ if href is not None:
+ return href
+ return page_url
class CacheablePageContent:
@@ -326,25 +256,10 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
if content_type_l.startswith("application/vnd.pypi.simple.v1+json"):
data = json.loads(page.content)
for file in data.get("files", []):
- file_url = file.get("url")
- if file_url is None:
+ link = Link.from_json(file, page.url)
+ if link is None:
continue
-
- # The Link.yanked_reason expects an empty string instead of a boolean.
- yanked_reason = file.get("yanked")
- if yanked_reason and not isinstance(yanked_reason, str):
- yanked_reason = ""
- # The Link.yanked_reason expects None instead of False
- elif not yanked_reason:
- yanked_reason = None
-
- yield Link(
- _clean_link(urllib.parse.urljoin(page.url, file_url)),
- comes_from=page.url,
- requires_python=file.get("requires-python"),
- yanked_reason=yanked_reason,
- hashes=file.get("hashes", {}),
- )
+ yield link
return
parser = HTMLLinkParser(page.url)
@@ -354,11 +269,7 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
url = page.url
base_url = parser.base_url or url
for anchor in parser.anchors:
- link = _create_link_from_element(
- anchor,
- page_url=url,
- base_url=base_url,
- )
+ link = Link.from_element(anchor, page_url=url, base_url=base_url)
if link is None:
continue
yield link
diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py
index 8cd0fda6851..c41070cbab6 100644
--- a/src/pip/_internal/metadata/__init__.py
+++ b/src/pip/_internal/metadata/__init__.py
@@ -103,3 +103,24 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio
:param canonical_name: Normalized project name of the given wheel.
"""
return select_backend().Distribution.from_wheel(wheel, canonical_name)
+
+
+def get_metadata_distribution(
+ metadata_path: str,
+ filename: str,
+ canonical_name: str,
+) -> BaseDistribution:
+ """Get the representation of the specified METADATA file.
+
+ This returns a Distribution instance from the chosen backend based on the contents
+ of the file at ``metadata_path``.
+
+ :param metadata_path: Path to the METADATA file.
+ :param filename: Filename for the dist this metadata represents.
+ :param canonical_name: Normalized project name of the given dist.
+ """
+ return select_backend().Distribution.from_metadata_file(
+ metadata_path,
+ filename,
+ canonical_name,
+ )
diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py
index 151fd6d009e..ef92b40ad59 100644
--- a/src/pip/_internal/metadata/base.py
+++ b/src/pip/_internal/metadata/base.py
@@ -113,6 +113,21 @@ def from_directory(cls, directory: str) -> "BaseDistribution":
"""
raise NotImplementedError()
+ @classmethod
+ def from_metadata_file(
+ cls,
+ metadata_path: str,
+ filename: str,
+ project_name: str,
+ ) -> "BaseDistribution":
+ """Load the distribution from the contents of a METADATA file.
+
+ :param metadata_path: The path to a METADATA file.
+ :param filename: File name for the dist with this metadata.
+ :param project_name: Name of the project this dist represents.
+ """
+ raise NotImplementedError()
+
@classmethod
def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution":
"""Load the distribution from a given wheel.
diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py
index fbf9a93218a..5d5ef7860ad 100644
--- a/src/pip/_internal/metadata/importlib/_dists.py
+++ b/src/pip/_internal/metadata/importlib/_dists.py
@@ -109,6 +109,17 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist = importlib.metadata.Distribution.at(info_location)
return cls(dist, info_location, info_location.parent)
+ @classmethod
+ def from_metadata_file(
+ cls,
+ metadata_path: str,
+ filename: str,
+ project_name: str,
+ ) -> BaseDistribution:
+ metadata_location = pathlib.Path(metadata_path)
+ dist = importlib.metadata.Distribution.at(metadata_location.parent)
+ return cls(dist, metadata_location.parent, None)
+
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index bf79ba139c0..29462223eb0 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -92,6 +92,25 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
return cls(dist)
+ @classmethod
+ def from_metadata_file(
+ cls,
+ metadata_path: str,
+ filename: str,
+ project_name: str,
+ ) -> BaseDistribution:
+ with open(metadata_path, "rb") as f:
+ metadata = f.read()
+ metadata_text = {
+ "METADATA": metadata,
+ }
+ dist = pkg_resources.DistInfoDistribution(
+ location=filename,
+ metadata=WheelMetadata(metadata_text, filename),
+ project_name=project_name,
+ )
+ return cls(dist)
+
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index 8fd1c3d9960..8fc42966816 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -1,11 +1,15 @@
import functools
+import itertools
import logging
import os
import posixpath
import re
import urllib.parse
+import xml.etree.ElementTree
+from dataclasses import dataclass
from typing import (
TYPE_CHECKING,
+ Any,
Dict,
List,
Mapping,
@@ -18,6 +22,7 @@
from pip._internal.utils.filetypes import WHEEL_EXTENSION
from pip._internal.utils.hashes import Hashes
from pip._internal.utils.misc import (
+ pairwise,
redact_auth_from_url,
split_auth_from_netloc,
splitext,
@@ -31,11 +36,121 @@
logger = logging.getLogger(__name__)
+HTMLElement = xml.etree.ElementTree.Element
+
# Order matters, earlier hashes have a precedence over later hashes for what
# we will pick to use.
_SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
+@dataclass(frozen=True)
+class LinkHash:
+ """Links to content may have embedded hash values. This class parses those.
+
+ `name` must be any member of `_SUPPORTED_HASHES`.
+
+ This class can be converted to and from `ArchiveInfo`. While ArchiveInfo intends to
+ be JSON-serializable to conform to PEP 610, this class contains the logic for
+ parsing a hash name and value for correctness, and then checking whether that hash
+ conforms to a schema with `.is_hash_allowed()`."""
+
+ name: str
+ value: str
+
+ _hash_re = re.compile(
+ r"({choices})=(.*)".format(
+ choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
+ ),
+ )
+
+ def __post_init__(self) -> None:
+ assert self._hash_re.match(f"{self.name}={self.value}")
+
+ @classmethod
+ @functools.lru_cache(maxsize=None)
+ def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
+ """Search a string for a checksum algorithm name and encoded output value."""
+ match = cls._hash_re.search(url)
+ if match is None:
+ return None
+ name, value = match.groups()
+ return cls(name=name, value=value)
+
+ def as_hashes(self) -> Hashes:
+ """Return a Hashes instance which checks only for the current hash."""
+ return Hashes({self.name: [self.value]})
+
+ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
+ """
+ Return True if the current hash is allowed by `hashes`.
+ """
+ if hashes is None:
+ return False
+ return hashes.is_hash_allowed(self.name, hex_digest=self.value)
+
+
+def _clean_url_path_part(part: str) -> str:
+ """
+ Clean a "part" of a URL path (i.e. after splitting on "@" characters).
+ """
+ # We unquote prior to quoting to make sure nothing is double quoted.
+ return urllib.parse.quote(urllib.parse.unquote(part))
+
+
+def _clean_file_url_path(part: str) -> str:
+ """
+ Clean the first part of a URL path that corresponds to a local
+ filesystem path (i.e. the first part after splitting on "@" characters).
+ """
+ # We unquote prior to quoting to make sure nothing is double quoted.
+ # Also, on Windows the path part might contain a drive letter which
+ # should not be quoted. On Linux where drive letters do not
+ # exist, the colon should be quoted. We rely on urllib.request
+ # to do the right thing here.
+ return urllib.request.pathname2url(urllib.request.url2pathname(part))
+
+
+# percent-encoded: /
+_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
+
+
+def _clean_url_path(path: str, is_local_path: bool) -> str:
+ """
+ Clean the path portion of a URL.
+ """
+ if is_local_path:
+ clean_func = _clean_file_url_path
+ else:
+ clean_func = _clean_url_path_part
+
+ # Split on the reserved characters prior to cleaning so that
+ # revision strings in VCS URLs are properly preserved.
+ parts = _reserved_chars_re.split(path)
+
+ cleaned_parts = []
+ for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
+ cleaned_parts.append(clean_func(to_clean))
+ # Normalize %xx escapes (e.g. %2f -> %2F)
+ cleaned_parts.append(reserved.upper())
+
+ return "".join(cleaned_parts)
+
+
+def _ensure_quoted_url(url: str) -> str:
+ """
+ Make sure a link is fully quoted.
+ For example, if ' ' occurs in the URL, it will be replaced with "%20",
+ and without double-quoting other characters.
+ """
+ # Split the URL into parts according to the general structure
+ # `scheme://netloc/path;parameters?query#fragment`.
+ result = urllib.parse.urlparse(url)
+ # If the netloc is empty, then the URL refers to a local filesystem path.
+ is_local_path = not result.netloc
+ path = _clean_url_path(result.path, is_local_path=is_local_path)
+ return urllib.parse.urlunparse(result._replace(path=path))
+
+
class Link(KeyBasedCompareMixin):
"""Represents a parsed link from a Package Index's simple URL"""
@@ -46,6 +161,8 @@ class Link(KeyBasedCompareMixin):
"comes_from",
"requires_python",
"yanked_reason",
+ "dist_info_metadata",
+ "link_hash",
"cache_link_parsing",
]
@@ -55,6 +172,8 @@ def __init__(
comes_from: Optional[Union[str, "IndexContent"]] = None,
requires_python: Optional[str] = None,
yanked_reason: Optional[str] = None,
+ dist_info_metadata: Optional[str] = None,
+ link_hash: Optional[LinkHash] = None,
cache_link_parsing: bool = True,
hashes: Optional[Mapping[str, str]] = None,
) -> None:
@@ -72,6 +191,14 @@ def __init__(
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
+ :param dist_info_metadata: the metadata attached to the file, or None if no such
+ metadata is provided. This is the value of the "data-dist-info-metadata"
+ attribute, if present, in a simple repository HTML link. This may be parsed
+ into its own `Link` by `self.metadata_link()`. See PEP 658 for more
+ information and the specification.
+ :param link_hash: a checksum for the content the link points to. If not
+ provided, this will be extracted from the link URL, if the URL has
+ any checksum.
:param cache_link_parsing: A flag that is used elsewhere to determine
whether resources retrieved from this link
should be cached. PyPI index urls should
@@ -94,11 +221,69 @@ def __init__(
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
+ self.dist_info_metadata = dist_info_metadata
+ self.link_hash = link_hash or LinkHash.split_hash_name_and_value(self._url)
super().__init__(key=url, defining_class=Link)
self.cache_link_parsing = cache_link_parsing
+ @classmethod
+ def from_json(
+ cls,
+ file_data: Dict[str, Any],
+ page_url: str,
+ ) -> Optional["Link"]:
+ """
+ Convert a PyPI JSON document from a simple repository page into a Link.
+ """
+ file_url = file_data.get("url")
+ if file_url is None:
+ return None
+
+ yanked_reason = file_data.get("yanked")
+ # The Link.yanked_reason expects an empty string instead of a boolean.
+ if yanked_reason and not isinstance(yanked_reason, str):
+ yanked_reason = ""
+ # The Link.yanked_reason expects None instead of False.
+ elif not yanked_reason:
+ yanked_reason = None
+
+ return cls(
+ _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url)),
+ comes_from=page_url,
+ requires_python=file_data.get("requires-python"),
+ yanked_reason=yanked_reason,
+ hashes=file_data.get("hashes", {}),
+ )
+
+ @classmethod
+ def from_element(
+ cls,
+ anchor_attribs: Dict[str, Optional[str]],
+ page_url: str,
+ base_url: str,
+ ) -> Optional["Link"]:
+ """
+ Convert an anchor element's attributes in a simple repository page to a Link.
+ """
+ href = anchor_attribs.get("href")
+ if not href:
+ return None
+
+ url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
+ pyrequire = anchor_attribs.get("data-requires-python")
+ yanked_reason = anchor_attribs.get("data-yanked")
+ dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
+
+ return cls(
+ url,
+ comes_from=page_url,
+ requires_python=pyrequire,
+ yanked_reason=yanked_reason,
+ dist_info_metadata=dist_info_metadata,
+ )
+
def __str__(self) -> str:
if self.requires_python:
rp = f" (requires-python:{self.requires_python})"
@@ -181,32 +366,36 @@ def subdirectory_fragment(self) -> Optional[str]:
return None
return match.group(1)
- _hash_re = re.compile(
- r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
- )
+ def metadata_link(self) -> Optional["Link"]:
+ """Implementation of PEP 658 parsing."""
+ # Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
+ # from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
+ # gets set.
+ if self.dist_info_metadata is None:
+ return None
+ metadata_url = f"{self.url_without_fragment}.metadata"
+ link_hash: Optional[LinkHash] = None
+ # If data-dist-info-metadata="true" is set, then the metadata file exists,
+ # but there is no information about its checksum or anything else.
+ if self.dist_info_metadata != "true":
+ link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata)
+ return Link(metadata_url, link_hash=link_hash)
+
+ def as_hashes(self) -> Optional[Hashes]:
+ if self.link_hash is not None:
+ return self.link_hash.as_hashes()
+ return None
@property
def hash(self) -> Optional[str]:
- for hashname in _SUPPORTED_HASHES:
- if hashname in self._hashes:
- return self._hashes[hashname]
-
- match = self._hash_re.search(self._url)
- if match:
- return match.group(2)
-
+ if self.link_hash is not None:
+ return self.link_hash.value
return None
@property
def hash_name(self) -> Optional[str]:
- for hashname in _SUPPORTED_HASHES:
- if hashname in self._hashes:
- return hashname
-
- match = self._hash_re.search(self._url)
- if match:
- return match.group(1)
-
+ if self.link_hash is not None:
+ return self.link_hash.name
return None
@property
@@ -236,19 +425,15 @@ def is_yanked(self) -> bool:
@property
def has_hash(self) -> bool:
- return self.hash_name is not None
+ return self.link_hash is not None
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
"""
- Return True if the link has a hash and it is allowed.
+ Return True if the link has a hash and it is allowed by `hashes`.
"""
- if hashes is None or not self.has_hash:
+ if self.link_hash is None:
return False
- # Assert non-None so mypy knows self.hash_name and self.hash are str.
- assert self.hash_name is not None
- assert self.hash is not None
-
- return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
+ return self.link_hash.is_hash_allowed(hashes)
class _CleanResult(NamedTuple):
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 80723fffe47..7722370bcea 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -24,7 +24,7 @@
VcsHashUnsupported,
)
from pip._internal.index.package_finder import PackageFinder
-from pip._internal.metadata import BaseDistribution
+from pip._internal.metadata import BaseDistribution, get_metadata_distribution
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
@@ -346,16 +346,49 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
# showing the user what the hash should be.
return req.hashes(trust_internet=False) or MissingHashes()
- def _fetch_metadata_using_lazy_wheel(
+ def _fetch_metadata_only(
self,
- link: Link,
+ req: InstallRequirement,
) -> Optional[BaseDistribution]:
- """Fetch metadata using lazy wheel, if possible."""
+ # --use-feature=fast-deps must be provided.
if not self.use_lazy_wheel:
return None
if self.require_hashes:
- logger.debug("Lazy wheel is not used as hash checking is required")
+ logger.debug(
+ "Metadata-only fetching is not used as hash checking is required",
+ )
+ return None
+ # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
+ pep_658_dist = self._fetch_metadata_using_pep_658(req)
+ if pep_658_dist is not None:
+ return pep_658_dist
+ return self._fetch_metadata_using_lazy_wheel(req.link)
+
+ def _fetch_metadata_using_pep_658(
+ self,
+ req: InstallRequirement,
+ ) -> Optional[BaseDistribution]:
+ """Fetch metadata from the data-dist-info-metadata attribute, if possible."""
+ metadata_link = req.link.metadata_link()
+ if metadata_link is None:
return None
+ metadata_file = get_http_url(
+ metadata_link,
+ self._download,
+ hashes=metadata_link.as_hashes(),
+ )
+ assert req.req is not None
+ return get_metadata_distribution(
+ metadata_file.path,
+ req.link.filename,
+ req.req.name,
+ )
+
+ def _fetch_metadata_using_lazy_wheel(
+ self,
+ link: Link,
+ ) -> Optional[BaseDistribution]:
+ """Fetch metadata using lazy wheel, if possible."""
if link.is_file or not link.is_wheel:
logger.debug(
"Lazy wheel is not used as %r does not points to a remote wheel",
@@ -414,13 +447,12 @@ def prepare_linked_requirement(
) -> BaseDistribution:
"""Prepare a requirement to be obtained from req.link."""
assert req.link
- link = req.link
self._log_preparing_link(req)
with indent_log():
# Check if the relevant file is already available
# in the download directory
file_path = None
- if self.download_dir is not None and link.is_wheel:
+ if self.download_dir is not None and req.link.is_wheel:
hashes = self._get_linked_req_hashes(req)
file_path = _check_download_dir(req.link, self.download_dir, hashes)
@@ -429,10 +461,10 @@ def prepare_linked_requirement(
self._downloaded[req.link.url] = file_path
else:
# The file is not available, attempt to fetch only metadata
- wheel_dist = self._fetch_metadata_using_lazy_wheel(link)
- if wheel_dist is not None:
+ metadata_dist = self._fetch_metadata_only(req)
+ if metadata_dist is not None:
req.needs_more_preparation = True
- return wheel_dist
+ return metadata_dist
# None of the optimizations worked, fully prepare the requirement
return self._prepare_linked_requirement(req, parallel_builds)
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index 89318b74553..629c15a040f 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1,17 +1,24 @@
import os
+import re
import shutil
import textwrap
+import uuid
+from dataclasses import dataclass
+from enum import Enum
from hashlib import sha256
from pathlib import Path
-from typing import List
+from textwrap import dedent
+from typing import Callable, Dict, List, Tuple
import pytest
from pip._internal.cli.status_codes import ERROR
+from pip._internal.utils.urls import path_to_url
from tests.conftest import MockServer, ScriptFactory
from tests.lib import (
PipTestEnvironment,
TestData,
+ TestPipResult,
create_basic_sdist_for_package,
create_really_basic_wheel,
)
@@ -1230,3 +1237,304 @@ def test_download_use_pep517_propagation(
downloads = os.listdir(download_dir)
assert len(downloads) == 2
+
+
+class MetadataKind(Enum):
+ """All the types of values we might be provided for the data-dist-info-metadata
+ attribute from PEP 658."""
+
+ # Valid: will read metadata from the dist instead.
+ No = "none"
+ # Valid: will read the .metadata file, but won't check its hash.
+ Unhashed = "unhashed"
+ # Valid: will read the .metadata file and check its hash matches.
+ Sha256 = "sha256"
+ # Invalid: will error out after checking the hash.
+ WrongHash = "wrong-hash"
+ # Invalid: will error out after failing to fetch the .metadata file.
+ NoFile = "no-file"
+
+
+@dataclass(frozen=True)
+class Package:
+ """Mock package structure used to generate a PyPI repository.
+
+ Package name and version should correspond to sdists (.tar.gz files) in our test
+ data."""
+
+ name: str
+ version: str
+ metadata: MetadataKind
+ # This will override any dependencies specified in the actual dist's METADATA.
+ requires_dist: Tuple[str, ...] = ()
+
+ def filename(self) -> str:
+ """We will only be pointed at .tar.gz packages, so we can guess the name."""
+ return f"{self.name}-{self.version}.tar.gz"
+
+ def metadata_filename(self) -> str:
+ """This is specified by PEP 658."""
+ return f"{self.filename()}.metadata"
+
+ def generate_additional_tag(self) -> str:
+ """This gets injected into the <a> tag in the generated PyPI index page for this
+ package."""
+ if self.metadata == MetadataKind.No:
+ return ""
+ if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]:
+ return 'data-dist-info-metadata="true"'
+ if self.metadata == MetadataKind.WrongHash:
+ return 'data-dist-info-metadata="sha256=WRONG-HASH"'
+ assert self.metadata == MetadataKind.Sha256
+ checksum = sha256(self.generate_metadata()).hexdigest()
+ return f'data-dist-info-metadata="sha256={checksum}"'
+
+ def generate_metadata(self) -> bytes:
+ """This is written to `self.metadata_filename()` and will override the actual
+ dist's METADATA, unless `self.metadata == MetadataKind.NoFile`."""
+ if self.requires_dist:
+ joined = " and ".join(self.requires_dist)
+ requires_str = f"Requires-Dist: {joined}"
+ else:
+ requires_str = ""
+ return dedent(
+ f"""\
+ Metadata-Version: 2.1
+ Name: {self.name}
+ Version: {self.version}
+ {requires_str}
+ """
+ ).encode("utf-8")
+
+
+@pytest.fixture(scope="function")
+def index_html_content(tmpdir: Path) -> Callable[..., Path]:
+ """Generate a PyPI package index.html within a temporary local directory."""
+ html_dir = tmpdir / "index_html_content"
+ html_dir.mkdir()
+
+ def generate_index_html_subdir(index_html: str) -> Path:
+ """Create a new subdirectory after a UUID and write an index.html."""
+ new_subdir = html_dir / uuid.uuid4().hex
+ new_subdir.mkdir()
+
+ with open(new_subdir / "index.html", "w") as f:
+ f.write(index_html)
+
+ return new_subdir
+
+ return generate_index_html_subdir
+
+
+@pytest.fixture(scope="function")
+def index_for_packages(
+ shared_data: TestData,
+ index_html_content: Callable[..., Path],
+) -> Callable[..., Path]:
+ """Generate a PyPI package index within a local directory pointing to blank data."""
+
+ def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
+ """
+ Produce a PyPI directory structure pointing to the specified packages.
+ """
+ # (1) Generate the content for a PyPI index.html.
+ pkg_links = "\n".join(
+ f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in packages.keys()
+ )
+ index_html = f"""\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="pypi:repository-version" content="1.0">
+    <title>Simple index</title>
+  </head>
+  <body>
+{pkg_links}
+  </body>
+</html>"""
+ # (2) Generate the index.html in a new subdirectory of the temp directory.
+ index_html_subdir = index_html_content(index_html)
+
+ # (3) Generate subdirectories for individual packages, each with their own
+ # index.html.
+ for pkg, links in packages.items():
+ pkg_subdir = index_html_subdir / pkg
+ pkg_subdir.mkdir()
+
+ download_links: List[str] = []
+ for package_link in links:
+ # (3.1) Generate the <a> tag which pip can crawl pointing to this
+ # specific package version.
+ download_links.append(
+ f'    <a href="{package_link.filename()}" '
+ f'{package_link.generate_additional_tag()}>{package_link.filename()}</a><br/>'  # noqa: E501
+ )
+ # (3.2) Copy over the corresponding file in `shared_data.packages`.
+ shutil.copy(
+ shared_data.packages / package_link.filename(),
+ pkg_subdir / package_link.filename(),
+ )
+ # (3.3) Write a metadata file, if applicable.
+ if package_link.metadata != MetadataKind.NoFile:
+ with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
+ f.write(package_link.generate_metadata())
+
+ # (3.4) After collating all the download links and copying over the files,
+ # write an index.html with the generated download links for each
+ # copied file for this specific package name.
+ download_links_str = "\n".join(download_links)
+ pkg_index_content = f"""\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="pypi:repository-version" content="1.0">
+    <title>Links for {pkg}</title>
+  </head>
+  <body>
+    <h1>Links for {pkg}</h1>
+{download_links_str}
+  </body>
+</html>"""
+ with open(pkg_subdir / "index.html", "w") as f:
+ f.write(pkg_index_content)
+
+ return index_html_subdir
+
+ return generate_index_for_packages
+
+
+@pytest.fixture(scope="function")
+def download_generated_index(
+ script: PipTestEnvironment,
+ index_for_packages: Callable[..., Path],
+ tmpdir: Path,
+) -> Callable[..., Tuple[TestPipResult, Path]]:
+ """Execute `pip download` against a generated PyPI index."""
+ download_dir = tmpdir / "download_dir"
+
+ def run_for_generated_index(
+ packages: Dict[str, List[Package]],
+ args: List[str],
+ fast_deps: bool = False,
+ allow_error: bool = False,
+ ) -> Tuple[TestPipResult, Path]:
+ """
+ Produce a PyPI directory structure pointing to the specified packages, then
+ execute `pip download -i ...` pointing to our generated index.
+ """
+ index_dir = index_for_packages(packages)
+ pip_args = []
+ if fast_deps:
+ pip_args.append("--use-feature=fast-deps")
+ pip_args.extend(
+ [
+ "download",
+ "-d",
+ str(download_dir),
+ "-i",
+ path_to_url(str(index_dir)),
+ ]
+ )
+ pip_args.extend(args)
+ result = script.pip(
+ *pip_args,
+ # We need allow_stderr_warning=True if fast_deps=True, since that will print
+ # a warning to stderr.
+ allow_stderr_warning=fast_deps,
+ allow_error=allow_error,
+ )
+ return (result, download_dir)
+
+ return run_for_generated_index
+
+
+# The package database we generate for testing PEP 658 support.
+_simple_packages: Dict[str, List[Package]] = {
+ "simple": [
+ Package("simple", "1.0", MetadataKind.Sha256),
+ Package("simple", "2.0", MetadataKind.No),
+ # This will raise a hashing error.
+ Package("simple", "3.0", MetadataKind.WrongHash),
+ ],
+ "simple2": [
+ # Override the dependencies here in order to force pip to download
+ # simple-1.0.tar.gz as well.
+ Package("simple2", "1.0", MetadataKind.Unhashed, ("simple==1.0",)),
+ # This will raise an error when pip attempts to fetch the metadata file.
+ Package("simple2", "2.0", MetadataKind.NoFile),
+ ],
+}
+
+
+@pytest.mark.parametrize(
+ "requirement_to_download, expected_outputs",
+ [
+ ("simple2==1.0", ["simple-1.0.tar.gz", "simple2-1.0.tar.gz"]),
+ ("simple==2.0", ["simple-2.0.tar.gz"]),
+ ],
+)
+def test_download_metadata(
+ download_generated_index: Callable[..., Tuple[TestPipResult, Path]],
+ requirement_to_download: str,
+ expected_outputs: List[str],
+) -> None:
+ """Verify that when using --use-feature=fast-deps, if a data-dist-info-metadata
+ attribute is present, then it is used instead of the actual dist's METADATA."""
+ _, download_dir = download_generated_index(
+ _simple_packages,
+ [requirement_to_download],
+ fast_deps=True,
+ )
+ assert sorted(os.listdir(download_dir)) == expected_outputs
+
+
+@pytest.mark.parametrize(
+ "requirement_to_download, real_hash",
+ [
+ (
+ "simple==3.0",
+ "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996",
+ )
+ ],
+)
+def test_incorrect_metadata_hash(
+ download_generated_index: Callable[..., Tuple[TestPipResult, Path]],
+ requirement_to_download: str,
+ real_hash: str,
+) -> None:
+ """Verify that if a hash for data-dist-info-metadata is provided, it must match the
+ actual hash of the metadata file."""
+ result, _ = download_generated_index(
+ _simple_packages,
+ [requirement_to_download],
+ fast_deps=True,
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ expected_msg = f"""\
+ Expected sha256 WRONG-HASH
+ Got {real_hash}"""
+ assert expected_msg in result.stderr
+
+
+@pytest.mark.parametrize(
+ "requirement_to_download",
+ ["simple2==2.0"],
+)
+def test_metadata_not_found(
+ download_generated_index: Callable[..., Tuple[TestPipResult, Path]],
+ requirement_to_download: str,
+) -> None:
+ """Verify that if a data-dist-info-metadata attribute is provided, that pip will
+ fetch the .metadata file at the location specified by PEP 658, and error
+ if unavailable."""
+ result, _ = download_generated_index(
+ _simple_packages,
+ [requirement_to_download],
+ fast_deps=True,
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ pattern = re.compile(
+ r"ERROR: 404 Client Error: FileNotFoundError for url:.*simple2-2\.0\.tar\.gz\.metadata" # noqa: E501
+ )
+ assert pattern.search(result.stderr), (pattern, result.stderr)
diff --git a/tests/lib/server.py b/tests/lib/server.py
index 4b5add345d3..4cc18452cb5 100644
--- a/tests/lib/server.py
+++ b/tests/lib/server.py
@@ -150,14 +150,6 @@ def html5_page(text: str) -> str:
)
-def index_page(spec: Dict[str, str]) -> "WSGIApplication":
- def link(name: str, value: str) -> str:
- return '{}'.format(value, name)
-
- links = "".join(link(*kv) for kv in spec.items())
- return text_html_response(html5_page(links))
-
-
def package_page(spec: Dict[str, str]) -> "WSGIApplication":
def link(name: str, value: str) -> str:
return '{}'.format(value, name)
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index 3afc5210dc7..bbc78a70317 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -11,13 +11,12 @@
import pytest
from pip._vendor import requests
+from pip._vendor.packaging.requirements import Requirement
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.index.collector import (
IndexContent,
LinkCollector,
- _clean_link,
- _clean_url_path,
_get_index_content,
_get_simple_response,
_make_index_content,
@@ -28,7 +27,12 @@
from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.index import PyPI
-from pip._internal.models.link import Link
+from pip._internal.models.link import (
+ Link,
+ LinkHash,
+ _clean_url_path,
+ _ensure_quoted_url,
+)
from pip._internal.network.session import PipSession
from tests.lib import TestData, make_test_link_collector
@@ -402,13 +406,13 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None:
),
],
)
-def test_clean_link(url: str, clean_url: str) -> None:
- assert _clean_link(url) == clean_url
+def test_ensure_quoted_url(url: str, clean_url: str) -> None:
+ assert _ensure_quoted_url(url) == clean_url
def _test_parse_links_data_attribute(
anchor_html: str, attr: str, expected: Optional[str]
-) -> None:
+) -> Link:
html = (
""
''
@@ -427,6 +431,7 @@ def _test_parse_links_data_attribute(
(link,) = links
actual = getattr(link, attr)
assert actual == expected
+ return link
@pytest.mark.parametrize(
@@ -534,6 +539,48 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -
_test_parse_links_data_attribute(anchor_html, "yanked_reason", expected)
+# Requirement objects do not compare equal unless they are the same instance,
+# so we share one instance across the parametrized cases below.
+_pkg1_requirement = Requirement("pkg1==1.0")
+
+
+@pytest.mark.parametrize(
+ "anchor_html, expected, link_hash",
+ [
+ # Test not present.
+ (
+ '',
+ None,
+ None,
+ ),
+ # Test with value "true".
+ (
+ '',
+ "true",
+ None,
+ ),
+ # Test with a provided hash value.
+ (
+ '', # noqa: E501
+ "sha256=aa113592bbe",
+ None,
+ ),
+ # Test with a provided hash value for both the requirement as well as metadata.
+ (
+ '', # noqa: E501
+ "sha256=aa113592bbe",
+ LinkHash("sha512", "abc132409cb"),
+ ),
+ ],
+)
+def test_parse_links__dist_info_metadata(
+ anchor_html: str,
+ expected: Optional[str],
+ link_hash: Optional[LinkHash],
+) -> None:
+ link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
+ assert link.link_hash == link_hash
+
+
def test_parse_links_caches_same_page_by_url() -> None:
html = (
""
@@ -963,3 +1010,23 @@ def expand_path(path: str) -> str:
expected_temp2_dir = os.path.normcase(temp2_dir)
assert search_scope.find_links == ["~/temp1", expected_temp2_dir]
assert search_scope.index_urls == ["default_url"]
+
+
+@pytest.mark.parametrize(
+ "url, result",
+ [
+ (
+ "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe",
+ LinkHash("sha256", "aa113592bbe"),
+ ),
+ (
+ "https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe",
+ LinkHash("md5", "aa113592bbe"),
+ ),
+ ("https://pypi.org/pip-18.0.tar.gz", None),
+ # We don't recognize the "sha500" algorithm, so we discard it.
+ ("https://pypi.org/pip-18.0.tar.gz#sha500=aa113592bbe", None),
+ ],
+)
+def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
+ assert LinkHash.split_hash_name_and_value(url) == result
From 28b6134dc677a35840950e2ae01127c9de08bd02 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 11 May 2022 19:31:12 -0400
Subject: [PATCH 02/18] add NEWS entry
---
news/11111.feature.rst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 news/11111.feature.rst
diff --git a/news/11111.feature.rst b/news/11111.feature.rst
new file mode 100644
index 00000000000..41fa2398f4e
--- /dev/null
+++ b/news/11111.feature.rst
@@ -0,0 +1 @@
+Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata when ``--use-feature=fast-deps`` is enabled.
From f675e0995137c8c6295fc3a5408a3fa15d0447a8 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 11 May 2022 20:42:36 -0400
Subject: [PATCH 03/18] explain why we don't validate hexdigests now
---
src/pip/_internal/models/link.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index 8fc42966816..970bcf49b44 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -58,6 +58,11 @@ class LinkHash:
value: str
_hash_re = re.compile(
+ # NB: we do not validate that the second group (.*) is a valid hex
+ # digest. Instead, we simply keep that string in this class, and then check it
+ # against Hashes when hash-checking is needed. This is easier to debug than
+ # proactively discarding an invalid hex digest, as we handle incorrect hashes
+ # and malformed hashes in the same place.
r"({choices})=(.*)".format(
choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
),
From 6675ba3294ad771fcce83f40ddf9e73a7227616b Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Fri, 13 May 2022 01:13:41 -0400
Subject: [PATCH 04/18] fix importlib.metadata case
---
src/pip/_internal/operations/prepare.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 7722370bcea..ed1c4664117 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -377,9 +377,15 @@ def _fetch_metadata_using_pep_658(
self._download,
hashes=metadata_link.as_hashes(),
)
+ # The file will be downloaded under the same name as it's listed in the index,
+ # which will end with .metadata. To make importlib.metadata.PathDistribution
+ # work, we need to keep it in the same directory, but rename it to METADATA.
+ containing_dir = os.path.dirname(metadata_file.path)
+ new_metadata_path = os.path.join(containing_dir, "METADATA")
+ os.rename(metadata_file.path, new_metadata_path)
assert req.req is not None
return get_metadata_distribution(
- metadata_file.path,
+ new_metadata_path,
req.link.filename,
req.req.name,
)
From 80e044a4d4c6d432c048431c753e8d3b25eb6a63 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Sun, 15 May 2022 12:28:15 -0400
Subject: [PATCH 05/18] make it work without --use-feature=fast-deps!
---
news/11111.feature.rst | 2 +-
src/pip/_internal/operations/prepare.py | 22 +++++++++------
tests/functional/test_download.py | 37 ++++++++-----------------
3 files changed, 25 insertions(+), 36 deletions(-)
diff --git a/news/11111.feature.rst b/news/11111.feature.rst
index 41fa2398f4e..39cb4b35c12 100644
--- a/news/11111.feature.rst
+++ b/news/11111.feature.rst
@@ -1 +1 @@
-Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata when ``--use-feature=fast-deps`` is enabled.
+Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata without downloading the dist yet.
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index ed1c4664117..99aa6ff4596 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -350,19 +350,15 @@ def _fetch_metadata_only(
self,
req: InstallRequirement,
) -> Optional[BaseDistribution]:
- # --use-feature=fast-deps must be provided.
- if not self.use_lazy_wheel:
- return None
if self.require_hashes:
logger.debug(
"Metadata-only fetching is not used as hash checking is required",
)
return None
# Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
- pep_658_dist = self._fetch_metadata_using_pep_658(req)
- if pep_658_dist is not None:
- return pep_658_dist
- return self._fetch_metadata_using_lazy_wheel(req.link)
+ return self._fetch_metadata_using_pep_658(
+ req
+ ) or self._fetch_metadata_using_lazy_wheel(req.link)
def _fetch_metadata_using_pep_658(
self,
@@ -372,6 +368,12 @@ def _fetch_metadata_using_pep_658(
metadata_link = req.link.metadata_link()
if metadata_link is None:
return None
+ assert req.req is not None
+ logger.info(
+ "Obtaining dependency information for %s from %s",
+ req.req,
+ metadata_link,
+ )
metadata_file = get_http_url(
metadata_link,
self._download,
@@ -383,7 +385,6 @@ def _fetch_metadata_using_pep_658(
containing_dir = os.path.dirname(metadata_file.path)
new_metadata_path = os.path.join(containing_dir, "METADATA")
os.rename(metadata_file.path, new_metadata_path)
- assert req.req is not None
return get_metadata_distribution(
new_metadata_path,
req.link.filename,
@@ -395,9 +396,12 @@ def _fetch_metadata_using_lazy_wheel(
link: Link,
) -> Optional[BaseDistribution]:
"""Fetch metadata using lazy wheel, if possible."""
+ # --use-feature=fast-deps must be provided.
+ if not self.use_lazy_wheel:
+ return None
if link.is_file or not link.is_wheel:
logger.debug(
- "Lazy wheel is not used as %r does not points to a remote wheel",
+ "Lazy wheel is not used as %r does not point to a remote wheel",
link,
)
return None
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index 629c15a040f..6785fc73858 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1414,7 +1414,6 @@ def download_generated_index(
def run_for_generated_index(
packages: Dict[str, List[Package]],
args: List[str],
- fast_deps: bool = False,
allow_error: bool = False,
) -> Tuple[TestPipResult, Path]:
"""
@@ -1422,26 +1421,15 @@ def run_for_generated_index(
execute `pip download -i ...` pointing to our generated index.
"""
index_dir = index_for_packages(packages)
- pip_args = []
- if fast_deps:
- pip_args.append("--use-feature=fast-deps")
- pip_args.extend(
- [
- "download",
- "-d",
- str(download_dir),
- "-i",
- path_to_url(str(index_dir)),
- ]
- )
- pip_args.extend(args)
- result = script.pip(
- *pip_args,
- # We need allow_stderr_warning=True if fast_deps=True, since that will print
- # a warning to stderr.
- allow_stderr_warning=fast_deps,
- allow_error=allow_error,
- )
+ pip_args = [
+ "download",
+ "-d",
+ str(download_dir),
+ "-i",
+ path_to_url(str(index_dir)),
+ *args,
+ ]
+ result = script.pip(*pip_args, allow_error=allow_error)
return (result, download_dir)
return run_for_generated_index
@@ -1477,12 +1465,11 @@ def test_download_metadata(
requirement_to_download: str,
expected_outputs: List[str],
) -> None:
- """Verify that when using --use-feature=fast-deps, if a data-dist-info-metadata
- attribute is present, then it is used instead of the actual dist's METADATA."""
+ """Verify that if a data-dist-info-metadata attribute is present, then it is used
+ instead of the actual dist's METADATA."""
_, download_dir = download_generated_index(
_simple_packages,
[requirement_to_download],
- fast_deps=True,
)
assert sorted(os.listdir(download_dir)) == expected_outputs
@@ -1506,7 +1493,6 @@ def test_incorrect_metadata_hash(
result, _ = download_generated_index(
_simple_packages,
[requirement_to_download],
- fast_deps=True,
allow_error=True,
)
assert result.returncode != 0
@@ -1530,7 +1516,6 @@ def test_metadata_not_found(
result, _ = download_generated_index(
_simple_packages,
[requirement_to_download],
- fast_deps=True,
allow_error=True,
)
assert result.returncode != 0
From 266c5cddc23c00235ed8e1860ea17678264cf9b3 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 09:36:47 -0400
Subject: [PATCH 06/18] respond to review comments
---
src/pip/_internal/operations/prepare.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 99aa6ff4596..c71a4f5b325 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -356,11 +356,11 @@ def _fetch_metadata_only(
)
return None
# Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
- return self._fetch_metadata_using_pep_658(
+ return self._fetch_metadata_using_link_data_attr(
req
) or self._fetch_metadata_using_lazy_wheel(req.link)
- def _fetch_metadata_using_pep_658(
+ def _fetch_metadata_using_link_data_attr(
self,
req: InstallRequirement,
) -> Optional[BaseDistribution]:
From 8da1bdc91e97d9363956777c75427e9e07f9d421 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 10:04:37 -0400
Subject: [PATCH 07/18] ensure PEP 691 json parsing also supports PEP 658
dist-info-metadata
---
src/pip/_internal/models/link.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index 970bcf49b44..e0f436fa345 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -246,7 +246,12 @@ def from_json(
if file_url is None:
return None
+ url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
+ pyrequire = file_data.get("requires-python")
yanked_reason = file_data.get("yanked")
+ dist_info_metadata = file_data.get("dist-info-metadata")
+ hashes = file_data.get("hashes", {})
+
# The Link.yanked_reason expects an empty string instead of a boolean.
if yanked_reason and not isinstance(yanked_reason, str):
yanked_reason = ""
@@ -255,11 +260,12 @@ def from_json(
yanked_reason = None
return cls(
- _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url)),
+ url,
comes_from=page_url,
- requires_python=file_data.get("requires-python"),
+ requires_python=pyrequire,
yanked_reason=yanked_reason,
- hashes=file_data.get("hashes", {}),
+ hashes=hashes,
+ dist_info_metadata=dist_info_metadata,
)
@classmethod
From 23e54924f760a2dfaa1b377b3d8f08dd16372b0f Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 10:32:50 -0400
Subject: [PATCH 08/18] add test case for dist-info-metadata key from PEP 691!!
---
tests/unit/test_collector.py | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index bbc78a70317..81d8765c837 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -479,6 +479,14 @@ def test_parse_links_json() -> None:
"requires-python": ">=3.7",
"dist-info-metadata": False,
},
+ # Same as above, but parsing dist-info-metadata.
+ {
+ "filename": "holygrail-1.0-py3-none-any.whl",
+ "url": "/files/holygrail-1.0-py3-none-any.whl",
+ "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
+ "requires-python": ">=3.7",
+ "dist-info-metadata": "sha512=aabdd41",
+ },
],
}
).encode("utf8")
@@ -507,8 +515,25 @@ def test_parse_links_json() -> None:
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
),
+ Link(
+ "https://example.com/files/holygrail-1.0-py3-none-any.whl",
+ comes_from=page.url,
+ requires_python=">=3.7",
+ yanked_reason=None,
+ hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
+ dist_info_metadata="sha512=aabdd41",
+ ),
]
+ # Ensure the metadata info can be parsed into the correct link.
+ metadata_link = links[2].metadata_link()
+ assert metadata_link is not None
+ assert (
+ metadata_link.url
+ == "https://example.com/files/holygrail-1.0-py3-none-any.whl.metadata"
+ )
+ assert metadata_link.link_hash == LinkHash("sha512", "aabdd41")
+
@pytest.mark.parametrize(
"anchor_html, expected",
From a685a984d2faae9a3ffbd4a68db45e9cee84123e Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 11:53:56 -0400
Subject: [PATCH 09/18] remove unused code after html5lib was removed!!
---
src/pip/_internal/index/collector.py | 23 +----------------------
src/pip/_internal/models/link.py | 3 ---
2 files changed, 1 insertion(+), 25 deletions(-)
diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py
index 9ae213197dd..0120610c758 100644
--- a/src/pip/_internal/index/collector.py
+++ b/src/pip/_internal/index/collector.py
@@ -32,7 +32,7 @@
from pip._vendor.requests.exceptions import RetryError, SSLError
from pip._internal.exceptions import NetworkConnectionError
-from pip._internal.models.link import HTMLElement, Link
+from pip._internal.models.link import Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
@@ -188,27 +188,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
return None
-def _determine_base_url(document: HTMLElement, page_url: str) -> str:
- """Determine the HTML document's base URL.
-
- This looks for a ```` tag in the HTML document. If present, its href
- attribute denotes the base URL of anchor tags in the document. If there is
- no such tag (or if it does not have a valid href attribute), the HTML
- file's URL is used as the base URL.
-
- :param document: An HTML document representation. The current
- implementation expects the result of ``html5lib.parse()``.
- :param page_url: The URL of the HTML document.
-
- TODO: Remove when `html5lib` is dropped.
- """
- for base in document.findall(".//base"):
- href = base.get("href")
- if href is not None:
- return href
- return page_url
-
-
class CacheablePageContent:
def __init__(self, page: "IndexContent") -> None:
assert page.cache_link_parsing
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index e0f436fa345..c792d128bcf 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -5,7 +5,6 @@
import posixpath
import re
import urllib.parse
-import xml.etree.ElementTree
from dataclasses import dataclass
from typing import (
TYPE_CHECKING,
@@ -36,8 +35,6 @@
logger = logging.getLogger(__name__)
-HTMLElement = xml.etree.ElementTree.Element
-
# Order matters, earlier hashes have a precedence over later hashes for what
# we will pick to use.
_SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
From 96bd60e5918a2d86b55387a9267a78b6c98b0b4e Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 12:28:50 -0400
Subject: [PATCH 10/18] rename WheelMetadata to InMemoryMetadata as per review
comments
---
src/pip/_internal/metadata/pkg_resources.py | 6 +++---
tests/unit/metadata/test_metadata_pkg_resources.py | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index 29462223eb0..46e8b245d40 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -33,7 +33,7 @@ class EntryPoint(NamedTuple):
group: str
-class WheelMetadata:
+class InMemoryMetadata:
"""IMetadataProvider that reads metadata files from a dictionary.
This also maps metadata decoding exceptions to our internal exception type.
@@ -106,7 +106,7 @@ def from_metadata_file(
}
dist = pkg_resources.DistInfoDistribution(
location=filename,
- metadata=WheelMetadata(metadata_text, filename),
+ metadata=InMemoryMetadata(metadata_text, filename),
project_name=project_name,
)
return cls(dist)
@@ -127,7 +127,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
dist = pkg_resources.DistInfoDistribution(
location=wheel.location,
- metadata=WheelMetadata(metadata_text, wheel.location),
+ metadata=InMemoryMetadata(metadata_text, wheel.location),
project_name=name,
)
return cls(dist)
diff --git a/tests/unit/metadata/test_metadata_pkg_resources.py b/tests/unit/metadata/test_metadata_pkg_resources.py
index 6bb67156c9f..ab1a56107f4 100644
--- a/tests/unit/metadata/test_metadata_pkg_resources.py
+++ b/tests/unit/metadata/test_metadata_pkg_resources.py
@@ -11,7 +11,7 @@
from pip._internal.metadata.pkg_resources import (
Distribution,
Environment,
- WheelMetadata,
+ InMemoryMetadata,
)
pkg_resources = pytest.importorskip("pip._vendor.pkg_resources")
@@ -99,7 +99,7 @@ def test_wheel_metadata_works() -> None:
dist = Distribution(
pkg_resources.DistInfoDistribution(
location="",
- metadata=WheelMetadata({"METADATA": metadata.as_bytes()}, ""),
+ metadata=InMemoryMetadata({"METADATA": metadata.as_bytes()}, ""),
project_name=name,
),
)
@@ -116,7 +116,7 @@ def test_wheel_metadata_works() -> None:
def test_wheel_metadata_throws_on_bad_unicode() -> None:
- metadata = WheelMetadata({"METADATA": b"\xff"}, "")
+ metadata = InMemoryMetadata({"METADATA": b"\xff"}, "")
with pytest.raises(UnsupportedWheel) as e:
metadata.get_metadata("METADATA")
From 2aa1c2f0e02889fb6e6c92616501233464070164 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 12:53:55 -0400
Subject: [PATCH 11/18] rename from_metadata_file{,_contents}() to avoid
ambiguity pointed out by reviewer
---
src/pip/_internal/metadata/__init__.py | 15 ++++++++-------
src/pip/_internal/metadata/base.py | 9 ++++++---
src/pip/_internal/metadata/importlib/_dists.py | 18 +++++++++++++-----
src/pip/_internal/metadata/pkg_resources.py | 12 +++++-------
src/pip/_internal/operations/prepare.py | 12 +++++-------
5 files changed, 37 insertions(+), 29 deletions(-)
diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py
index c41070cbab6..9f73ca7105f 100644
--- a/src/pip/_internal/metadata/__init__.py
+++ b/src/pip/_internal/metadata/__init__.py
@@ -106,21 +106,22 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio
def get_metadata_distribution(
- metadata_path: str,
+ metadata_contents: bytes,
filename: str,
canonical_name: str,
) -> BaseDistribution:
- """Get the representation of the specified METADATA file.
+ """Get the dist representation of the specified METADATA file contents.
- This returns a Distribution instance from the chosen backend based on the contents
- of the file at ``metadata_path``.
+ This returns a Distribution instance from the chosen backend sourced from the data
+ in `metadata_contents`.
- :param metadata_path: Path to the METADATA file.
+ :param metadata_contents: Contents of a METADATA file within a dist, or one served
+ via PEP 658.
:param filename: Filename for the dist this metadata represents.
:param canonical_name: Normalized project name of the given dist.
"""
- return select_backend().Distribution.from_metadata_file(
- metadata_path,
+ return select_backend().Distribution.from_metadata_file_contents(
+ metadata_contents,
filename,
canonical_name,
)
diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py
index ef92b40ad59..cafb79fb3dc 100644
--- a/src/pip/_internal/metadata/base.py
+++ b/src/pip/_internal/metadata/base.py
@@ -114,15 +114,18 @@ def from_directory(cls, directory: str) -> "BaseDistribution":
raise NotImplementedError()
@classmethod
- def from_metadata_file(
+ def from_metadata_file_contents(
cls,
- metadata_path: str,
+ metadata_contents: bytes,
filename: str,
project_name: str,
) -> "BaseDistribution":
"""Load the distribution from the contents of a METADATA file.
- :param metadata: The path to a METADATA file.
+ This is used to implement PEP 658 by generating a "shallow" dist object that can
+ be used for resolution without downloading or building the actual dist yet.
+
+ :param metadata_contents: The contents of a METADATA file.
:param filename: File name for the dist with this metadata.
:param project_name: Name of the project this dist represents.
"""
diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py
index 5d5ef7860ad..ae821802a6a 100644
--- a/src/pip/_internal/metadata/importlib/_dists.py
+++ b/src/pip/_internal/metadata/importlib/_dists.py
@@ -28,6 +28,7 @@
)
from pip._internal.utils.misc import normalize_path
from pip._internal.utils.packaging import safe_extra
+from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file
from ._compat import BasePath, get_dist_name
@@ -110,15 +111,22 @@ def from_directory(cls, directory: str) -> BaseDistribution:
return cls(dist, info_location, info_location.parent)
@classmethod
- def from_metadata_file(
+ def from_metadata_file_contents(
cls,
- metadata_path: str,
+ metadata_contents: bytes,
filename: str,
project_name: str,
) -> BaseDistribution:
- metadata_location = pathlib.Path(metadata_path)
- dist = importlib.metadata.Distribution.at(metadata_location.parent)
- return cls(dist, metadata_location.parent, None)
+ # Generate temp dir to contain the metadata file, and write the file contents.
+ temp_dir = pathlib.Path(
+ TempDirectory(kind="metadata", globally_managed=True).path
+ )
+ metadata_path = temp_dir / "METADATA"
+ with open(metadata_path, "wb") as f:
+ f.write(metadata_contents)
+ # Construct dist pointing to the newly created directory.
+ dist = importlib.metadata.Distribution.at(metadata_path.parent)
+ return cls(dist, metadata_path.parent, None)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index 46e8b245d40..b2f3d1382a1 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -93,20 +93,18 @@ def from_directory(cls, directory: str) -> BaseDistribution:
return cls(dist)
@classmethod
- def from_metadata_file(
+ def from_metadata_file_contents(
cls,
- metadata_path: str,
+ metadata_contents: bytes,
filename: str,
project_name: str,
) -> BaseDistribution:
- with open(metadata_path, "rb") as f:
- metadata = f.read()
- metadata_text = {
- "METADATA": metadata,
+ metadata_dict = {
+ "METADATA": metadata_contents,
}
dist = pkg_resources.DistInfoDistribution(
location=filename,
- metadata=InMemoryMetadata(metadata_text, filename),
+ metadata=InMemoryMetadata(metadata_dict, filename),
project_name=project_name,
)
return cls(dist)
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index c71a4f5b325..2a61875b529 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -374,19 +374,17 @@ def _fetch_metadata_using_link_data_attr(
req.req,
metadata_link,
)
+ # Download the contents of the METADATA file, separate from the dist itself.
metadata_file = get_http_url(
metadata_link,
self._download,
hashes=metadata_link.as_hashes(),
)
- # The file will be downloaded under the same name as it's listed in the index,
- # which will end with .metadata. To make importlib.metadata.PathDistribution
- # work, we need to keep it in the same directory, but rename it to METADATA.
- containing_dir = os.path.dirname(metadata_file.path)
- new_metadata_path = os.path.join(containing_dir, "METADATA")
- os.rename(metadata_file.path, new_metadata_path)
+ with open(metadata_file.path, "rb") as f:
+ metadata_contents = f.read()
+ # Generate a dist just from those file contents.
return get_metadata_distribution(
- new_metadata_path,
+ metadata_contents,
req.link.filename,
req.req.name,
)
From e5b2fcd1a066a780920859f48255bbb840f3fb33 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 13:39:12 -0400
Subject: [PATCH 12/18] add tests for PEP 658 metadata with wheel files too
---
tests/functional/test_download.py | 138 +++++++++++++++++++++---------
1 file changed, 99 insertions(+), 39 deletions(-)
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index 6785fc73858..ede2213aa70 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1264,17 +1264,14 @@ class Package:
name: str
version: str
+ filename: str
metadata: MetadataKind
# This will override any dependencies specified in the actual dist's METADATA.
requires_dist: Tuple[str, ...] = ()
- def filename(self) -> str:
- """We will only be pointed at .tar.gz packages, so we can guess the name."""
- return f"{self.name}-{self.version}.tar.gz"
-
def metadata_filename(self) -> str:
"""This is specified by PEP 658."""
- return f"{self.filename()}.metadata"
+ return f"{self.filename}.metadata"
def generate_additional_tag(self) -> str:
"""This gets injected into the tag in the generated PyPI index page for this
@@ -1289,26 +1286,27 @@ def generate_additional_tag(self) -> str:
checksum = sha256(self.generate_metadata()).hexdigest()
return f'data-dist-info-metadata="sha256={checksum}"'
+ def requires_str(self) -> str:
+ if not self.requires_dist:
+ return ""
+ joined = " and ".join(self.requires_dist)
+ return f"Requires-Dist: {joined}"
+
def generate_metadata(self) -> bytes:
"""This is written to `self.metadata_filename()` and will override the actual
dist's METADATA, unless `self.metadata == MetadataKind.NoFile`."""
- if self.requires_dist:
- joined = " and ".join(self.requires_dist)
- requires_str = f"Requires-Dist: {joined}"
- else:
- requires_str = ""
return dedent(
f"""\
Metadata-Version: 2.1
Name: {self.name}
Version: {self.version}
- {requires_str}
+ {self.requires_str()}
"""
).encode("utf-8")
@pytest.fixture(scope="function")
-def index_html_content(tmpdir: Path) -> Callable[..., Path]:
+def write_index_html_content(tmpdir: Path) -> Callable[[str], Path]:
"""Generate a PyPI package index.html within a temporary local directory."""
html_dir = tmpdir / "index_html_content"
html_dir.mkdir()
@@ -1327,13 +1325,14 @@ def generate_index_html_subdir(index_html: str) -> Path:
@pytest.fixture(scope="function")
-def index_for_packages(
+def html_index_for_packages(
shared_data: TestData,
- index_html_content: Callable[..., Path],
+ write_index_html_content: Callable[[str], Path],
) -> Callable[..., Path]:
- """Generate a PyPI package index within a local directory pointing to blank data."""
+ """Generate a PyPI HTML package index within a local directory pointing to
+ blank data."""
- def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
+ def generate_html_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
"""
Produce a PyPI directory structure pointing to the specified packages.
"""
@@ -1353,7 +1352,7 @@ def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
"""
# (2) Generate the index.html in a new subdirectory of the temp directory.
- index_html_subdir = index_html_content(index_html)
+ index_html_subdir = write_index_html_content(index_html)
# (3) Generate subdirectories for individual packages, each with their own
# index.html.
@@ -1366,12 +1365,12 @@ def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
# (3.1) Generate the tag which pip can crawl pointing to this
# specific package version.
download_links.append(
- f' {package_link.filename()}
' # noqa: E501
+ f' {package_link.filename}
' # noqa: E501
)
# (3.2) Copy over the corresponding file in `shared_data.packages`.
shutil.copy(
- shared_data.packages / package_link.filename(),
- pkg_subdir / package_link.filename(),
+ shared_data.packages / package_link.filename,
+ pkg_subdir / package_link.filename,
)
# (3.3) Write a metadata file, if applicable.
if package_link.metadata != MetadataKind.NoFile:
@@ -1399,13 +1398,13 @@ def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
return index_html_subdir
- return generate_index_for_packages
+ return generate_html_index_for_packages
@pytest.fixture(scope="function")
-def download_generated_index(
+def download_generated_html_index(
script: PipTestEnvironment,
- index_for_packages: Callable[..., Path],
+ html_index_for_packages: Callable[[Dict[str, List[Package]]], Path],
tmpdir: Path,
) -> Callable[..., Tuple[TestPipResult, Path]]:
"""Execute `pip download` against a generated PyPI index."""
@@ -1420,7 +1419,7 @@ def run_for_generated_index(
Produce a PyPI directory structure pointing to the specified packages, then
execute `pip download -i ...` pointing to our generated index.
"""
- index_dir = index_for_packages(packages)
+ index_dir = html_index_for_packages(packages)
pip_args = [
"download",
"-d",
@@ -1438,17 +1437,61 @@ def run_for_generated_index(
# The package database we generate for testing PEP 658 support.
_simple_packages: Dict[str, List[Package]] = {
"simple": [
- Package("simple", "1.0", MetadataKind.Sha256),
- Package("simple", "2.0", MetadataKind.No),
+ Package("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256),
+ Package("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No),
# This will raise a hashing error.
- Package("simple", "3.0", MetadataKind.WrongHash),
+ Package("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash),
],
"simple2": [
# Override the dependencies here in order to force pip to download
# simple-1.0.tar.gz as well.
- Package("simple2", "1.0", MetadataKind.Unhashed, ("simple==1.0",)),
+ Package(
+ "simple2",
+ "1.0",
+ "simple2-1.0.tar.gz",
+ MetadataKind.Unhashed,
+ ("simple==1.0",),
+ ),
# This will raise an error when pip attempts to fetch the metadata file.
- Package("simple2", "2.0", MetadataKind.NoFile),
+ Package("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile),
+ ],
+ "colander": [
+ # Ensure we can read the dependencies from a metadata file within a wheel
+ # *without* PEP 658 metadata.
+ Package(
+ "colander", "0.9.9", "colander-0.9.9-py2.py3-none-any.whl", MetadataKind.No
+ ),
+ ],
+ "compilewheel": [
+ # Ensure we can override the dependencies of a wheel file by injecting PEP
+ # 658 metadata.
+ Package(
+ "compilewheel",
+ "1.0",
+ "compilewheel-1.0-py2.py3-none-any.whl",
+ MetadataKind.Unhashed,
+ ("simple==1.0",),
+ ),
+ ],
+ "has-script": [
+ # Ensure we check PEP 658 metadata hashing errors for wheel files.
+ Package(
+ "has-script",
+ "1.0",
+ "has.script-1.0-py2.py3-none-any.whl",
+ MetadataKind.WrongHash,
+ ),
+ ],
+ "translationstring": [
+ Package(
+ "translationstring", "1.1", "translationstring-1.1.tar.gz", MetadataKind.No
+ ),
+ ],
+ "priority": [
+ # Ensure we check for a missing metadata file for wheels.
+ Package(
+ "priority", "1.0", "priority-1.0-py2.py3-none-any.whl", MetadataKind.NoFile
+ ),
],
}
@@ -1458,16 +1501,24 @@ def run_for_generated_index(
[
("simple2==1.0", ["simple-1.0.tar.gz", "simple2-1.0.tar.gz"]),
("simple==2.0", ["simple-2.0.tar.gz"]),
+ (
+ "colander",
+ ["colander-0.9.9-py2.py3-none-any.whl", "translationstring-1.1.tar.gz"],
+ ),
+ (
+ "compilewheel",
+ ["compilewheel-1.0-py2.py3-none-any.whl", "simple-1.0.tar.gz"],
+ ),
],
)
def test_download_metadata(
- download_generated_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
expected_outputs: List[str],
) -> None:
"""Verify that if a data-dist-info-metadata attribute is present, then it is used
instead of the actual dist's METADATA."""
- _, download_dir = download_generated_index(
+ _, download_dir = download_generated_html_index(
_simple_packages,
[requirement_to_download],
)
@@ -1480,17 +1531,21 @@ def test_download_metadata(
(
"simple==3.0",
"95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996",
- )
+ ),
+ (
+ "has-script",
+ "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b",
+ ),
],
)
def test_incorrect_metadata_hash(
- download_generated_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
real_hash: str,
) -> None:
"""Verify that if a hash for data-dist-info-metadata is provided, it must match the
actual hash of the metadata file."""
- result, _ = download_generated_index(
+ result, _ = download_generated_html_index(
_simple_packages,
[requirement_to_download],
allow_error=True,
@@ -1503,23 +1558,28 @@ def test_incorrect_metadata_hash(
@pytest.mark.parametrize(
- "requirement_to_download",
- ["simple2==2.0"],
+ "requirement_to_download, expected_url",
+ [
+ ("simple2==2.0", "simple2-2.0.tar.gz.metadata"),
+ ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"),
+ ],
)
def test_metadata_not_found(
- download_generated_index: Callable[..., Tuple[TestPipResult, Path]],
+ download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
+ expected_url: str,
) -> None:
"""Verify that if a data-dist-info-metadata attribute is provided, that pip will
fetch the .metadata file at the location specified by PEP 658, and error
if unavailable."""
- result, _ = download_generated_index(
+ result, _ = download_generated_html_index(
_simple_packages,
[requirement_to_download],
allow_error=True,
)
assert result.returncode != 0
+ expected_re = re.escape(expected_url)
pattern = re.compile(
- r"ERROR: 404 Client Error: FileNotFoundError for url:.*simple2-2\.0\.tar\.gz\.metadata" # noqa: E501
+ f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}"
)
assert pattern.search(result.stderr), (pattern, result.stderr)
From 89f235d607f8e273800bacd61d8e968050d5312c Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 5 Sep 2022 13:53:34 -0400
Subject: [PATCH 13/18] add a note about further testing the json client
---
tests/unit/test_collector.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index 81d8765c837..55676a4fc5c 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -459,6 +459,12 @@ def test_parse_links__requires_python(
_test_parse_links_data_attribute(anchor_html, "requires_python", expected)
+# TODO: this test generates its own examples to validate the json client implementation
+# instead of sharing those examples with the html client testing. We expect this won't
+# hide any bugs because operations like resolving PEP 658 metadata should use the same
+# code for both types of indices, but it might be nice to explicitly have all our tests
+# in test_download.py execute over both html and json indices with
# a pytest.mark.parametrize decorator to ensure nothing slips through the cracks.
def test_parse_links_json() -> None:
json_bytes = json.dumps(
{
From fc9bcdd25c9ded708cd478a5a8356364b689b5d3 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 7 Sep 2022 09:58:36 -0400
Subject: [PATCH 14/18] refactor and rename some variables in a confusing
 section of terse code
---
src/pip/_internal/metadata/pkg_resources.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index b2f3d1382a1..f330ef12a2c 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -114,7 +114,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:
with wheel.as_zipfile() as zf:
info_dir, _ = parse_wheel(zf, name)
- metadata_text = {
+ metadata_dict = {
path.split("/", 1)[-1]: read_wheel_metadata_file(zf, path)
for path in zf.namelist()
if path.startswith(f"{info_dir}/")
@@ -125,7 +125,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
dist = pkg_resources.DistInfoDistribution(
location=wheel.location,
- metadata=InMemoryMetadata(metadata_text, wheel.location),
+ metadata=InMemoryMetadata(metadata_dict, wheel.location),
project_name=name,
)
return cls(dist)
From 036bfc0d66733eb2d1a352281fe65276ffb27d07 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 7 Sep 2022 09:59:08 -0400
Subject: [PATCH 15/18] update message for MetadataInconsistent to allow use
with PEP 658 metadata
---
src/pip/_internal/exceptions.py | 7 +++----
tests/functional/test_new_resolver.py | 2 +-
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py
index 377cde52521..1cf93bc797d 100644
--- a/src/pip/_internal/exceptions.py
+++ b/src/pip/_internal/exceptions.py
@@ -335,8 +335,8 @@ class MetadataInconsistent(InstallationError):
"""Built metadata contains inconsistent information.
This is raised when the metadata contains values (e.g. name and version)
- that do not match the information previously obtained from sdist filename
- or user-supplied ``#egg=`` value.
+ that do not match the information previously obtained from sdist filename,
+ user-supplied ``#egg=`` value, or an install requirement name.
"""
def __init__(
@@ -349,8 +349,7 @@ def __init__(
def __str__(self) -> str:
template = (
- "Requested {} has inconsistent {}: "
- "filename has {!r}, but metadata has {!r}"
+ "Requested {} has inconsistent {}: expected {!r}, but metadata has {!r}"
)
return template.format(self.ireq, self.field, self.f_val, self.m_val)
diff --git a/tests/functional/test_new_resolver.py b/tests/functional/test_new_resolver.py
index efcae29289f..fc52ab9c8d8 100644
--- a/tests/functional/test_new_resolver.py
+++ b/tests/functional/test_new_resolver.py
@@ -1363,7 +1363,7 @@ def test_new_resolver_skip_inconsistent_metadata(script: PipTestEnvironment) ->
)
assert (
- " inconsistent version: filename has '3', but metadata has '2'"
+ " inconsistent version: expected '3', but metadata has '2'"
) in result.stdout, str(result)
script.assert_installed(a="1")
From 8aeb10847dc2b8edd2671728576cf453bcbdf800 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 7 Sep 2022 09:59:56 -0400
Subject: [PATCH 16/18] raise MetadataInconsistent if the name from METADATA
doesn't match the install requirement
---
src/pip/_internal/operations/prepare.py | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 2a61875b529..4bf414cb005 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -19,6 +19,7 @@
HashMismatch,
HashUnpinned,
InstallationError,
+ MetadataInconsistent,
NetworkConnectionError,
PreviousBuildDirError,
VcsHashUnsupported,
@@ -365,6 +366,7 @@ def _fetch_metadata_using_link_data_attr(
req: InstallRequirement,
) -> Optional[BaseDistribution]:
"""Fetch metadata from the data-dist-info-metadata attribute, if possible."""
+ # (1) Get the link to the metadata file, if provided by the backend.
metadata_link = req.link.metadata_link()
if metadata_link is None:
return None
@@ -374,7 +376,7 @@ def _fetch_metadata_using_link_data_attr(
req.req,
metadata_link,
)
- # Download the contents of the METADATA file, separate from the dist itself.
+ # (2) Download the contents of the METADATA file, separate from the dist itself.
metadata_file = get_http_url(
metadata_link,
self._download,
@@ -382,12 +384,23 @@ def _fetch_metadata_using_link_data_attr(
)
with open(metadata_file.path, "rb") as f:
metadata_contents = f.read()
- # Generate a dist just from those file contents.
- return get_metadata_distribution(
+ # (3) Generate a dist just from those file contents.
+ metadata_dist = get_metadata_distribution(
metadata_contents,
req.link.filename,
req.req.name,
)
+ # (4) Ensure the Name: field from the METADATA file matches the name from the
+ # install requirement.
+ #
+ # NB: raw_name will fall back to the name from the install requirement if
+ # the Name: field is not present, but it's noted in the raw_name docstring
+ # that that should NEVER happen anyway.
+ if metadata_dist.raw_name != req.req.name:
+ raise MetadataInconsistent(
+ req, "Name", req.req.name, metadata_dist.raw_name
+ )
+ return metadata_dist
def _fetch_metadata_using_lazy_wheel(
self,
From 931501fae3d2be86ac9094ff612e0c2bf75e1600 Mon Sep 17 00:00:00 2001
From: Tzu-ping Chung
Date: Thu, 8 Sep 2022 07:30:45 +0800
Subject: [PATCH 17/18] Use pathlib shorthand to write file
---
src/pip/_internal/metadata/importlib/_dists.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py
index ae821802a6a..65c043c87ef 100644
--- a/src/pip/_internal/metadata/importlib/_dists.py
+++ b/src/pip/_internal/metadata/importlib/_dists.py
@@ -122,8 +122,7 @@ def from_metadata_file_contents(
TempDirectory(kind="metadata", globally_managed=True).path
)
metadata_path = temp_dir / "METADATA"
- with open(metadata_path, "wb") as f:
- f.write(metadata_contents)
+ metadata_path.write_bytes(metadata_contents)
# Construct dist pointing to the newly created directory.
dist = importlib.metadata.Distribution.at(metadata_path.parent)
return cls(dist, metadata_path.parent, None)
From f8d6daebcc7e92229cb4e34990a1b72d8aa24bb5 Mon Sep 17 00:00:00 2001
From: Tzu-ping Chung
Date: Thu, 8 Sep 2022 07:32:24 +0800
Subject: [PATCH 18/18] Switch to f-string
---
src/pip/_internal/exceptions.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py
index 1cf93bc797d..2ab1f591f12 100644
--- a/src/pip/_internal/exceptions.py
+++ b/src/pip/_internal/exceptions.py
@@ -348,10 +348,10 @@ def __init__(
self.m_val = m_val
def __str__(self) -> str:
- template = (
- "Requested {} has inconsistent {}: expected {!r}, but metadata has {!r}"
+ return (
+ f"Requested {self.ireq} has inconsistent {self.field}: "
+ f"expected {self.f_val!r}, but metadata has {self.m_val!r}"
)
- return template.format(self.ireq, self.field, self.f_val, self.m_val)
class LegacyInstallFailure(DiagnosticPipError):