From d0813e8a39736f1a376d4176a308bb1886244b75 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 11 May 2022 04:32:43 -0400 Subject: [PATCH 01/18] create LinkHash and check out dist-info-metadata (PEP 658) - use PEP 658 dist-info-metadata when --use-feature=fast-deps is on - add testing --- src/pip/_internal/index/collector.py | 131 ++------ src/pip/_internal/metadata/__init__.py | 21 ++ src/pip/_internal/metadata/base.py | 15 + .../_internal/metadata/importlib/_dists.py | 11 + src/pip/_internal/metadata/pkg_resources.py | 19 ++ src/pip/_internal/models/link.py | 239 ++++++++++++-- src/pip/_internal/operations/prepare.py | 52 ++- tests/functional/test_download.py | 310 +++++++++++++++++- tests/lib/server.py | 8 - tests/unit/test_collector.py | 79 ++++- 10 files changed, 723 insertions(+), 162 deletions(-) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index f4e6e221f5d..9ae213197dd 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -9,10 +9,8 @@ import json import logging import os -import re import urllib.parse import urllib.request -import xml.etree.ElementTree from html.parser import HTMLParser from optparse import Values from typing import ( @@ -34,12 +32,12 @@ from pip._vendor.requests.exceptions import RetryError, SSLError from pip._internal.exceptions import NetworkConnectionError -from pip._internal.models.link import Link +from pip._internal.models.link import HTMLElement, Link from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession from pip._internal.network.utils import raise_for_status from pip._internal.utils.filetypes import is_archive_file -from pip._internal.utils.misc import pairwise, redact_auth_from_url +from pip._internal.utils.misc import redact_auth_from_url from pip._internal.vcs import vcs from .sources import CandidatesFromPage, LinkSource, 
build_source @@ -51,7 +49,6 @@ logger = logging.getLogger(__name__) -HTMLElement = xml.etree.ElementTree.Element ResponseHeaders = MutableMapping[str, str] @@ -191,92 +188,25 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]: return None -def _clean_url_path_part(part: str) -> str: - """ - Clean a "part" of a URL path (i.e. after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - return urllib.parse.quote(urllib.parse.unquote(part)) - - -def _clean_file_url_path(part: str) -> str: - """ - Clean the first part of a URL path that corresponds to a local - filesystem path (i.e. the first part after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - # Also, on Windows the path part might contain a drive letter which - # should not be quoted. On Linux where drive letters do not - # exist, the colon should be quoted. We rely on urllib.request - # to do the right thing here. - return urllib.request.pathname2url(urllib.request.url2pathname(part)) - +def _determine_base_url(document: HTMLElement, page_url: str) -> str: + """Determine the HTML document's base URL. -# percent-encoded: / -_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) + This looks for a ```` tag in the HTML document. If present, its href + attribute denotes the base URL of anchor tags in the document. If there is + no such tag (or if it does not have a valid href attribute), the HTML + file's URL is used as the base URL. + :param document: An HTML document representation. The current + implementation expects the result of ``html5lib.parse()``. + :param page_url: The URL of the HTML document. -def _clean_url_path(path: str, is_local_path: bool) -> str: + TODO: Remove when `html5lib` is dropped. """ - Clean the path portion of a URL. 
- """ - if is_local_path: - clean_func = _clean_file_url_path - else: - clean_func = _clean_url_path_part - - # Split on the reserved characters prior to cleaning so that - # revision strings in VCS URLs are properly preserved. - parts = _reserved_chars_re.split(path) - - cleaned_parts = [] - for to_clean, reserved in pairwise(itertools.chain(parts, [""])): - cleaned_parts.append(clean_func(to_clean)) - # Normalize %xx escapes (e.g. %2f -> %2F) - cleaned_parts.append(reserved.upper()) - - return "".join(cleaned_parts) - - -def _clean_link(url: str) -> str: - """ - Make sure a link is fully quoted. - For example, if ' ' occurs in the URL, it will be replaced with "%20", - and without double-quoting other characters. - """ - # Split the URL into parts according to the general structure - # `scheme://netloc/path;parameters?query#fragment`. - result = urllib.parse.urlparse(url) - # If the netloc is empty, then the URL refers to a local filesystem path. - is_local_path = not result.netloc - path = _clean_url_path(result.path, is_local_path=is_local_path) - return urllib.parse.urlunparse(result._replace(path=path)) - - -def _create_link_from_element( - element_attribs: Dict[str, Optional[str]], - page_url: str, - base_url: str, -) -> Optional[Link]: - """ - Convert an anchor element's attributes in a simple repository page to a Link. 
- """ - href = element_attribs.get("href") - if not href: - return None - - url = _clean_link(urllib.parse.urljoin(base_url, href)) - pyrequire = element_attribs.get("data-requires-python") - yanked_reason = element_attribs.get("data-yanked") - - link = Link( - url, - comes_from=page_url, - requires_python=pyrequire, - yanked_reason=yanked_reason, - ) - - return link + for base in document.findall(".//base"): + href = base.get("href") + if href is not None: + return href + return page_url class CacheablePageContent: @@ -326,25 +256,10 @@ def parse_links(page: "IndexContent") -> Iterable[Link]: if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): data = json.loads(page.content) for file in data.get("files", []): - file_url = file.get("url") - if file_url is None: + link = Link.from_json(file, page.url) + if link is None: continue - - # The Link.yanked_reason expects an empty string instead of a boolean. - yanked_reason = file.get("yanked") - if yanked_reason and not isinstance(yanked_reason, str): - yanked_reason = "" - # The Link.yanked_reason expects None instead of False - elif not yanked_reason: - yanked_reason = None - - yield Link( - _clean_link(urllib.parse.urljoin(page.url, file_url)), - comes_from=page.url, - requires_python=file.get("requires-python"), - yanked_reason=yanked_reason, - hashes=file.get("hashes", {}), - ) + yield link return parser = HTMLLinkParser(page.url) @@ -354,11 +269,7 @@ def parse_links(page: "IndexContent") -> Iterable[Link]: url = page.url base_url = parser.base_url or url for anchor in parser.anchors: - link = _create_link_from_element( - anchor, - page_url=url, - base_url=base_url, - ) + link = Link.from_element(anchor, page_url=url, base_url=base_url) if link is None: continue yield link diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py index 8cd0fda6851..c41070cbab6 100644 --- a/src/pip/_internal/metadata/__init__.py +++ b/src/pip/_internal/metadata/__init__.py @@ -103,3 
+103,24 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio :param canonical_name: Normalized project name of the given wheel. """ return select_backend().Distribution.from_wheel(wheel, canonical_name) + + +def get_metadata_distribution( + metadata_path: str, + filename: str, + canonical_name: str, +) -> BaseDistribution: + """Get the representation of the specified METADATA file. + + This returns a Distribution instance from the chosen backend based on the contents + of the file at ``metadata_path``. + + :param metadata_path: Path to the METADATA file. + :param filename: Filename for the dist this metadata represents. + :param canonical_name: Normalized project name of the given dist. + """ + return select_backend().Distribution.from_metadata_file( + metadata_path, + filename, + canonical_name, + ) diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index 151fd6d009e..ef92b40ad59 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -113,6 +113,21 @@ def from_directory(cls, directory: str) -> "BaseDistribution": """ raise NotImplementedError() + @classmethod + def from_metadata_file( + cls, + metadata_path: str, + filename: str, + project_name: str, + ) -> "BaseDistribution": + """Load the distribution from the contents of a METADATA file. + + :param metadata: The path to a METADATA file. + :param filename: File name for the dist with this metadata. + :param project_name: Name of the project this dist represents. + """ + raise NotImplementedError() + @classmethod def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution": """Load the distribution from a given wheel. 
diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index fbf9a93218a..5d5ef7860ad 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -109,6 +109,17 @@ def from_directory(cls, directory: str) -> BaseDistribution: dist = importlib.metadata.Distribution.at(info_location) return cls(dist, info_location, info_location.parent) + @classmethod + def from_metadata_file( + cls, + metadata_path: str, + filename: str, + project_name: str, + ) -> BaseDistribution: + metadata_location = pathlib.Path(metadata_path) + dist = importlib.metadata.Distribution.at(metadata_location.parent) + return cls(dist, metadata_location.parent, None) + @classmethod def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: try: diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index bf79ba139c0..29462223eb0 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -92,6 +92,25 @@ def from_directory(cls, directory: str) -> BaseDistribution: dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata) return cls(dist) + @classmethod + def from_metadata_file( + cls, + metadata_path: str, + filename: str, + project_name: str, + ) -> BaseDistribution: + with open(metadata_path, "rb") as f: + metadata = f.read() + metadata_text = { + "METADATA": metadata, + } + dist = pkg_resources.DistInfoDistribution( + location=filename, + metadata=WheelMetadata(metadata_text, filename), + project_name=project_name, + ) + return cls(dist) + @classmethod def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: try: diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 8fd1c3d9960..8fc42966816 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -1,11 +1,15 @@ import functools +import itertools import 
logging import os import posixpath import re import urllib.parse +import xml.etree.ElementTree +from dataclasses import dataclass from typing import ( TYPE_CHECKING, + Any, Dict, List, Mapping, @@ -18,6 +22,7 @@ from pip._internal.utils.filetypes import WHEEL_EXTENSION from pip._internal.utils.hashes import Hashes from pip._internal.utils.misc import ( + pairwise, redact_auth_from_url, split_auth_from_netloc, splitext, @@ -31,11 +36,121 @@ logger = logging.getLogger(__name__) +HTMLElement = xml.etree.ElementTree.Element + # Order matters, earlier hashes have a precedence over later hashes for what # we will pick to use. _SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5") +@dataclass(frozen=True) +class LinkHash: + """Links to content may have embedded hash values. This class parses those. + + `name` must be any member of `_SUPPORTED_HASHES`. + + This class can be converted to and from `ArchiveInfo`. While ArchiveInfo intends to + be JSON-serializable to conform to PEP 610, this class contains the logic for + parsing a hash name and value for correctness, and then checking whether that hash + conforms to a schema with `.is_hash_allowed()`.""" + + name: str + value: str + + _hash_re = re.compile( + r"({choices})=(.*)".format( + choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES) + ), + ) + + def __post_init__(self) -> None: + assert self._hash_re.match(f"{self.name}={self.value}") + + @classmethod + @functools.lru_cache(maxsize=None) + def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]: + """Search a string for a checksum algorithm name and encoded output value.""" + match = cls._hash_re.search(url) + if match is None: + return None + name, value = match.groups() + return cls(name=name, value=value) + + def as_hashes(self) -> Hashes: + """Return a Hashes instance which checks only for the current hash.""" + return Hashes({self.name: [self.value]}) + + def is_hash_allowed(self, hashes: Optional[Hashes]) 
-> bool: + """ + Return True if the current hash is allowed by `hashes`. + """ + if hashes is None: + return False + return hashes.is_hash_allowed(self.name, hex_digest=self.value) + + +def _clean_url_path_part(part: str) -> str: + """ + Clean a "part" of a URL path (i.e. after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + return urllib.parse.quote(urllib.parse.unquote(part)) + + +def _clean_file_url_path(part: str) -> str: + """ + Clean the first part of a URL path that corresponds to a local + filesystem path (i.e. the first part after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + # Also, on Windows the path part might contain a drive letter which + # should not be quoted. On Linux where drive letters do not + # exist, the colon should be quoted. We rely on urllib.request + # to do the right thing here. + return urllib.request.pathname2url(urllib.request.url2pathname(part)) + + +# percent-encoded: / +_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) + + +def _clean_url_path(path: str, is_local_path: bool) -> str: + """ + Clean the path portion of a URL. + """ + if is_local_path: + clean_func = _clean_file_url_path + else: + clean_func = _clean_url_path_part + + # Split on the reserved characters prior to cleaning so that + # revision strings in VCS URLs are properly preserved. + parts = _reserved_chars_re.split(path) + + cleaned_parts = [] + for to_clean, reserved in pairwise(itertools.chain(parts, [""])): + cleaned_parts.append(clean_func(to_clean)) + # Normalize %xx escapes (e.g. %2f -> %2F) + cleaned_parts.append(reserved.upper()) + + return "".join(cleaned_parts) + + +def _ensure_quoted_url(url: str) -> str: + """ + Make sure a link is fully quoted. + For example, if ' ' occurs in the URL, it will be replaced with "%20", + and without double-quoting other characters. 
+ """ + # Split the URL into parts according to the general structure + # `scheme://netloc/path;parameters?query#fragment`. + result = urllib.parse.urlparse(url) + # If the netloc is empty, then the URL refers to a local filesystem path. + is_local_path = not result.netloc + path = _clean_url_path(result.path, is_local_path=is_local_path) + return urllib.parse.urlunparse(result._replace(path=path)) + + class Link(KeyBasedCompareMixin): """Represents a parsed link from a Package Index's simple URL""" @@ -46,6 +161,8 @@ class Link(KeyBasedCompareMixin): "comes_from", "requires_python", "yanked_reason", + "dist_info_metadata", + "link_hash", "cache_link_parsing", ] @@ -55,6 +172,8 @@ def __init__( comes_from: Optional[Union[str, "IndexContent"]] = None, requires_python: Optional[str] = None, yanked_reason: Optional[str] = None, + dist_info_metadata: Optional[str] = None, + link_hash: Optional[LinkHash] = None, cache_link_parsing: bool = True, hashes: Optional[Mapping[str, str]] = None, ) -> None: @@ -72,6 +191,14 @@ def __init__( a simple repository HTML link. If the file has been yanked but no reason was provided, this should be the empty string. See PEP 592 for more information and the specification. + :param dist_info_metadata: the metadata attached to the file, or None if no such + metadata is provided. This is the value of the "data-dist-info-metadata" + attribute, if present, in a simple repository HTML link. This may be parsed + into its own `Link` by `self.metadata_link()`. See PEP 658 for more + information and the specification. + :param link_hash: a checksum for the content the link points to. If not + provided, this will be extracted from the link URL, if the URL has + any checksum. :param cache_link_parsing: A flag that is used elsewhere to determine whether resources retrieved from this link should be cached. 
PyPI index urls should @@ -94,11 +221,69 @@ def __init__( self.comes_from = comes_from self.requires_python = requires_python if requires_python else None self.yanked_reason = yanked_reason + self.dist_info_metadata = dist_info_metadata + self.link_hash = link_hash or LinkHash.split_hash_name_and_value(self._url) super().__init__(key=url, defining_class=Link) self.cache_link_parsing = cache_link_parsing + @classmethod + def from_json( + cls, + file_data: Dict[str, Any], + page_url: str, + ) -> Optional["Link"]: + """ + Convert an pypi json document from a simple repository page into a Link. + """ + file_url = file_data.get("url") + if file_url is None: + return None + + yanked_reason = file_data.get("yanked") + # The Link.yanked_reason expects an empty string instead of a boolean. + if yanked_reason and not isinstance(yanked_reason, str): + yanked_reason = "" + # The Link.yanked_reason expects None instead of False. + elif not yanked_reason: + yanked_reason = None + + return cls( + _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url)), + comes_from=page_url, + requires_python=file_data.get("requires-python"), + yanked_reason=yanked_reason, + hashes=file_data.get("hashes", {}), + ) + + @classmethod + def from_element( + cls, + anchor_attribs: Dict[str, Optional[str]], + page_url: str, + base_url: str, + ) -> Optional["Link"]: + """ + Convert an anchor element's attributes in a simple repository page to a Link. 
+ """ + href = anchor_attribs.get("href") + if not href: + return None + + url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href)) + pyrequire = anchor_attribs.get("data-requires-python") + yanked_reason = anchor_attribs.get("data-yanked") + dist_info_metadata = anchor_attribs.get("data-dist-info-metadata") + + return cls( + url, + comes_from=page_url, + requires_python=pyrequire, + yanked_reason=yanked_reason, + dist_info_metadata=dist_info_metadata, + ) + def __str__(self) -> str: if self.requires_python: rp = f" (requires-python:{self.requires_python})" @@ -181,32 +366,36 @@ def subdirectory_fragment(self) -> Optional[str]: return None return match.group(1) - _hash_re = re.compile( - r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES)) - ) + def metadata_link(self) -> Optional["Link"]: + """Implementation of PEP 658 parsing.""" + # Note that Link.from_element() parsing the "data-dist-info-metadata" attribute + # from an HTML anchor tag is typically how the Link.dist_info_metadata attribute + # gets set. + if self.dist_info_metadata is None: + return None + metadata_url = f"{self.url_without_fragment}.metadata" + link_hash: Optional[LinkHash] = None + # If data-dist-info-metadata="true" is set, then the metadata file exists, + # but there is no information about its checksum or anything else. 
+ if self.dist_info_metadata != "true": + link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata) + return Link(metadata_url, link_hash=link_hash) + + def as_hashes(self) -> Optional[Hashes]: + if self.link_hash is not None: + return self.link_hash.as_hashes() + return None @property def hash(self) -> Optional[str]: - for hashname in _SUPPORTED_HASHES: - if hashname in self._hashes: - return self._hashes[hashname] - - match = self._hash_re.search(self._url) - if match: - return match.group(2) - + if self.link_hash is not None: + return self.link_hash.value return None @property def hash_name(self) -> Optional[str]: - for hashname in _SUPPORTED_HASHES: - if hashname in self._hashes: - return hashname - - match = self._hash_re.search(self._url) - if match: - return match.group(1) - + if self.link_hash is not None: + return self.link_hash.name return None @property @@ -236,19 +425,15 @@ def is_yanked(self) -> bool: @property def has_hash(self) -> bool: - return self.hash_name is not None + return self.link_hash is not None def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: """ - Return True if the link has a hash and it is allowed. + Return True if the link has a hash and it is allowed by `hashes`. """ - if hashes is None or not self.has_hash: + if self.link_hash is None: return False - # Assert non-None so mypy knows self.hash_name and self.hash are str. 
- assert self.hash_name is not None - assert self.hash is not None - - return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash) + return self.link_hash.is_hash_allowed(hashes) class _CleanResult(NamedTuple): diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 80723fffe47..7722370bcea 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -24,7 +24,7 @@ VcsHashUnsupported, ) from pip._internal.index.package_finder import PackageFinder -from pip._internal.metadata import BaseDistribution +from pip._internal.metadata import BaseDistribution, get_metadata_distribution from pip._internal.models.direct_url import ArchiveInfo from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel @@ -346,16 +346,49 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes: # showing the user what the hash should be. return req.hashes(trust_internet=False) or MissingHashes() - def _fetch_metadata_using_lazy_wheel( + def _fetch_metadata_only( self, - link: Link, + req: InstallRequirement, ) -> Optional[BaseDistribution]: - """Fetch metadata using lazy wheel, if possible.""" + # --use-feature=fast-deps must be provided. if not self.use_lazy_wheel: return None if self.require_hashes: - logger.debug("Lazy wheel is not used as hash checking is required") + logger.debug( + "Metadata-only fetching is not used as hash checking is required", + ) + return None + # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. 
+ pep_658_dist = self._fetch_metadata_using_pep_658(req) + if pep_658_dist is not None: + return pep_658_dist + return self._fetch_metadata_using_lazy_wheel(req.link) + + def _fetch_metadata_using_pep_658( + self, + req: InstallRequirement, + ) -> Optional[BaseDistribution]: + """Fetch metadata from the data-dist-info-metadata attribute, if possible.""" + metadata_link = req.link.metadata_link() + if metadata_link is None: return None + metadata_file = get_http_url( + metadata_link, + self._download, + hashes=metadata_link.as_hashes(), + ) + assert req.req is not None + return get_metadata_distribution( + metadata_file.path, + req.link.filename, + req.req.name, + ) + + def _fetch_metadata_using_lazy_wheel( + self, + link: Link, + ) -> Optional[BaseDistribution]: + """Fetch metadata using lazy wheel, if possible.""" if link.is_file or not link.is_wheel: logger.debug( "Lazy wheel is not used as %r does not points to a remote wheel", @@ -414,13 +447,12 @@ def prepare_linked_requirement( ) -> BaseDistribution: """Prepare a requirement to be obtained from req.link.""" assert req.link - link = req.link self._log_preparing_link(req) with indent_log(): # Check if the relevant file is already available # in the download directory file_path = None - if self.download_dir is not None and link.is_wheel: + if self.download_dir is not None and req.link.is_wheel: hashes = self._get_linked_req_hashes(req) file_path = _check_download_dir(req.link, self.download_dir, hashes) @@ -429,10 +461,10 @@ def prepare_linked_requirement( self._downloaded[req.link.url] = file_path else: # The file is not available, attempt to fetch only metadata - wheel_dist = self._fetch_metadata_using_lazy_wheel(link) - if wheel_dist is not None: + metadata_dist = self._fetch_metadata_only(req) + if metadata_dist is not None: req.needs_more_preparation = True - return wheel_dist + return metadata_dist # None of the optimizations worked, fully prepare the requirement return 
self._prepare_linked_requirement(req, parallel_builds) diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py index 89318b74553..629c15a040f 100644 --- a/tests/functional/test_download.py +++ b/tests/functional/test_download.py @@ -1,17 +1,24 @@ import os +import re import shutil import textwrap +import uuid +from dataclasses import dataclass +from enum import Enum from hashlib import sha256 from pathlib import Path -from typing import List +from textwrap import dedent +from typing import Callable, Dict, List, Tuple import pytest from pip._internal.cli.status_codes import ERROR +from pip._internal.utils.urls import path_to_url from tests.conftest import MockServer, ScriptFactory from tests.lib import ( PipTestEnvironment, TestData, + TestPipResult, create_basic_sdist_for_package, create_really_basic_wheel, ) @@ -1230,3 +1237,304 @@ def test_download_use_pep517_propagation( downloads = os.listdir(download_dir) assert len(downloads) == 2 + + +class MetadataKind(Enum): + """All the types of values we might be provided for the data-dist-info-metadata + attribute from PEP 658.""" + + # Valid: will read metadata from the dist instead. + No = "none" + # Valid: will read the .metadata file, but won't check its hash. + Unhashed = "unhashed" + # Valid: will read the .metadata file and check its hash matches. + Sha256 = "sha256" + # Invalid: will error out after checking the hash. + WrongHash = "wrong-hash" + # Invalid: will error out after failing to fetch the .metadata file. + NoFile = "no-file" + + +@dataclass(frozen=True) +class Package: + """Mock package structure used to generate a PyPI repository. + + Package name and version should correspond to sdists (.tar.gz files) in our test + data.""" + + name: str + version: str + metadata: MetadataKind + # This will override any dependencies specified in the actual dist's METADATA. + requires_dist: Tuple[str, ...] 
= () + + def filename(self) -> str: + """We will only be pointed at .tar.gz packages, so we can guess the name.""" + return f"{self.name}-{self.version}.tar.gz" + + def metadata_filename(self) -> str: + """This is specified by PEP 658.""" + return f"{self.filename()}.metadata" + + def generate_additional_tag(self) -> str: + """This gets injected into the tag in the generated PyPI index page for this + package.""" + if self.metadata == MetadataKind.No: + return "" + if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]: + return 'data-dist-info-metadata="true"' + if self.metadata == MetadataKind.WrongHash: + return 'data-dist-info-metadata="sha256=WRONG-HASH"' + assert self.metadata == MetadataKind.Sha256 + checksum = sha256(self.generate_metadata()).hexdigest() + return f'data-dist-info-metadata="sha256={checksum}"' + + def generate_metadata(self) -> bytes: + """This is written to `self.metadata_filename()` and will override the actual + dist's METADATA, unless `self.metadata == MetadataKind.NoFile`.""" + if self.requires_dist: + joined = " and ".join(self.requires_dist) + requires_str = f"Requires-Dist: {joined}" + else: + requires_str = "" + return dedent( + f"""\ + Metadata-Version: 2.1 + Name: {self.name} + Version: {self.version} + {requires_str} + """ + ).encode("utf-8") + + +@pytest.fixture(scope="function") +def index_html_content(tmpdir: Path) -> Callable[..., Path]: + """Generate a PyPI package index.html within a temporary local directory.""" + html_dir = tmpdir / "index_html_content" + html_dir.mkdir() + + def generate_index_html_subdir(index_html: str) -> Path: + """Create a new subdirectory after a UUID and write an index.html.""" + new_subdir = html_dir / uuid.uuid4().hex + new_subdir.mkdir() + + with open(new_subdir / "index.html", "w") as f: + f.write(index_html) + + return new_subdir + + return generate_index_html_subdir + + +@pytest.fixture(scope="function") +def index_for_packages( + shared_data: TestData, + index_html_content: 
Callable[..., Path], +) -> Callable[..., Path]: + """Generate a PyPI package index within a local directory pointing to blank data.""" + + def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path: + """ + Produce a PyPI directory structure pointing to the specified packages. + """ + # (1) Generate the content for a PyPI index.html. + pkg_links = "\n".join( + f' {pkg}' for pkg in packages.keys() + ) + index_html = f"""\ + + + + + Simple index + + +{pkg_links} + +""" + # (2) Generate the index.html in a new subdirectory of the temp directory. + index_html_subdir = index_html_content(index_html) + + # (3) Generate subdirectories for individual packages, each with their own + # index.html. + for pkg, links in packages.items(): + pkg_subdir = index_html_subdir / pkg + pkg_subdir.mkdir() + + download_links: List[str] = [] + for package_link in links: + # (3.1) Generate the tag which pip can crawl pointing to this + # specific package version. + download_links.append( + f' {package_link.filename()}
' # noqa: E501 + ) + # (3.2) Copy over the corresponding file in `shared_data.packages`. + shutil.copy( + shared_data.packages / package_link.filename(), + pkg_subdir / package_link.filename(), + ) + # (3.3) Write a metadata file, if applicable. + if package_link.metadata != MetadataKind.NoFile: + with open(pkg_subdir / package_link.metadata_filename(), "wb") as f: + f.write(package_link.generate_metadata()) + + # (3.4) After collating all the download links and copying over the files, + # write an index.html with the generated download links for each + # copied file for this specific package name. + download_links_str = "\n".join(download_links) + pkg_index_content = f"""\ + + + + + Links for {pkg} + + +

Links for {pkg}

+{download_links_str} + +""" + with open(pkg_subdir / "index.html", "w") as f: + f.write(pkg_index_content) + + return index_html_subdir + + return generate_index_for_packages + + +@pytest.fixture(scope="function") +def download_generated_index( + script: PipTestEnvironment, + index_for_packages: Callable[..., Path], + tmpdir: Path, +) -> Callable[..., Tuple[TestPipResult, Path]]: + """Execute `pip download` against a generated PyPI index.""" + download_dir = tmpdir / "download_dir" + + def run_for_generated_index( + packages: Dict[str, List[Package]], + args: List[str], + fast_deps: bool = False, + allow_error: bool = False, + ) -> Tuple[TestPipResult, Path]: + """ + Produce a PyPI directory structure pointing to the specified packages, then + execute `pip download -i ...` pointing to our generated index. + """ + index_dir = index_for_packages(packages) + pip_args = [] + if fast_deps: + pip_args.append("--use-feature=fast-deps") + pip_args.extend( + [ + "download", + "-d", + str(download_dir), + "-i", + path_to_url(str(index_dir)), + ] + ) + pip_args.extend(args) + result = script.pip( + *pip_args, + # We need allow_stderr_warning=True if fast_deps=True, since that will print + # a warning to stderr. + allow_stderr_warning=fast_deps, + allow_error=allow_error, + ) + return (result, download_dir) + + return run_for_generated_index + + +# The package database we generate for testing PEP 658 support. +_simple_packages: Dict[str, List[Package]] = { + "simple": [ + Package("simple", "1.0", MetadataKind.Sha256), + Package("simple", "2.0", MetadataKind.No), + # This will raise a hashing error. + Package("simple", "3.0", MetadataKind.WrongHash), + ], + "simple2": [ + # Override the dependencies here in order to force pip to download + # simple-1.0.tar.gz as well. + Package("simple2", "1.0", MetadataKind.Unhashed, ("simple==1.0",)), + # This will raise an error when pip attempts to fetch the metadata file. 
+ Package("simple2", "2.0", MetadataKind.NoFile), + ], +} + + +@pytest.mark.parametrize( + "requirement_to_download, expected_outputs", + [ + ("simple2==1.0", ["simple-1.0.tar.gz", "simple2-1.0.tar.gz"]), + ("simple==2.0", ["simple-2.0.tar.gz"]), + ], +) +def test_download_metadata( + download_generated_index: Callable[..., Tuple[TestPipResult, Path]], + requirement_to_download: str, + expected_outputs: List[str], +) -> None: + """Verify that when using --use-feature=fast-deps, if a data-dist-info-metadata + attribute is present, then it is used instead of the actual dist's METADATA.""" + _, download_dir = download_generated_index( + _simple_packages, + [requirement_to_download], + fast_deps=True, + ) + assert sorted(os.listdir(download_dir)) == expected_outputs + + +@pytest.mark.parametrize( + "requirement_to_download, real_hash", + [ + ( + "simple==3.0", + "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996", + ) + ], +) +def test_incorrect_metadata_hash( + download_generated_index: Callable[..., Tuple[TestPipResult, Path]], + requirement_to_download: str, + real_hash: str, +) -> None: + """Verify that if a hash for data-dist-info-metadata is provided, it must match the + actual hash of the metadata file.""" + result, _ = download_generated_index( + _simple_packages, + [requirement_to_download], + fast_deps=True, + allow_error=True, + ) + assert result.returncode != 0 + expected_msg = f"""\ + Expected sha256 WRONG-HASH + Got {real_hash}""" + assert expected_msg in result.stderr + + +@pytest.mark.parametrize( + "requirement_to_download", + ["simple2==2.0"], +) +def test_metadata_not_found( + download_generated_index: Callable[..., Tuple[TestPipResult, Path]], + requirement_to_download: str, +) -> None: + """Verify that if a data-dist-info-metadata attribute is provided, that pip will + fetch the .metadata file at the location specified by PEP 658, and error + if unavailable.""" + result, _ = download_generated_index( + _simple_packages, + 
[requirement_to_download], + fast_deps=True, + allow_error=True, + ) + assert result.returncode != 0 + pattern = re.compile( + r"ERROR: 404 Client Error: FileNotFoundError for url:.*simple2-2\.0\.tar\.gz\.metadata" # noqa: E501 + ) + assert pattern.search(result.stderr), (pattern, result.stderr) diff --git a/tests/lib/server.py b/tests/lib/server.py index 4b5add345d3..4cc18452cb5 100644 --- a/tests/lib/server.py +++ b/tests/lib/server.py @@ -150,14 +150,6 @@ def html5_page(text: str) -> str: ) -def index_page(spec: Dict[str, str]) -> "WSGIApplication": - def link(name: str, value: str) -> str: - return '{}'.format(value, name) - - links = "".join(link(*kv) for kv in spec.items()) - return text_html_response(html5_page(links)) - - def package_page(spec: Dict[str, str]) -> "WSGIApplication": def link(name: str, value: str) -> str: return '{}'.format(value, name) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 3afc5210dc7..bbc78a70317 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -11,13 +11,12 @@ import pytest from pip._vendor import requests +from pip._vendor.packaging.requirements import Requirement from pip._internal.exceptions import NetworkConnectionError from pip._internal.index.collector import ( IndexContent, LinkCollector, - _clean_link, - _clean_url_path, _get_index_content, _get_simple_response, _make_index_content, @@ -28,7 +27,12 @@ from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.index import PyPI -from pip._internal.models.link import Link +from pip._internal.models.link import ( + Link, + LinkHash, + _clean_url_path, + _ensure_quoted_url, +) from pip._internal.network.session import PipSession from tests.lib import TestData, make_test_link_collector @@ -402,13 +406,13 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None: ), ], ) -def 
test_clean_link(url: str, clean_url: str) -> None: - assert _clean_link(url) == clean_url +def test_ensure_quoted_url(url: str, clean_url: str) -> None: + assert _ensure_quoted_url(url) == clean_url def _test_parse_links_data_attribute( anchor_html: str, attr: str, expected: Optional[str] -) -> None: +) -> Link: html = ( "" '' @@ -427,6 +431,7 @@ def _test_parse_links_data_attribute( (link,) = links actual = getattr(link, attr) assert actual == expected + return link @pytest.mark.parametrize( @@ -534,6 +539,48 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) - _test_parse_links_data_attribute(anchor_html, "yanked_reason", expected) +# Requirement objects do not == each other unless they point to the same instance! +_pkg1_requirement = Requirement("pkg1==1.0") + + +@pytest.mark.parametrize( + "anchor_html, expected, link_hash", + [ + # Test not present. + ( + '', + None, + None, + ), + # Test with value "true". + ( + '', + "true", + None, + ), + # Test with a provided hash value. + ( + '', # noqa: E501 + "sha256=aa113592bbe", + None, + ), + # Test with a provided hash value for both the requirement as well as metadata. 
+ ( + '', # noqa: E501 + "sha256=aa113592bbe", + LinkHash("sha512", "abc132409cb"), + ), + ], +) +def test_parse_links__dist_info_metadata( + anchor_html: str, + expected: Optional[str], + link_hash: Optional[LinkHash], +) -> None: + link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected) + assert link.link_hash == link_hash + + def test_parse_links_caches_same_page_by_url() -> None: html = ( "" @@ -963,3 +1010,23 @@ def expand_path(path: str) -> str: expected_temp2_dir = os.path.normcase(temp2_dir) assert search_scope.find_links == ["~/temp1", expected_temp2_dir] assert search_scope.index_urls == ["default_url"] + + +@pytest.mark.parametrize( + "url, result", + [ + ( + "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe", + LinkHash("sha256", "aa113592bbe"), + ), + ( + "https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe", + LinkHash("md5", "aa113592bbe"), + ), + ("https://pypi.org/pip-18.0.tar.gz", None), + # We don't recognize the "sha500" algorithm, so we discard it. + ("https://pypi.org/pip-18.0.tar.gz#sha500=aa113592bbe", None), + ], +) +def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None: + assert LinkHash.split_hash_name_and_value(url) == result From 28b6134dc677a35840950e2ae01127c9de08bd02 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 11 May 2022 19:31:12 -0400 Subject: [PATCH 02/18] add NEWS entry --- news/11111.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/11111.feature.rst diff --git a/news/11111.feature.rst b/news/11111.feature.rst new file mode 100644 index 00000000000..41fa2398f4e --- /dev/null +++ b/news/11111.feature.rst @@ -0,0 +1 @@ +Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata when ``--use-feature=fast-deps`` is enabled. 
From f675e0995137c8c6295fc3a5408a3fa15d0447a8 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 11 May 2022 20:42:36 -0400 Subject: [PATCH 03/18] explain why we don't validate hexdigests now --- src/pip/_internal/models/link.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 8fc42966816..970bcf49b44 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -58,6 +58,11 @@ class LinkHash: value: str _hash_re = re.compile( + # NB: we do not validate that the second group (.*) is a valid hex + # digest. Instead, we simply keep that string in this class, and then check it + # against Hashes when hash-checking is needed. This is easier to debug than + # proactively discarding an invalid hex digest, as we handle incorrect hashes + # and malformed hashes in the same place. r"({choices})=(.*)".format( choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES) ), From 6675ba3294ad771fcce83f40ddf9e73a7227616b Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 13 May 2022 01:13:41 -0400 Subject: [PATCH 04/18] fix importlib.metadata case --- src/pip/_internal/operations/prepare.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 7722370bcea..ed1c4664117 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -377,9 +377,15 @@ def _fetch_metadata_using_pep_658( self._download, hashes=metadata_link.as_hashes(), ) + # The file will be downloaded under the same name as it's listed in the index, + # which will end with .metadata. To make importlib.metadata.PathDistribution + # work, we need to keep it in the same directory, but rename it to METADATA. 
+ containing_dir = os.path.dirname(metadata_file.path) + new_metadata_path = os.path.join(containing_dir, "METADATA") + os.rename(metadata_file.path, new_metadata_path) assert req.req is not None return get_metadata_distribution( - metadata_file.path, + new_metadata_path, req.link.filename, req.req.name, ) From 80e044a4d4c6d432c048431c753e8d3b25eb6a63 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 15 May 2022 12:28:15 -0400 Subject: [PATCH 05/18] make it work without --use-feature=fast-deps! --- news/11111.feature.rst | 2 +- src/pip/_internal/operations/prepare.py | 22 +++++++++------ tests/functional/test_download.py | 37 ++++++++----------------- 3 files changed, 25 insertions(+), 36 deletions(-) diff --git a/news/11111.feature.rst b/news/11111.feature.rst index 41fa2398f4e..39cb4b35c12 100644 --- a/news/11111.feature.rst +++ b/news/11111.feature.rst @@ -1 +1 @@ -Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata when ``--use-feature=fast-deps`` is enabled. +Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata without downloading the dist yet. diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index ed1c4664117..99aa6ff4596 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -350,19 +350,15 @@ def _fetch_metadata_only( self, req: InstallRequirement, ) -> Optional[BaseDistribution]: - # --use-feature=fast-deps must be provided. - if not self.use_lazy_wheel: - return None if self.require_hashes: logger.debug( "Metadata-only fetching is not used as hash checking is required", ) return None # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. 
- pep_658_dist = self._fetch_metadata_using_pep_658(req) - if pep_658_dist is not None: - return pep_658_dist - return self._fetch_metadata_using_lazy_wheel(req.link) + return self._fetch_metadata_using_pep_658( + req + ) or self._fetch_metadata_using_lazy_wheel(req.link) def _fetch_metadata_using_pep_658( self, @@ -372,6 +368,12 @@ def _fetch_metadata_using_pep_658( metadata_link = req.link.metadata_link() if metadata_link is None: return None + assert req.req is not None + logger.info( + "Obtaining dependency information for %s from %s", + req.req, + metadata_link, + ) metadata_file = get_http_url( metadata_link, self._download, @@ -383,7 +385,6 @@ def _fetch_metadata_using_pep_658( containing_dir = os.path.dirname(metadata_file.path) new_metadata_path = os.path.join(containing_dir, "METADATA") os.rename(metadata_file.path, new_metadata_path) - assert req.req is not None return get_metadata_distribution( new_metadata_path, req.link.filename, @@ -395,9 +396,12 @@ def _fetch_metadata_using_lazy_wheel( link: Link, ) -> Optional[BaseDistribution]: """Fetch metadata using lazy wheel, if possible.""" + # --use-feature=fast-deps must be provided. + if not self.use_lazy_wheel: + return None if link.is_file or not link.is_wheel: logger.debug( - "Lazy wheel is not used as %r does not points to a remote wheel", + "Lazy wheel is not used as %r does not point to a remote wheel", link, ) return None diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py index 629c15a040f..6785fc73858 100644 --- a/tests/functional/test_download.py +++ b/tests/functional/test_download.py @@ -1414,7 +1414,6 @@ def download_generated_index( def run_for_generated_index( packages: Dict[str, List[Package]], args: List[str], - fast_deps: bool = False, allow_error: bool = False, ) -> Tuple[TestPipResult, Path]: """ @@ -1422,26 +1421,15 @@ def run_for_generated_index( execute `pip download -i ...` pointing to our generated index. 
""" index_dir = index_for_packages(packages) - pip_args = [] - if fast_deps: - pip_args.append("--use-feature=fast-deps") - pip_args.extend( - [ - "download", - "-d", - str(download_dir), - "-i", - path_to_url(str(index_dir)), - ] - ) - pip_args.extend(args) - result = script.pip( - *pip_args, - # We need allow_stderr_warning=True if fast_deps=True, since that will print - # a warning to stderr. - allow_stderr_warning=fast_deps, - allow_error=allow_error, - ) + pip_args = [ + "download", + "-d", + str(download_dir), + "-i", + path_to_url(str(index_dir)), + *args, + ] + result = script.pip(*pip_args, allow_error=allow_error) return (result, download_dir) return run_for_generated_index @@ -1477,12 +1465,11 @@ def test_download_metadata( requirement_to_download: str, expected_outputs: List[str], ) -> None: - """Verify that when using --use-feature=fast-deps, if a data-dist-info-metadata - attribute is present, then it is used instead of the actual dist's METADATA.""" + """Verify that if a data-dist-info-metadata attribute is present, then it is used + instead of the actual dist's METADATA.""" _, download_dir = download_generated_index( _simple_packages, [requirement_to_download], - fast_deps=True, ) assert sorted(os.listdir(download_dir)) == expected_outputs @@ -1506,7 +1493,6 @@ def test_incorrect_metadata_hash( result, _ = download_generated_index( _simple_packages, [requirement_to_download], - fast_deps=True, allow_error=True, ) assert result.returncode != 0 @@ -1530,7 +1516,6 @@ def test_metadata_not_found( result, _ = download_generated_index( _simple_packages, [requirement_to_download], - fast_deps=True, allow_error=True, ) assert result.returncode != 0 From 266c5cddc23c00235ed8e1860ea17678264cf9b3 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 09:36:47 -0400 Subject: [PATCH 06/18] respond to review comments --- src/pip/_internal/operations/prepare.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 99aa6ff4596..c71a4f5b325 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -356,11 +356,11 @@ def _fetch_metadata_only( ) return None # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. - return self._fetch_metadata_using_pep_658( + return self._fetch_metadata_using_link_data_attr( req ) or self._fetch_metadata_using_lazy_wheel(req.link) - def _fetch_metadata_using_pep_658( + def _fetch_metadata_using_link_data_attr( self, req: InstallRequirement, ) -> Optional[BaseDistribution]: From 8da1bdc91e97d9363956777c75427e9e07f9d421 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 10:04:37 -0400 Subject: [PATCH 07/18] ensure PEP 691 json parsing also supports PEP 658 dist-info-metadata --- src/pip/_internal/models/link.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 970bcf49b44..e0f436fa345 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -246,7 +246,12 @@ def from_json( if file_url is None: return None + url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url)) + pyrequire = file_data.get("requires-python") yanked_reason = file_data.get("yanked") + dist_info_metadata = file_data.get("dist-info-metadata") + hashes = file_data.get("hashes", {}) + # The Link.yanked_reason expects an empty string instead of a boolean. 
if yanked_reason and not isinstance(yanked_reason, str): yanked_reason = "" @@ -255,11 +260,12 @@ def from_json( yanked_reason = None return cls( - _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url)), + url, comes_from=page_url, - requires_python=file_data.get("requires-python"), + requires_python=pyrequire, yanked_reason=yanked_reason, - hashes=file_data.get("hashes", {}), + hashes=hashes, + dist_info_metadata=dist_info_metadata, ) @classmethod From 23e54924f760a2dfaa1b377b3d8f08dd16372b0f Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 10:32:50 -0400 Subject: [PATCH 08/18] add test case for dist-info-metadata key from PEP 691!! --- tests/unit/test_collector.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index bbc78a70317..81d8765c837 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -479,6 +479,14 @@ def test_parse_links_json() -> None: "requires-python": ">=3.7", "dist-info-metadata": False, }, + # Same as above, but parsing dist-info-metadata. + { + "filename": "holygrail-1.0-py3-none-any.whl", + "url": "/files/holygrail-1.0-py3-none-any.whl", + "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + "requires-python": ">=3.7", + "dist-info-metadata": "sha512=aabdd41", + }, ], } ).encode("utf8") @@ -507,8 +515,25 @@ def test_parse_links_json() -> None: yanked_reason=None, hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, ), + Link( + "https://example.com/files/holygrail-1.0-py3-none-any.whl", + comes_from=page.url, + requires_python=">=3.7", + yanked_reason=None, + hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"}, + dist_info_metadata="sha512=aabdd41", + ), ] + # Ensure the metadata info can be parsed into the correct link. 
+ metadata_link = links[2].metadata_link() + assert metadata_link is not None + assert ( + metadata_link.url + == "https://example.com/files/holygrail-1.0-py3-none-any.whl.metadata" + ) + assert metadata_link.link_hash == LinkHash("sha512", "aabdd41") + @pytest.mark.parametrize( "anchor_html, expected", From a685a984d2faae9a3ffbd4a68db45e9cee84123e Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 11:53:56 -0400 Subject: [PATCH 09/18] remove unused code after html5lib was removed!! --- src/pip/_internal/index/collector.py | 23 +---------------------- src/pip/_internal/models/link.py | 3 --- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index 9ae213197dd..0120610c758 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -32,7 +32,7 @@ from pip._vendor.requests.exceptions import RetryError, SSLError from pip._internal.exceptions import NetworkConnectionError -from pip._internal.models.link import HTMLElement, Link +from pip._internal.models.link import Link from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession from pip._internal.network.utils import raise_for_status @@ -188,27 +188,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]: return None -def _determine_base_url(document: HTMLElement, page_url: str) -> str: - """Determine the HTML document's base URL. - - This looks for a ```` tag in the HTML document. If present, its href - attribute denotes the base URL of anchor tags in the document. If there is - no such tag (or if it does not have a valid href attribute), the HTML - file's URL is used as the base URL. - - :param document: An HTML document representation. The current - implementation expects the result of ``html5lib.parse()``. 
- :param page_url: The URL of the HTML document. - - TODO: Remove when `html5lib` is dropped. - """ - for base in document.findall(".//base"): - href = base.get("href") - if href is not None: - return href - return page_url - - class CacheablePageContent: def __init__(self, page: "IndexContent") -> None: assert page.cache_link_parsing diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index e0f436fa345..c792d128bcf 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -5,7 +5,6 @@ import posixpath import re import urllib.parse -import xml.etree.ElementTree from dataclasses import dataclass from typing import ( TYPE_CHECKING, @@ -36,8 +35,6 @@ logger = logging.getLogger(__name__) -HTMLElement = xml.etree.ElementTree.Element - # Order matters, earlier hashes have a precedence over later hashes for what # we will pick to use. _SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5") From 96bd60e5918a2d86b55387a9267a78b6c98b0b4e Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 12:28:50 -0400 Subject: [PATCH 10/18] rename WheelMetadata to InMemoryMetadata as per review comments --- src/pip/_internal/metadata/pkg_resources.py | 6 +++--- tests/unit/metadata/test_metadata_pkg_resources.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index 29462223eb0..46e8b245d40 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -33,7 +33,7 @@ class EntryPoint(NamedTuple): group: str -class WheelMetadata: +class InMemoryMetadata: """IMetadataProvider that reads metadata files from a dictionary. This also maps metadata decoding exceptions to our internal exception type. 
@@ -106,7 +106,7 @@ def from_metadata_file( } dist = pkg_resources.DistInfoDistribution( location=filename, - metadata=WheelMetadata(metadata_text, filename), + metadata=InMemoryMetadata(metadata_text, filename), project_name=project_name, ) return cls(dist) @@ -127,7 +127,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: raise UnsupportedWheel(f"{name} has an invalid wheel, {e}") dist = pkg_resources.DistInfoDistribution( location=wheel.location, - metadata=WheelMetadata(metadata_text, wheel.location), + metadata=InMemoryMetadata(metadata_text, wheel.location), project_name=name, ) return cls(dist) diff --git a/tests/unit/metadata/test_metadata_pkg_resources.py b/tests/unit/metadata/test_metadata_pkg_resources.py index 6bb67156c9f..ab1a56107f4 100644 --- a/tests/unit/metadata/test_metadata_pkg_resources.py +++ b/tests/unit/metadata/test_metadata_pkg_resources.py @@ -11,7 +11,7 @@ from pip._internal.metadata.pkg_resources import ( Distribution, Environment, - WheelMetadata, + InMemoryMetadata, ) pkg_resources = pytest.importorskip("pip._vendor.pkg_resources") @@ -99,7 +99,7 @@ def test_wheel_metadata_works() -> None: dist = Distribution( pkg_resources.DistInfoDistribution( location="", - metadata=WheelMetadata({"METADATA": metadata.as_bytes()}, ""), + metadata=InMemoryMetadata({"METADATA": metadata.as_bytes()}, ""), project_name=name, ), ) @@ -116,7 +116,7 @@ def test_wheel_metadata_works() -> None: def test_wheel_metadata_throws_on_bad_unicode() -> None: - metadata = WheelMetadata({"METADATA": b"\xff"}, "") + metadata = InMemoryMetadata({"METADATA": b"\xff"}, "") with pytest.raises(UnsupportedWheel) as e: metadata.get_metadata("METADATA") From 2aa1c2f0e02889fb6e6c92616501233464070164 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 12:53:55 -0400 Subject: [PATCH 11/18] rename from_metadata_file{,_contents}() to avoid ambiguity pointed out by reviewer --- 
src/pip/_internal/metadata/__init__.py | 15 ++++++++------- src/pip/_internal/metadata/base.py | 9 ++++++--- src/pip/_internal/metadata/importlib/_dists.py | 18 +++++++++++++----- src/pip/_internal/metadata/pkg_resources.py | 12 +++++------- src/pip/_internal/operations/prepare.py | 12 +++++------- 5 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py index c41070cbab6..9f73ca7105f 100644 --- a/src/pip/_internal/metadata/__init__.py +++ b/src/pip/_internal/metadata/__init__.py @@ -106,21 +106,22 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio def get_metadata_distribution( - metadata_path: str, + metadata_contents: bytes, filename: str, canonical_name: str, ) -> BaseDistribution: - """Get the representation of the specified METADATA file. + """Get the dist representation of the specified METADATA file contents. - This returns a Distribution instance from the chosen backend based on the contents - of the file at ``metadata_path``. + This returns a Distribution instance from the chosen backend sourced from the data + in `metadata_contents`. - :param metadata_path: Path to the METADATA file. + :param metadata_contents: Contents of a METADATA file within a dist, or one served + via PEP 658. :param filename: Filename for the dist this metadata represents. :param canonical_name: Normalized project name of the given dist. 
""" - return select_backend().Distribution.from_metadata_file( - metadata_path, + return select_backend().Distribution.from_metadata_file_contents( + metadata_contents, filename, canonical_name, ) diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index ef92b40ad59..cafb79fb3dc 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -114,15 +114,18 @@ def from_directory(cls, directory: str) -> "BaseDistribution": raise NotImplementedError() @classmethod - def from_metadata_file( + def from_metadata_file_contents( cls, - metadata_path: str, + metadata_contents: bytes, filename: str, project_name: str, ) -> "BaseDistribution": """Load the distribution from the contents of a METADATA file. - :param metadata: The path to a METADATA file. + This is used to implement PEP 658 by generating a "shallow" dist object that can + be used for resolution without downloading or building the actual dist yet. + + :param metadata_contents: The contents of a METADATA file. :param filename: File name for the dist with this metadata. :param project_name: Name of the project this dist represents. 
""" diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index 5d5ef7860ad..ae821802a6a 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -28,6 +28,7 @@ ) from pip._internal.utils.misc import normalize_path from pip._internal.utils.packaging import safe_extra +from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file from ._compat import BasePath, get_dist_name @@ -110,15 +111,22 @@ def from_directory(cls, directory: str) -> BaseDistribution: return cls(dist, info_location, info_location.parent) @classmethod - def from_metadata_file( + def from_metadata_file_contents( cls, - metadata_path: str, + metadata_contents: bytes, filename: str, project_name: str, ) -> BaseDistribution: - metadata_location = pathlib.Path(metadata_path) - dist = importlib.metadata.Distribution.at(metadata_location.parent) - return cls(dist, metadata_location.parent, None) + # Generate temp dir to contain the metadata file, and write the file contents. + temp_dir = pathlib.Path( + TempDirectory(kind="metadata", globally_managed=True).path + ) + metadata_path = temp_dir / "METADATA" + with open(metadata_path, "wb") as f: + f.write(metadata_contents) + # Construct dist pointing to the newly created directory. 
+ dist = importlib.metadata.Distribution.at(metadata_path.parent) + return cls(dist, metadata_path.parent, None) @classmethod def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index 46e8b245d40..b2f3d1382a1 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -93,20 +93,18 @@ def from_directory(cls, directory: str) -> BaseDistribution: return cls(dist) @classmethod - def from_metadata_file( + def from_metadata_file_contents( cls, - metadata_path: str, + metadata_contents: bytes, filename: str, project_name: str, ) -> BaseDistribution: - with open(metadata_path, "rb") as f: - metadata = f.read() - metadata_text = { - "METADATA": metadata, + metadata_dict = { + "METADATA": metadata_contents, } dist = pkg_resources.DistInfoDistribution( location=filename, - metadata=InMemoryMetadata(metadata_text, filename), + metadata=InMemoryMetadata(metadata_dict, filename), project_name=project_name, ) return cls(dist) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index c71a4f5b325..2a61875b529 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -374,19 +374,17 @@ def _fetch_metadata_using_link_data_attr( req.req, metadata_link, ) + # Download the contents of the METADATA file, separate from the dist itself. metadata_file = get_http_url( metadata_link, self._download, hashes=metadata_link.as_hashes(), ) - # The file will be downloaded under the same name as it's listed in the index, - # which will end with .metadata. To make importlib.metadata.PathDistribution - # work, we need to keep it in the same directory, but rename it to METADATA. 
- containing_dir = os.path.dirname(metadata_file.path) - new_metadata_path = os.path.join(containing_dir, "METADATA") - os.rename(metadata_file.path, new_metadata_path) + with open(metadata_file.path, "rb") as f: + metadata_contents = f.read() + # Generate a dist just from those file contents. return get_metadata_distribution( - new_metadata_path, + metadata_contents, req.link.filename, req.req.name, ) From e5b2fcd1a066a780920859f48255bbb840f3fb33 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 13:39:12 -0400 Subject: [PATCH 12/18] add tests for PEP 658 metadata with wheel files too --- tests/functional/test_download.py | 138 +++++++++++++++++++++--------- 1 file changed, 99 insertions(+), 39 deletions(-) diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py index 6785fc73858..ede2213aa70 100644 --- a/tests/functional/test_download.py +++ b/tests/functional/test_download.py @@ -1264,17 +1264,14 @@ class Package: name: str version: str + filename: str metadata: MetadataKind # This will override any dependencies specified in the actual dist's METADATA. requires_dist: Tuple[str, ...] 
= () - def filename(self) -> str: - """We will only be pointed at .tar.gz packages, so we can guess the name.""" - return f"{self.name}-{self.version}.tar.gz" - def metadata_filename(self) -> str: """This is specified by PEP 658.""" - return f"{self.filename()}.metadata" + return f"{self.filename}.metadata" def generate_additional_tag(self) -> str: """This gets injected into the tag in the generated PyPI index page for this @@ -1289,26 +1286,27 @@ def generate_additional_tag(self) -> str: checksum = sha256(self.generate_metadata()).hexdigest() return f'data-dist-info-metadata="sha256={checksum}"' + def requires_str(self) -> str: + if not self.requires_dist: + return "" + joined = " and ".join(self.requires_dist) + return f"Requires-Dist: {joined}" + def generate_metadata(self) -> bytes: """This is written to `self.metadata_filename()` and will override the actual dist's METADATA, unless `self.metadata == MetadataKind.NoFile`.""" - if self.requires_dist: - joined = " and ".join(self.requires_dist) - requires_str = f"Requires-Dist: {joined}" - else: - requires_str = "" return dedent( f"""\ Metadata-Version: 2.1 Name: {self.name} Version: {self.version} - {requires_str} + {self.requires_str()} """ ).encode("utf-8") @pytest.fixture(scope="function") -def index_html_content(tmpdir: Path) -> Callable[..., Path]: +def write_index_html_content(tmpdir: Path) -> Callable[[str], Path]: """Generate a PyPI package index.html within a temporary local directory.""" html_dir = tmpdir / "index_html_content" html_dir.mkdir() @@ -1327,13 +1325,14 @@ def generate_index_html_subdir(index_html: str) -> Path: @pytest.fixture(scope="function") -def index_for_packages( +def html_index_for_packages( shared_data: TestData, - index_html_content: Callable[..., Path], + write_index_html_content: Callable[[str], Path], ) -> Callable[..., Path]: - """Generate a PyPI package index within a local directory pointing to blank data.""" + """Generate a PyPI HTML package index within a local directory 
pointing to + blank data.""" - def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path: + def generate_html_index_for_packages(packages: Dict[str, List[Package]]) -> Path: """ Produce a PyPI directory structure pointing to the specified packages. """ @@ -1353,7 +1352,7 @@ def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path: """ # (2) Generate the index.html in a new subdirectory of the temp directory. - index_html_subdir = index_html_content(index_html) + index_html_subdir = write_index_html_content(index_html) # (3) Generate subdirectories for individual packages, each with their own # index.html. @@ -1366,12 +1365,12 @@ def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path: # (3.1) Generate the tag which pip can crawl pointing to this # specific package version. download_links.append( - f' {package_link.filename()}
' # noqa: E501 + f' {package_link.filename}
' # noqa: E501 ) # (3.2) Copy over the corresponding file in `shared_data.packages`. shutil.copy( - shared_data.packages / package_link.filename(), - pkg_subdir / package_link.filename(), + shared_data.packages / package_link.filename, + pkg_subdir / package_link.filename, ) # (3.3) Write a metadata file, if applicable. if package_link.metadata != MetadataKind.NoFile: @@ -1399,13 +1398,13 @@ def generate_index_for_packages(packages: Dict[str, List[Package]]) -> Path: return index_html_subdir - return generate_index_for_packages + return generate_html_index_for_packages @pytest.fixture(scope="function") -def download_generated_index( +def download_generated_html_index( script: PipTestEnvironment, - index_for_packages: Callable[..., Path], + html_index_for_packages: Callable[[Dict[str, List[Package]]], Path], tmpdir: Path, ) -> Callable[..., Tuple[TestPipResult, Path]]: """Execute `pip download` against a generated PyPI index.""" @@ -1420,7 +1419,7 @@ def run_for_generated_index( Produce a PyPI directory structure pointing to the specified packages, then execute `pip download -i ...` pointing to our generated index. """ - index_dir = index_for_packages(packages) + index_dir = html_index_for_packages(packages) pip_args = [ "download", "-d", @@ -1438,17 +1437,61 @@ def run_for_generated_index( # The package database we generate for testing PEP 658 support. _simple_packages: Dict[str, List[Package]] = { "simple": [ - Package("simple", "1.0", MetadataKind.Sha256), - Package("simple", "2.0", MetadataKind.No), + Package("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256), + Package("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No), # This will raise a hashing error. - Package("simple", "3.0", MetadataKind.WrongHash), + Package("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash), ], "simple2": [ # Override the dependencies here in order to force pip to download # simple-1.0.tar.gz as well. 
- Package("simple2", "1.0", MetadataKind.Unhashed, ("simple==1.0",)), + Package( + "simple2", + "1.0", + "simple2-1.0.tar.gz", + MetadataKind.Unhashed, + ("simple==1.0",), + ), # This will raise an error when pip attempts to fetch the metadata file. - Package("simple2", "2.0", MetadataKind.NoFile), + Package("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile), + ], + "colander": [ + # Ensure we can read the dependencies from a metadata file within a wheel + # *without* PEP 658 metadata. + Package( + "colander", "0.9.9", "colander-0.9.9-py2.py3-none-any.whl", MetadataKind.No + ), + ], + "compilewheel": [ + # Ensure we can override the dependencies of a wheel file by injecting PEP + # 658 metadata. + Package( + "compilewheel", + "1.0", + "compilewheel-1.0-py2.py3-none-any.whl", + MetadataKind.Unhashed, + ("simple==1.0",), + ), + ], + "has-script": [ + # Ensure we check PEP 658 metadata hashing errors for wheel files. + Package( + "has-script", + "1.0", + "has.script-1.0-py2.py3-none-any.whl", + MetadataKind.WrongHash, + ), + ], + "translationstring": [ + Package( + "translationstring", "1.1", "translationstring-1.1.tar.gz", MetadataKind.No + ), + ], + "priority": [ + # Ensure we check for a missing metadata file for wheels. 
+ Package( + "priority", "1.0", "priority-1.0-py2.py3-none-any.whl", MetadataKind.NoFile + ), ], } @@ -1458,16 +1501,24 @@ def run_for_generated_index( [ ("simple2==1.0", ["simple-1.0.tar.gz", "simple2-1.0.tar.gz"]), ("simple==2.0", ["simple-2.0.tar.gz"]), + ( + "colander", + ["colander-0.9.9-py2.py3-none-any.whl", "translationstring-1.1.tar.gz"], + ), + ( + "compilewheel", + ["compilewheel-1.0-py2.py3-none-any.whl", "simple-1.0.tar.gz"], + ), ], ) def test_download_metadata( - download_generated_index: Callable[..., Tuple[TestPipResult, Path]], + download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], requirement_to_download: str, expected_outputs: List[str], ) -> None: """Verify that if a data-dist-info-metadata attribute is present, then it is used instead of the actual dist's METADATA.""" - _, download_dir = download_generated_index( + _, download_dir = download_generated_html_index( _simple_packages, [requirement_to_download], ) @@ -1480,17 +1531,21 @@ def test_download_metadata( ( "simple==3.0", "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996", - ) + ), + ( + "has-script", + "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b", + ), ], ) def test_incorrect_metadata_hash( - download_generated_index: Callable[..., Tuple[TestPipResult, Path]], + download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], requirement_to_download: str, real_hash: str, ) -> None: """Verify that if a hash for data-dist-info-metadata is provided, it must match the actual hash of the metadata file.""" - result, _ = download_generated_index( + result, _ = download_generated_html_index( _simple_packages, [requirement_to_download], allow_error=True, @@ -1503,23 +1558,28 @@ def test_incorrect_metadata_hash( @pytest.mark.parametrize( - "requirement_to_download", - ["simple2==2.0"], + "requirement_to_download, expected_url", + [ + ("simple2==2.0", "simple2-2.0.tar.gz.metadata"), + ("priority", 
"priority-1.0-py2.py3-none-any.whl.metadata"), + ], ) def test_metadata_not_found( - download_generated_index: Callable[..., Tuple[TestPipResult, Path]], + download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], requirement_to_download: str, + expected_url: str, ) -> None: """Verify that if a data-dist-info-metadata attribute is provided, that pip will fetch the .metadata file at the location specified by PEP 658, and error if unavailable.""" - result, _ = download_generated_index( + result, _ = download_generated_html_index( _simple_packages, [requirement_to_download], allow_error=True, ) assert result.returncode != 0 + expected_re = re.escape(expected_url) pattern = re.compile( - r"ERROR: 404 Client Error: FileNotFoundError for url:.*simple2-2\.0\.tar\.gz\.metadata" # noqa: E501 + f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}" ) assert pattern.search(result.stderr), (pattern, result.stderr) From 89f235d607f8e273800bacd61d8e968050d5312c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 5 Sep 2022 13:53:34 -0400 Subject: [PATCH 13/18] add a note about further testing the json client --- tests/unit/test_collector.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 81d8765c837..55676a4fc5c 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -459,6 +459,12 @@ def test_parse_links__requires_python( _test_parse_links_data_attribute(anchor_html, "requires_python", expected) +# TODO: this test generates its own examples to validate the json client implementation +# instead of sharing those examples with the html client testing. 
We expect this won't
+# hide any bugs because operations like resolving PEP 658 metadata should use the same
+# code for both types of indices, but it might be nice to explicitly have all our tests
+# in test_download.py execute over both html and json indices with
+# a pytest.mark.parametrize decorator to ensure nothing slips through the cracks.
 def test_parse_links_json() -> None:
     json_bytes = json.dumps(
         {

From fc9bcdd25c9ded708cd478a5a8356364b689b5d3 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 7 Sep 2022 09:58:36 -0400
Subject: [PATCH 14/18] refactor and rename some variables in a confusing
 section of terse code

---
 src/pip/_internal/metadata/pkg_resources.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index b2f3d1382a1..f330ef12a2c 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -114,7 +114,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
         try:
             with wheel.as_zipfile() as zf:
                 info_dir, _ = parse_wheel(zf, name)
-                metadata_text = {
+                metadata_dict = {
                     path.split("/", 1)[-1]: read_wheel_metadata_file(zf, path)
                     for path in zf.namelist()
                     if path.startswith(f"{info_dir}/")
@@ -125,7 +125,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
             raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
         dist = pkg_resources.DistInfoDistribution(
             location=wheel.location,
-            metadata=InMemoryMetadata(metadata_text, wheel.location),
+            metadata=InMemoryMetadata(metadata_dict, wheel.location),
             project_name=name,
         )
         return cls(dist)

From 036bfc0d66733eb2d1a352281fe65276ffb27d07 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 7 Sep 2022 09:59:08 -0400
Subject: [PATCH 15/18] update message for MetadataInconsistent to allow use
 with PEP 658
metadata --- src/pip/_internal/exceptions.py | 7 +++---- tests/functional/test_new_resolver.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 377cde52521..1cf93bc797d 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -335,8 +335,8 @@ class MetadataInconsistent(InstallationError): """Built metadata contains inconsistent information. This is raised when the metadata contains values (e.g. name and version) - that do not match the information previously obtained from sdist filename - or user-supplied ``#egg=`` value. + that do not match the information previously obtained from sdist filename, + user-supplied ``#egg=`` value, or an install requirement name. """ def __init__( @@ -349,8 +349,7 @@ def __init__( def __str__(self) -> str: template = ( - "Requested {} has inconsistent {}: " - "filename has {!r}, but metadata has {!r}" + "Requested {} has inconsistent {}: expected {!r}, but metadata has {!r}" ) return template.format(self.ireq, self.field, self.f_val, self.m_val) diff --git a/tests/functional/test_new_resolver.py b/tests/functional/test_new_resolver.py index efcae29289f..fc52ab9c8d8 100644 --- a/tests/functional/test_new_resolver.py +++ b/tests/functional/test_new_resolver.py @@ -1363,7 +1363,7 @@ def test_new_resolver_skip_inconsistent_metadata(script: PipTestEnvironment) -> ) assert ( - " inconsistent version: filename has '3', but metadata has '2'" + " inconsistent version: expected '3', but metadata has '2'" ) in result.stdout, str(result) script.assert_installed(a="1") From 8aeb10847dc2b8edd2671728576cf453bcbdf800 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 7 Sep 2022 09:59:56 -0400 Subject: [PATCH 16/18] raise MetadataInconsistent if the name from METADATA doesn't match the install requirement --- src/pip/_internal/operations/prepare.py | 19 ++++++++++++++++--- 1 
file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 2a61875b529..4bf414cb005 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -19,6 +19,7 @@ HashMismatch, HashUnpinned, InstallationError, + MetadataInconsistent, NetworkConnectionError, PreviousBuildDirError, VcsHashUnsupported, @@ -365,6 +366,7 @@ def _fetch_metadata_using_link_data_attr( req: InstallRequirement, ) -> Optional[BaseDistribution]: """Fetch metadata from the data-dist-info-metadata attribute, if possible.""" + # (1) Get the link to the metadata file, if provided by the backend. metadata_link = req.link.metadata_link() if metadata_link is None: return None @@ -374,7 +376,7 @@ def _fetch_metadata_using_link_data_attr( req.req, metadata_link, ) - # Download the contents of the METADATA file, separate from the dist itself. + # (2) Download the contents of the METADATA file, separate from the dist itself. metadata_file = get_http_url( metadata_link, self._download, @@ -382,12 +384,23 @@ def _fetch_metadata_using_link_data_attr( ) with open(metadata_file.path, "rb") as f: metadata_contents = f.read() - # Generate a dist just from those file contents. - return get_metadata_distribution( + # (3) Generate a dist just from those file contents. + metadata_dist = get_metadata_distribution( metadata_contents, req.link.filename, req.req.name, ) + # (4) Ensure the Name: field from the METADATA file matches the name from the + # install requirement. + # + # NB: raw_name will fall back to the name from the install requirement if + # the Name: field is not present, but it's noted in the raw_name docstring + # that that should NEVER happen anyway. 
+ if metadata_dist.raw_name != req.req.name: + raise MetadataInconsistent( + req, "Name", req.req.name, metadata_dist.raw_name + ) + return metadata_dist def _fetch_metadata_using_lazy_wheel( self, From 931501fae3d2be86ac9094ff612e0c2bf75e1600 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Thu, 8 Sep 2022 07:30:45 +0800 Subject: [PATCH 17/18] Use pathlib shorthand to write file --- src/pip/_internal/metadata/importlib/_dists.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index ae821802a6a..65c043c87ef 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -122,8 +122,7 @@ def from_metadata_file_contents( TempDirectory(kind="metadata", globally_managed=True).path ) metadata_path = temp_dir / "METADATA" - with open(metadata_path, "wb") as f: - f.write(metadata_contents) + metadata_path.write_bytes(metadata_contents) # Construct dist pointing to the newly created directory. 
dist = importlib.metadata.Distribution.at(metadata_path.parent) return cls(dist, metadata_path.parent, None) From f8d6daebcc7e92229cb4e34990a1b72d8aa24bb5 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Thu, 8 Sep 2022 07:32:24 +0800 Subject: [PATCH 18/18] Switch to f-string --- src/pip/_internal/exceptions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 1cf93bc797d..2ab1f591f12 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -348,10 +348,10 @@ def __init__( self.m_val = m_val def __str__(self) -> str: - template = ( - "Requested {} has inconsistent {}: expected {!r}, but metadata has {!r}" + return ( + f"Requested {self.ireq} has inconsistent {self.field}: " + f"expected {self.f_val!r}, but metadata has {self.m_val!r}" ) - return template.format(self.ireq, self.field, self.f_val, self.m_val) class LegacyInstallFailure(DiagnosticPipError):