From 39ccd3f74b3d7984be6795ae2a0abbc4be344059 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 5 Jan 2022 22:34:51 -0500
Subject: [PATCH 1/4] implement --dry-run
---
src/pip/_internal/cli/req_command.py | 2 ++
src/pip/_internal/commands/download.py | 25 +++++++++++++------
.../resolution/resolvelib/resolver.py | 8 ++++--
tests/functional/test_download.py | 11 ++++++++
.../resolution_resolvelib/test_resolver.py | 1 +
5 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py
index aab177002d4..74a1514055a 100644
--- a/src/pip/_internal/cli/req_command.py
+++ b/src/pip/_internal/cli/req_command.py
@@ -313,6 +313,7 @@ def make_resolver(
use_user_site: bool = False,
ignore_installed: bool = True,
ignore_requires_python: bool = False,
+ dry_run: bool = False,
force_reinstall: bool = False,
upgrade_strategy: str = "to-satisfy-only",
use_pep517: Optional[bool] = None,
@@ -344,6 +345,7 @@ def make_resolver(
ignore_dependencies=options.ignore_dependencies,
ignore_installed=ignore_installed,
ignore_requires_python=ignore_requires_python,
+ dry_run=dry_run,
force_reinstall=force_reinstall,
upgrade_strategy=upgrade_strategy,
py_version_info=py_version_info,
diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py
index a6d7e628f2b..dcdc49ef2c0 100644
--- a/src/pip/_internal/commands/download.py
+++ b/src/pip/_internal/commands/download.py
@@ -62,6 +62,13 @@ def add_options(self) -> None:
help="Download packages into
.",
)
+ self.cmd_opts.add_option(
+ "--dry-run",
+ dest="dry_run",
+ action="store_true",
+ help="Avoid actually downloading wheels.",
+ )
+
cmdoptions.add_target_python_options(self.cmd_opts)
index_opts = cmdoptions.make_option_group(
@@ -122,19 +129,21 @@ def run(self, options: Values, args: List[str]) -> int:
options=options,
ignore_requires_python=options.ignore_requires_python,
py_version_info=options.python_version,
+ dry_run=options.dry_run,
)
self.trace_basic_info(finder)
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
- downloaded: List[str] = []
- for req in requirement_set.requirements.values():
- if req.satisfied_by is None:
- assert req.name is not None
- preparer.save_linked_requirement(req)
- downloaded.append(req.name)
- if downloaded:
- write_output("Successfully downloaded %s", " ".join(downloaded))
+ if not options.dry_run:
+ downloaded: List[str] = []
+ for req in requirement_set.requirements.values():
+ if req.satisfied_by is None:
+ assert req.name is not None
+ preparer.save_linked_requirement(req)
+ downloaded.append(req.name)
+ if downloaded:
+ write_output("Successfully downloaded %s", " ".join(downloaded))
return SUCCESS
diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py
index 32ef7899ba6..47e3defd8cd 100644
--- a/src/pip/_internal/resolution/resolvelib/resolver.py
+++ b/src/pip/_internal/resolution/resolvelib/resolver.py
@@ -45,6 +45,7 @@ def __init__(
ignore_dependencies: bool,
ignore_installed: bool,
ignore_requires_python: bool,
+ dry_run: bool,
force_reinstall: bool,
upgrade_strategy: str,
suppress_build_failures: bool,
@@ -66,6 +67,7 @@ def __init__(
py_version_info=py_version_info,
)
self.ignore_dependencies = ignore_dependencies
+ self.dry_run = dry_run
self.upgrade_strategy = upgrade_strategy
self._result: Optional[Result] = None
@@ -159,8 +161,10 @@ def resolve(
req_set.add_named_requirement(ireq)
- reqs = req_set.all_requirements
- self.factory.preparer.prepare_linked_requirements_more(reqs)
+ if not self.dry_run:
+ reqs = req_set.all_requirements
+ self.factory.preparer.prepare_linked_requirements_more(reqs)
+
return req_set
def get_installation_order(
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index ace2ff74c5b..af55276d1ba 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -90,6 +90,17 @@ def test_basic_download_should_download_dependencies(
result.did_not_create(script.site_packages / "openid")
+@pytest.mark.network
+def test_dry_run_should_not_download_dependencies(
+ script: PipTestEnvironment,
+) -> None:
+ """
+ It should not download dependencies into the scratch path.
+ """
+ result = script.pip("download", "--dry-run", "Paste[openid]==1.7.5.1", "-d", ".")
+ result.did_not_create(Path("scratch") / "Paste-1.7.5.1.tar.gz")
+
+
def test_download_wheel_archive(script: PipTestEnvironment, data: TestData) -> None:
"""
It should download a wheel archive path
diff --git a/tests/unit/resolution_resolvelib/test_resolver.py b/tests/unit/resolution_resolvelib/test_resolver.py
index db71f911acd..50816c1e526 100644
--- a/tests/unit/resolution_resolvelib/test_resolver.py
+++ b/tests/unit/resolution_resolvelib/test_resolver.py
@@ -28,6 +28,7 @@ def resolver(preparer: RequirementPreparer, finder: PackageFinder) -> Resolver:
ignore_installed=False,
ignore_requires_python=False,
force_reinstall=False,
+ dry_run=True,
upgrade_strategy="to-satisfy-only",
suppress_build_failures=False,
)
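
Patch 1 threads a `dry_run` flag from the CLI through `make_resolver()` into the 2020 resolver, which then skips `prepare_linked_requirements_more()`, so a full resolve runs without fetching any distributions. A minimal sketch of exercising the flag through pip's internal entry point (`pip._internal` is not a stable API; normally you would just run `pip download --dry-run ...` from a shell):

    from pip._internal.cli.main import main as pip_main

    # Resolves Paste[openid]==1.7.5.1 and its dependencies, but skips the
    # download step guarded by `if not options.dry_run:` above.
    exit_code = pip_main(["download", "--dry-run", "Paste[openid]==1.7.5.1", "-d", "."])
    assert exit_code == 0  # SUCCESS
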
From 711972f25f3ddd2db8c3e60fa6f1ab37ed174344 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Sun, 19 Dec 2021 23:35:51 -0500
Subject: [PATCH 2/4] implement --report
- create LinkWithSource to retain attrs from InstallRequirement
- add tests for report output for top-level requirements
- add tests for more of the report JSON format
- add passing tests for JSON report output including PEP 658!
- add docstrings to several classes and functions, including tests!
- move the --report implementation into resolvelib
- use an abstract base class instead of a Union for InfoType
- use frozen dataclasses for InfoType subclasses
---
src/pip/_internal/commands/download.py | 42 ++-
src/pip/_internal/index/collector.py | 103 +-----
src/pip/_internal/metadata/base.py | 3 +
src/pip/_internal/metadata/pkg_resources.py | 3 +
src/pip/_internal/models/direct_url.py | 109 +++----
src/pip/_internal/models/link.py | 249 ++++++++++++++-
src/pip/_internal/req/req_install.py | 33 +-
src/pip/_internal/resolution/base.py | 37 ++-
.../_internal/resolution/resolvelib/base.py | 57 ++--
.../resolution/resolvelib/candidates.py | 28 +-
.../resolution/resolvelib/provider.py | 3 +-
.../resolution/resolvelib/reporter.py | 190 +++++++++++-
.../resolution/resolvelib/requirements.py | 18 ++
.../resolution/resolvelib/resolver.py | 19 +-
tests/functional/test_download.py | 293 +++++++++++++++++-
tests/functional/test_freeze.py | 9 +-
tests/functional/test_list.py | 5 +-
tests/lib/server.py | 8 -
tests/unit/test_collector.py | 123 +++++++-
tests/unit/test_direct_url_helpers.py | 13 +-
20 files changed, 1107 insertions(+), 238 deletions(-)
diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py
index dcdc49ef2c0..180e657bae9 100644
--- a/src/pip/_internal/commands/download.py
+++ b/src/pip/_internal/commands/download.py
@@ -1,3 +1,4 @@
+import json
import logging
import os
from optparse import Values
@@ -7,7 +8,10 @@
from pip._internal.cli.cmdoptions import make_target_python
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
from pip._internal.cli.status_codes import SUCCESS
+from pip._internal.exceptions import CommandError
from pip._internal.operations.build.build_tracker import get_build_tracker
+from pip._internal.resolution.base import RequirementSetWithCandidates
+from pip._internal.resolution.resolvelib.reporter import ResolutionResult
from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
from pip._internal.utils.temp_dir import TempDirectory
@@ -66,7 +70,22 @@ def add_options(self) -> None:
"--dry-run",
dest="dry_run",
action="store_true",
- help="Avoid actually downloading wheels.",
+ help=(
+ "Avoid actually downloading wheels or sdists. "
+ "Intended to be used with --report."
+ ),
+ )
+
+ self.cmd_opts.add_option(
+ "--report",
+ "--resolution-report",
+ dest="json_report_file",
+ metavar="file",
+ default=None,
+ help=(
+ "Print a JSON object representing the resolve into . "
+ "Often used with --dry-run."
+ ),
)
cmdoptions.add_target_python_options(self.cmd_opts)
@@ -146,4 +165,25 @@ def run(self, options: Values, args: List[str]) -> int:
if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))
+ # The rest of this method pertains to generating the resolution report
+ # requested via --report.
+ if not options.json_report_file:
+ return SUCCESS
+ if not isinstance(requirement_set, RequirementSetWithCandidates):
+ raise CommandError(
+ "The legacy resolver is being used via "
+ "--use-deprecated=legacy-resolver."
+ "The legacy resolver does not retain detailed dependency information, "
+ "so `pip download --report` cannot be used with it. "
+ )
+
+ resolution_result = ResolutionResult.generate_resolve_report(
+ reqs, requirement_set
+ )
+
+ # Write the full report data to the JSON output file.
+ with open(options.json_report_file, "w") as f:
+ json.dump(resolution_result.to_dict(), f, indent=4)
+ write_output(f"JSON report written to '{options.json_report_file}'.")
+
return SUCCESS
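
For orientation, a report produced by the `json.dump(...)` call above has roughly the following shape. This Python-literal sketch is assembled from the `to_dict()` methods introduced below and from the functional tests; the URL and hash are illustrative placeholders, not verbatim pip output:

    {
        "experimental": True,
        "input_requirements": ["pycparser==2.21"],
        # "==" plus the target interpreter version (or --python-version).
        "python_version": "==3.9.0",
        "candidates": {
            "pycparser": {
                "requirement": "pycparser==2.21",
                "download_info": {
                    "direct_url": {
                        "url": "https://files.pythonhosted.org/.../pycparser-2.21-py2.py3-none-any.whl",
                        "archive_info": {"hash": "sha256=..."},
                    },
                    # Populated when the index serves PEP 658 metadata.
                    "dist_info_metadata": None,
                },
                "dependencies": {},
                "requires_python": "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7",
            },
        },
    }
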
diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py
index e6e9469af1a..15c9dbdef54 100644
--- a/src/pip/_internal/index/collector.py
+++ b/src/pip/_internal/index/collector.py
@@ -8,10 +8,8 @@
import itertools
import logging
import os
-import re
import urllib.parse
import urllib.request
-import xml.etree.ElementTree
from html.parser import HTMLParser
from optparse import Values
from typing import (
@@ -33,12 +31,12 @@
from pip._vendor.requests.exceptions import RetryError, SSLError
from pip._internal.exceptions import NetworkConnectionError
-from pip._internal.models.link import Link
+from pip._internal.models.link import HTMLElement, Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.filetypes import is_archive_file
-from pip._internal.utils.misc import pairwise, redact_auth_from_url
+from pip._internal.utils.misc import redact_auth_from_url
from pip._internal.vcs import vcs
from .sources import CandidatesFromPage, LinkSource, build_source
@@ -50,7 +48,6 @@
logger = logging.getLogger(__name__)
-HTMLElement = xml.etree.ElementTree.Element
ResponseHeaders = MutableMapping[str, str]
@@ -182,94 +179,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str:
return page_url
-def _clean_url_path_part(part: str) -> str:
- """
- Clean a "part" of a URL path (i.e. after splitting on "@" characters).
- """
- # We unquote prior to quoting to make sure nothing is double quoted.
- return urllib.parse.quote(urllib.parse.unquote(part))
-
-
-def _clean_file_url_path(part: str) -> str:
- """
- Clean the first part of a URL path that corresponds to a local
- filesystem path (i.e. the first part after splitting on "@" characters).
- """
- # We unquote prior to quoting to make sure nothing is double quoted.
- # Also, on Windows the path part might contain a drive letter which
- # should not be quoted. On Linux where drive letters do not
- # exist, the colon should be quoted. We rely on urllib.request
- # to do the right thing here.
- return urllib.request.pathname2url(urllib.request.url2pathname(part))
-
-
-# percent-encoded: /
-_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
-
-
-def _clean_url_path(path: str, is_local_path: bool) -> str:
- """
- Clean the path portion of a URL.
- """
- if is_local_path:
- clean_func = _clean_file_url_path
- else:
- clean_func = _clean_url_path_part
-
- # Split on the reserved characters prior to cleaning so that
- # revision strings in VCS URLs are properly preserved.
- parts = _reserved_chars_re.split(path)
-
- cleaned_parts = []
- for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
- cleaned_parts.append(clean_func(to_clean))
- # Normalize %xx escapes (e.g. %2f -> %2F)
- cleaned_parts.append(reserved.upper())
-
- return "".join(cleaned_parts)
-
-
-def _clean_link(url: str) -> str:
- """
- Make sure a link is fully quoted.
- For example, if ' ' occurs in the URL, it will be replaced with "%20",
- and without double-quoting other characters.
- """
- # Split the URL into parts according to the general structure
- # `scheme://netloc/path;parameters?query#fragment`.
- result = urllib.parse.urlparse(url)
- # If the netloc is empty, then the URL refers to a local filesystem path.
- is_local_path = not result.netloc
- path = _clean_url_path(result.path, is_local_path=is_local_path)
- return urllib.parse.urlunparse(result._replace(path=path))
-
-
-def _create_link_from_element(
- element_attribs: Dict[str, Optional[str]],
- page_url: str,
- base_url: str,
-) -> Optional[Link]:
- """
- Convert an anchor element's attributes in a simple repository page to a Link.
- """
- href = element_attribs.get("href")
- if not href:
- return None
-
- url = _clean_link(urllib.parse.urljoin(base_url, href))
- pyrequire = element_attribs.get("data-requires-python")
- yanked_reason = element_attribs.get("data-yanked")
-
- link = Link(
- url,
- comes_from=page_url,
- requires_python=pyrequire,
- yanked_reason=yanked_reason,
- )
-
- return link
-
-
class CacheablePageContent:
def __init__(self, page: "HTMLPage") -> None:
assert page.cache_link_parsing
@@ -326,7 +235,7 @@ def _parse_links_html5lib(page: "HTMLPage") -> Iterable[Link]:
url = page.url
base_url = _determine_base_url(document, url)
for anchor in document.findall(".//a"):
- link = _create_link_from_element(
+ link = Link.from_element(
anchor.attrib,
page_url=url,
base_url=base_url,
@@ -353,11 +262,7 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin
url = page.url
base_url = parser.base_url or url
for anchor in parser.anchors:
- link = _create_link_from_element(
- anchor,
- page_url=url,
- base_url=base_url,
- )
+ link = Link.from_element(anchor, page_url=url, base_url=base_url)
if link is None:
continue
yield link
diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py
index f1a1ee62faa..1528500c48c 100644
--- a/src/pip/_internal/metadata/base.py
+++ b/src/pip/_internal/metadata/base.py
@@ -120,6 +120,9 @@ def __repr__(self) -> str:
def __str__(self) -> str:
return f"{self.raw_name} {self.version}"
+ def as_serializable_requirement(self) -> Requirement:
+ raise NotImplementedError()
+
@property
def location(self) -> Optional[str]:
"""Where the distribution is loaded from.
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index ffde8c77e73..fa49ba37189 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -113,6 +113,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
)
return cls(dist)
+ def as_serializable_requirement(self) -> Requirement:
+ return self._dist.as_requirement()
+
@property
def location(self) -> Optional[str]:
return self._dist.location
diff --git a/src/pip/_internal/models/direct_url.py b/src/pip/_internal/models/direct_url.py
index e75feda9ca9..9eff12ba3e5 100644
--- a/src/pip/_internal/models/direct_url.py
+++ b/src/pip/_internal/models/direct_url.py
@@ -1,8 +1,10 @@
""" PEP 610 """
+import abc
import json
import re
import urllib.parse
-from typing import Any, Dict, Iterable, Optional, Type, TypeVar, Union
+from dataclasses import dataclass
+from typing import Any, ClassVar, Dict, Iterable, Optional, Type, TypeVar
__all__ = [
"DirectUrl",
@@ -47,8 +49,39 @@ def _get_required(
return value
-def _exactly_one_of(infos: Iterable[Optional["InfoType"]]) -> "InfoType":
- infos = [info for info in infos if info is not None]
+def _filter_none(**kwargs: Any) -> Dict[str, Any]:
+ """Make dict excluding None values."""
+ return {k: v for k, v in kwargs.items() if v is not None}
+
+
+class InfoType(metaclass=abc.ABCMeta):
+ """Superclass for the types of metadata that can be stored within a "direct URL"."""
+
+ name: ClassVar[str]
+
+ @classmethod
+ @abc.abstractmethod
+ def _from_dict(cls: Type[T], d: Optional[Dict[str, Any]]) -> Optional[T]:
+ """Parse an instance of this class from a JSON-serializable dict."""
+
+ @abc.abstractmethod
+ def _to_dict(self) -> Dict[str, Any]:
+ """Produce a JSON-serializable dict which can be parsed with `._from_dict()`."""
+
+ @classmethod
+ def from_dict(cls, d: Dict[str, Any]) -> "InfoType":
+ """Parse exactly one of the known subclasses from the dict `d`."""
+ return _exactly_one_of(
+ [
+ ArchiveInfo._from_dict(_get(d, dict, "archive_info")),
+ DirInfo._from_dict(_get(d, dict, "dir_info")),
+ VcsInfo._from_dict(_get(d, dict, "vcs_info")),
+ ]
+ )
+
+
+def _exactly_one_of(infos: Iterable[Optional[InfoType]]) -> InfoType:
+ infos = list(filter(None, infos))
if not infos:
raise DirectUrlValidationError(
"missing one of archive_info, dir_info, vcs_info"
@@ -61,23 +94,15 @@ def _exactly_one_of(infos: Iterable[Optional["InfoType"]]) -> "InfoType":
return infos[0]
-def _filter_none(**kwargs: Any) -> Dict[str, Any]:
- """Make dict excluding None values."""
- return {k: v for k, v in kwargs.items() if v is not None}
-
-
-class VcsInfo:
- name = "vcs_info"
+@dataclass(frozen=True)
+class VcsInfo(InfoType):
+ vcs: str
+ commit_id: str
+ requested_revision: Optional[str] = None
+ resolved_revision: Optional[str] = None
+ resolved_revision_type: Optional[str] = None
- def __init__(
- self,
- vcs: str,
- commit_id: str,
- requested_revision: Optional[str] = None,
- ) -> None:
- self.vcs = vcs
- self.requested_revision = requested_revision
- self.commit_id = commit_id
+ name: ClassVar[str] = "vcs_info"
@classmethod
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]:
@@ -97,14 +122,11 @@ def _to_dict(self) -> Dict[str, Any]:
)
-class ArchiveInfo:
- name = "archive_info"
+@dataclass(frozen=True)
+class ArchiveInfo(InfoType):
+ hash: Optional[str] = None
- def __init__(
- self,
- hash: Optional[str] = None,
- ) -> None:
- self.hash = hash
+ name: ClassVar[str] = "archive_info"
@classmethod
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]:
@@ -116,14 +138,11 @@ def _to_dict(self) -> Dict[str, Any]:
return _filter_none(hash=self.hash)
-class DirInfo:
- name = "dir_info"
+@dataclass(frozen=True)
+class DirInfo(InfoType):
+ editable: bool = False
- def __init__(
- self,
- editable: bool = False,
- ) -> None:
- self.editable = editable
+ name: ClassVar[str] = "dir_info"
@classmethod
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["DirInfo"]:
@@ -135,19 +154,11 @@ def _to_dict(self) -> Dict[str, Any]:
return _filter_none(editable=self.editable or None)
-InfoType = Union[ArchiveInfo, DirInfo, VcsInfo]
-
-
+@dataclass(frozen=True)
class DirectUrl:
- def __init__(
- self,
- url: str,
- info: InfoType,
- subdirectory: Optional[str] = None,
- ) -> None:
- self.url = url
- self.info = info
- self.subdirectory = subdirectory
+ url: str
+ info: InfoType
+ subdirectory: Optional[str] = None
def _remove_auth_from_netloc(self, netloc: str) -> str:
if "@" not in netloc:
@@ -184,13 +195,7 @@ def from_dict(cls, d: Dict[str, Any]) -> "DirectUrl":
return DirectUrl(
url=_get_required(d, str, "url"),
subdirectory=_get(d, str, "subdirectory"),
- info=_exactly_one_of(
- [
- ArchiveInfo._from_dict(_get(d, dict, "archive_info")),
- DirInfo._from_dict(_get(d, dict, "dir_info")),
- VcsInfo._from_dict(_get(d, dict, "vcs_info")),
- ]
- ),
+ info=InfoType.from_dict(d),
)
def to_dict(self) -> Dict[str, Any]:
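
Since the info classes are now frozen dataclasses, they hash and compare by value, which the round-trip assertions in the functional tests below rely on. A small sketch (URL and hash are made up):

    from pip._internal.models.direct_url import ArchiveInfo, DirectUrl

    direct_url = DirectUrl(
        url="https://example.com/pkg-1.0.tar.gz",
        info=ArchiveInfo(hash="sha256=abc123"),
    )
    d = direct_url.to_dict()  # {"url": ..., "archive_info": {"hash": "sha256=abc123"}}
    # InfoType.from_dict() re-parses exactly one of archive_info/dir_info/vcs_info.
    assert DirectUrl.from_dict(d) == direct_url
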
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index 6069b278b9b..6f1aa62e83a 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -1,14 +1,19 @@
import functools
+import itertools
import logging
import os
import posixpath
import re
import urllib.parse
-from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
+import xml.etree.ElementTree
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Tuple, Union
+from pip._internal.models.direct_url import ArchiveInfo, DirectUrl
from pip._internal.utils.filetypes import WHEEL_EXTENSION
from pip._internal.utils.hashes import Hashes
from pip._internal.utils.misc import (
+ pairwise,
redact_auth_from_url,
split_auth_from_netloc,
splitext,
@@ -22,9 +27,116 @@
logger = logging.getLogger(__name__)
+HTMLElement = xml.etree.ElementTree.Element
+
+
_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")
+@dataclass(frozen=True)
+class LinkHash:
+ """Links to content may have embedded hash values. This class parses those.
+
+ `name` must be a member of `_SUPPORTED_HASHES`."""
+
+ name: str
+ value: str
+
+ # TODO: consider beginning/ending this with \b? Otherwise we risk accepting invalid
+ # hashes such as "sha256=aa113592bbeg", since this pattern will just terminate the
+ # search at "aa113592bbe" and discount the "g".
+ # TODO: consider re.IGNORECASE?
+ _hash_re = re.compile(
+ r"({choices})=([a-f0-9]+)".format(
+ choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
+ )
+ )
+
+ @classmethod
+ @functools.lru_cache(maxsize=None)
+ def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
+ """Search a string for a checksum algorithm name and encoded output value."""
+ match = cls._hash_re.search(url)
+ if match is None:
+ return None
+ name, value = match.groups()
+ return cls(name=name, value=value)
+
+ def to_archive_info(self) -> ArchiveInfo:
+ """Convert to ArchiveInfo to form a DirectUrl instance (see PEP 610)."""
+ return ArchiveInfo(hash=f"{self.name}={self.value}")
+
+ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
+ """
+ Return True if the current hash is allowed by `hashes`.
+ """
+ if hashes is None:
+ return False
+ return hashes.is_hash_allowed(self.name, hex_digest=self.value)
+
+
+def _clean_url_path_part(part: str) -> str:
+ """
+ Clean a "part" of a URL path (i.e. after splitting on "@" characters).
+ """
+ # We unquote prior to quoting to make sure nothing is double quoted.
+ return urllib.parse.quote(urllib.parse.unquote(part))
+
+
+def _clean_file_url_path(part: str) -> str:
+ """
+ Clean the first part of a URL path that corresponds to a local
+ filesystem path (i.e. the first part after splitting on "@" characters).
+ """
+ # We unquote prior to quoting to make sure nothing is double quoted.
+ # Also, on Windows the path part might contain a drive letter which
+ # should not be quoted. On Linux where drive letters do not
+ # exist, the colon should be quoted. We rely on urllib.request
+ # to do the right thing here.
+ return urllib.request.pathname2url(urllib.request.url2pathname(part))
+
+
+# percent-encoded: /
+_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
+
+
+def _clean_url_path(path: str, is_local_path: bool) -> str:
+ """
+ Clean the path portion of a URL.
+ """
+ if is_local_path:
+ clean_func = _clean_file_url_path
+ else:
+ clean_func = _clean_url_path_part
+
+ # Split on the reserved characters prior to cleaning so that
+ # revision strings in VCS URLs are properly preserved.
+ parts = _reserved_chars_re.split(path)
+
+ cleaned_parts = []
+ for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
+ cleaned_parts.append(clean_func(to_clean))
+ # Normalize %xx escapes (e.g. %2f -> %2F)
+ cleaned_parts.append(reserved.upper())
+
+ return "".join(cleaned_parts)
+
+
+def _ensure_quoted_url(url: str) -> str:
+ """
+ Make sure a link is fully quoted.
+ For example, if ' ' occurs in the URL, it will be replaced with "%20",
+ and without double-quoting other characters.
+ """
+ # Split the URL into parts according to the general structure
+ # `scheme://netloc/path;parameters?query#fragment`.
+ result = urllib.parse.urlparse(url)
+ # If the netloc is empty, then the URL refers to a local filesystem path.
+ is_local_path = not result.netloc
+ path = _clean_url_path(result.path, is_local_path=is_local_path)
+ return urllib.parse.urlunparse(result._replace(path=path))
+
+
class Link(KeyBasedCompareMixin):
"""Represents a parsed link from a Package Index's simple URL"""
@@ -34,6 +146,7 @@ class Link(KeyBasedCompareMixin):
"comes_from",
"requires_python",
"yanked_reason",
+ "dist_info_metadata",
"cache_link_parsing",
]
@@ -43,6 +156,7 @@ def __init__(
comes_from: Optional[Union[str, "HTMLPage"]] = None,
requires_python: Optional[str] = None,
yanked_reason: Optional[str] = None,
+ dist_info_metadata: Optional[str] = None,
cache_link_parsing: bool = True,
) -> None:
"""
@@ -59,6 +173,11 @@ def __init__(
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
+ :param dist_info_metadata: the metadata attached to the file, or None if no such
+ metadata is provided. This is the value of the "data-dist-info-metadata"
+ attribute, if present, in a simple repository HTML link. This may be parsed
+ by `URLDownloadInfo.from_link_with_source()`. See PEP 658 for more
+ information and the specification.
:param cache_link_parsing: A flag that is used elsewhere to determine
whether resources retrieved from this link
should be cached. PyPI index urls should
@@ -78,11 +197,41 @@ def __init__(
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
+ self.dist_info_metadata = dist_info_metadata
super().__init__(key=url, defining_class=Link)
self.cache_link_parsing = cache_link_parsing
+ @classmethod
+ def from_element(
+ cls,
+ anchor_attribs: Dict[str, Optional[str]],
+ page_url: str,
+ base_url: str,
+ ) -> Optional["Link"]:
+ """
+ Convert an anchor element's attributes in a simple repository page to a Link.
+ """
+ href = anchor_attribs.get("href")
+ if not href:
+ return None
+
+ url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
+ pyrequire = anchor_attribs.get("data-requires-python")
+ yanked_reason = anchor_attribs.get("data-yanked")
+ dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
+
+ link = Link(
+ url,
+ comes_from=page_url,
+ requires_python=pyrequire,
+ yanked_reason=yanked_reason,
+ dist_info_metadata=dist_info_metadata,
+ )
+
+ return link
+
def __str__(self) -> str:
if self.requires_python:
rp = f" (requires-python:{self.requires_python})"
@@ -165,22 +314,21 @@ def subdirectory_fragment(self) -> Optional[str]:
return None
return match.group(1)
- _hash_re = re.compile(
- r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
- )
+ def get_link_hash(self) -> Optional[LinkHash]:
+ return LinkHash.split_hash_name_and_value(self._url)
@property
def hash(self) -> Optional[str]:
- match = self._hash_re.search(self._url)
- if match:
- return match.group(2)
+ link_hash = self.get_link_hash()
+ if link_hash is not None:
+ return link_hash.value
return None
@property
def hash_name(self) -> Optional[str]:
- match = self._hash_re.search(self._url)
- if match:
- return match.group(1)
+ link_hash = self.get_link_hash()
+ if link_hash is not None:
+ return link_hash.name
return None
@property
@@ -210,19 +358,86 @@ def is_yanked(self) -> bool:
@property
def has_hash(self) -> bool:
- return self.hash_name is not None
+ return self.get_link_hash() is not None
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
"""
- Return True if the link has a hash and it is allowed.
+ Return True if the link has a hash and it is allowed by `hashes`.
"""
- if hashes is None or not self.has_hash:
+ link_hash = self.get_link_hash()
+ if link_hash is None:
return False
- # Assert non-None so mypy knows self.hash_name and self.hash are str.
- assert self.hash_name is not None
- assert self.hash is not None
+ return link_hash.is_hash_allowed(hashes)
+
+
+@dataclass(frozen=True)
+class LinkWithSource:
+ """Retain a `Link` instance along with all the additional information necessary to
+ construct a `DirectUrl` instance with `direct_url_from_link`."""
+
+ link: Link
+ source_dir: Optional[str] = None
+ link_is_in_wheel_cache: bool = False
+
+
+@dataclass(frozen=True)
+class URLDownloadInfo:
+ """Retain a `DirectUrl` instance for a `Link` and for any metadata about it.
+
+ The metadata would typically be parsed from a simple PyPI repository supporting
+ PEP 658."""
+
+ direct_url: DirectUrl
+ dist_info_metadata: Optional[DirectUrl]
+
+ @classmethod
+ def from_link_with_source(
+ cls,
+ link_with_source: LinkWithSource,
+ ) -> "URLDownloadInfo":
+ """Parse a `DirectUrl` instance from a `Link` and any metadata."""
+ from pip._internal.utils.direct_url_helpers import direct_url_from_link
+
+ link = link_with_source.link
+
+ # Implementation of PEP 658 parsing. Note that Link.from_element() parsing the
+ # "data-dist-info-metadata" attribute from an HTML anchor tag is typically how
+ # the Link.dist_info_metadata attribute gets set.
+ dist_info_metadata: Optional[DirectUrl] = None
+ if link.dist_info_metadata is not None:
+ metadata_url = f"{link.url_without_fragment}.metadata"
+ metadata_hash: Optional[ArchiveInfo] = None
+ # If data-dist-info-metadata="true" is set, then the metadata file exists,
+ # but there is no information about its checksum or anything else.
+ if link.dist_info_metadata != "true":
+ link_hash = LinkHash.split_hash_name_and_value(link.dist_info_metadata)
+ if link_hash is not None:
+ metadata_hash = link_hash.to_archive_info()
+
+ dist_info_metadata = DirectUrl(
+ url=metadata_url,
+ info=metadata_hash or ArchiveInfo(hash=None),
+ subdirectory=link.subdirectory_fragment,
+ )
- return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
+ return cls(
+ direct_url=direct_url_from_link(
+ link,
+ source_dir=link_with_source.source_dir,
+ link_is_in_wheel_cache=link_with_source.link_is_in_wheel_cache,
+ ),
+ dist_info_metadata=dist_info_metadata,
+ )
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Take advantage of `DirectUrl.to_dict()` to produce a JSON-serializable
+ representation."""
+ return {
+ "direct_url": self.direct_url.to_dict(),
+ "dist_info_metadata": (
+ self.dist_info_metadata and self.dist_info_metadata.to_dict()
+ ),
+ }
class _CleanResult(NamedTuple):
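
`LinkHash` centralizes the URL-fragment parsing that `Link.hash` and `Link.hash_name` previously did with an inline regex, and the same parser is reused for PEP 658 "data-dist-info-metadata" values in `URLDownloadInfo.from_link_with_source()`. A behavior sketch using the pex wheel URL from the tests below (path elided):

    from pip._internal.models.direct_url import ArchiveInfo
    from pip._internal.models.link import LinkHash

    url = (
        "https://files.pythonhosted.org/packages/.../pex-2.1.61-py2.py3-none-any.whl"
        "#sha256=c09fda0f0477f3894f7a7a464b7e4c03d44734de46caddd25291565eed32a882"
    )
    link_hash = LinkHash.split_hash_name_and_value(url)
    assert link_hash is not None and link_hash.name == "sha256"
    # Feeds straight into the PEP 610 "archive_info" payload used by --report.
    assert link_hash.to_archive_info() == ArchiveInfo(hash=f"sha256={link_hash.value}")
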
diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py
index b40d9e251f8..06eb0bca8c2 100644
--- a/src/pip/_internal/req/req_install.py
+++ b/src/pip/_internal/req/req_install.py
@@ -62,6 +62,21 @@
logger = logging.getLogger(__name__)
+def produce_exact_version_specifier(version: str) -> SpecifierSet:
+ if isinstance(parse_version(version), Version):
+ op = "=="
+ else:
+ op = "==="
+
+ return SpecifierSet(f"{op}{version}")
+
+
+def produce_exact_version_requirement(name: str, version: str) -> Requirement:
+ specifier = produce_exact_version_specifier(version)
+
+ return Requirement(f"{name}{specifier}")
+
+
class InstallRequirement:
"""
Represents something that may be installed later on, may have information
@@ -350,20 +365,10 @@ def _set_requirement(self) -> None:
assert self.metadata is not None
assert self.source_dir is not None
- # Construct a Requirement object from the generated metadata
- if isinstance(parse_version(self.metadata["Version"]), Version):
- op = "=="
- else:
- op = "==="
-
- self.req = Requirement(
- "".join(
- [
- self.metadata["Name"],
- op,
- self.metadata["Version"],
- ]
- )
+ # Construct a Requirement object from the generated metadata.
+ self.req = produce_exact_version_requirement(
+ self.metadata["Name"],
+ self.metadata["Version"],
)
def warn_on_mismatching_name(self) -> None:
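
The extracted helpers keep the old behavior: PEP 440 versions pin with `==`, while versions that only parse as `LegacyVersion` fall back to the `===` identity operator. A sketch of the expected results (version strings are illustrative):

    from pip._internal.req.req_install import (
        produce_exact_version_requirement,
        produce_exact_version_specifier,
    )

    assert str(produce_exact_version_specifier("1.15.0")) == "==1.15.0"
    assert str(produce_exact_version_specifier("2004d")) == "===2004d"  # legacy version
    assert str(produce_exact_version_requirement("cffi", "1.15.0")) == "cffi==1.15.0"
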
diff --git a/src/pip/_internal/resolution/base.py b/src/pip/_internal/resolution/base.py
index 42dade18c1e..283dfb5ed71 100644
--- a/src/pip/_internal/resolution/base.py
+++ b/src/pip/_internal/resolution/base.py
@@ -1,20 +1,49 @@
-from typing import Callable, List, Optional
+import abc
+from typing import TYPE_CHECKING, Callable, List, Optional, cast
+
+from pip._vendor.packaging.utils import NormalizedName
from pip._internal.req.req_install import InstallRequirement
from pip._internal.req.req_set import RequirementSet
+if TYPE_CHECKING:
+ from pip._vendor.resolvelib.resolvers import Result as RLResult
+
+ from .resolvelib.base import Candidate, Requirement
+
+ Result = RLResult[Requirement, Candidate, str]
+
InstallRequirementProvider = Callable[
[str, Optional[InstallRequirement]], InstallRequirement
]
-class BaseResolver:
+# Avoid conflicting with the PyPI package "Python".
+REQUIRES_PYTHON_IDENTIFIER = cast(NormalizedName, "<Python from Requires-Python>")
+# Avoid clashing with any package on PyPI, but remain parseable as a Requirement. This
+# should only be used for .as_serializable_requirement().
+REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER = cast(NormalizedName, "Requires-Python")
+
+
+class RequirementSetWithCandidates(RequirementSet):
+ def __init__(
+ self,
+ candidates: "Result",
+ check_supported_wheels: bool = True,
+ ) -> None:
+ self.candidates = candidates
+ super().__init__(check_supported_wheels=check_supported_wheels)
+
+
+class BaseResolver(metaclass=abc.ABCMeta):
+ @abc.abstractmethod
def resolve(
self, root_reqs: List[InstallRequirement], check_supported_wheels: bool
) -> RequirementSet:
- raise NotImplementedError()
+ ...
+ @abc.abstractmethod
def get_installation_order(
self, req_set: RequirementSet
) -> List[InstallRequirement]:
- raise NotImplementedError()
+ ...
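
`RequirementSetWithCandidates` retains the resolvelib `Result` next to the classic requirement set, which is what later lets `--report` walk the full dependency graph after the resolve. A sketch, assuming `resolver` and `reqs` are set up as in the download command above:

    req_set = resolver.resolve(reqs, check_supported_wheels=True)
    # The resolvelib Result maps each identifier to its pinned candidate.
    for name, candidate in req_set.candidates.mapping.items():
        print(name, candidate.version)
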
diff --git a/src/pip/_internal/resolution/resolvelib/base.py b/src/pip/_internal/resolution/resolvelib/base.py
index b206692a0a9..f8657e1eed6 100644
--- a/src/pip/_internal/resolution/resolvelib/base.py
+++ b/src/pip/_internal/resolution/resolvelib/base.py
@@ -1,5 +1,7 @@
+import abc
from typing import FrozenSet, Iterable, Optional, Tuple, Union
+from pip._vendor.packaging.requirements import Requirement as PkgRequirement
from pip._vendor.packaging.specifiers import SpecifierSet
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._vendor.packaging.version import LegacyVersion, Version
@@ -59,8 +61,8 @@ def is_satisfied_by(self, candidate: "Candidate") -> bool:
return self.specifier.contains(candidate.version, prereleases=True)
-class Requirement:
- @property
+class Requirement(metaclass=abc.ABCMeta):
+ @abc.abstractproperty
def project_name(self) -> NormalizedName:
"""The "project name" of a requirement.
@@ -68,25 +70,29 @@ def project_name(self) -> NormalizedName:
in which case ``name`` would contain the ``[...]`` part, while this
refers to the name of the project.
"""
- raise NotImplementedError("Subclass should override")
- @property
+ @abc.abstractproperty
def name(self) -> str:
"""The name identifying this requirement in the resolver.
This is different from ``project_name`` if this requirement contains
extras, where ``project_name`` would not contain the ``[...]`` part.
"""
- raise NotImplementedError("Subclass should override")
def is_satisfied_by(self, candidate: "Candidate") -> bool:
return False
+ @abc.abstractmethod
def get_candidate_lookup(self) -> CandidateLookup:
- raise NotImplementedError("Subclass should override")
+ ...
+ @abc.abstractmethod
def format_for_error(self) -> str:
- raise NotImplementedError("Subclass should override")
+ ...
+
+ @abc.abstractmethod
+ def as_serializable_requirement(self) -> Optional[PkgRequirement]:
+ ...
def _match_link(link: Link, candidate: "Candidate") -> bool:
@@ -95,8 +101,8 @@ def _match_link(link: Link, candidate: "Candidate") -> bool:
return False
-class Candidate:
- @property
+class Candidate(metaclass=abc.ABCMeta):
+ @abc.abstractproperty
def project_name(self) -> NormalizedName:
"""The "project name" of the candidate.
@@ -104,38 +110,43 @@ def project_name(self) -> NormalizedName:
in which case ``name`` would contain the ``[...]`` part, while this
refers to the name of the project.
"""
- raise NotImplementedError("Override in subclass")
- @property
+ @abc.abstractproperty
def name(self) -> str:
"""The name identifying this candidate in the resolver.
This is different from ``project_name`` if this candidate contains
extras, where ``project_name`` would not contain the ``[...]`` part.
"""
- raise NotImplementedError("Override in subclass")
- @property
+ @abc.abstractproperty
def version(self) -> CandidateVersion:
- raise NotImplementedError("Override in subclass")
+ ...
+
+ @abc.abstractmethod
+ def as_serializable_requirement(self) -> PkgRequirement:
+ ...
- @property
+ @abc.abstractproperty
def is_installed(self) -> bool:
- raise NotImplementedError("Override in subclass")
+ ...
- @property
+ @abc.abstractproperty
def is_editable(self) -> bool:
- raise NotImplementedError("Override in subclass")
+ ...
- @property
+ @abc.abstractproperty
def source_link(self) -> Optional[Link]:
- raise NotImplementedError("Override in subclass")
+ ...
+ @abc.abstractmethod
def iter_dependencies(self, with_requires: bool) -> Iterable[Optional[Requirement]]:
- raise NotImplementedError("Override in subclass")
+ ...
+ @abc.abstractmethod
def get_install_requirement(self) -> Optional[InstallRequirement]:
- raise NotImplementedError("Override in subclass")
+ ...
+ @abc.abstractmethod
def format_for_error(self) -> str:
- raise NotImplementedError("Subclass should override")
+ ...
diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py
index d1470ecbf4e..52a7e3f575f 100644
--- a/src/pip/_internal/resolution/resolvelib/candidates.py
+++ b/src/pip/_internal/resolution/resolvelib/candidates.py
@@ -1,7 +1,8 @@
import logging
import sys
-from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Optional, Tuple, Union, cast
+from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Optional, Tuple, Union
+from pip._vendor.packaging.requirements import Requirement as PkgRequirement
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._vendor.packaging.version import Version
@@ -17,7 +18,11 @@
install_req_from_editable,
install_req_from_line,
)
-from pip._internal.req.req_install import InstallRequirement
+from pip._internal.req.req_install import (
+ InstallRequirement,
+ produce_exact_version_requirement,
+)
+from pip._internal.resolution.base import REQUIRES_PYTHON_IDENTIFIER
from pip._internal.utils.misc import normalize_version_info
from .base import Candidate, CandidateVersion, Requirement, format_name
@@ -33,9 +38,6 @@
"LinkCandidate",
]
-# Avoid conflicting with the PyPI package "Python".
-REQUIRES_PYTHON_IDENTIFIER = cast(NormalizedName, "<Python from Requires-Python>")
-
def as_base_candidate(candidate: Candidate) -> Optional[BaseCandidate]:
"""The runtime version of BaseCandidate."""
@@ -163,6 +165,9 @@ def __init__(
def __str__(self) -> str:
return f"{self.name} {self.version}"
+ def as_serializable_requirement(self) -> PkgRequirement:
+ return produce_exact_version_requirement(self.name, str(self.version))
+
def __repr__(self) -> str:
return "{class_name}({link!r})".format(
class_name=self.__class__.__name__,
@@ -376,6 +381,9 @@ def name(self) -> str:
def version(self) -> CandidateVersion:
return self.dist.version
+ def as_serializable_requirement(self) -> PkgRequirement:
+ return self.dist.as_serializable_requirement()
+
@property
def is_editable(self) -> bool:
return self.dist.editable
@@ -458,6 +466,9 @@ def name(self) -> str:
def version(self) -> CandidateVersion:
return self.base.version
+ def as_serializable_requirement(self) -> PkgRequirement:
+ return self.base.as_serializable_requirement()
+
def format_for_error(self) -> str:
return "{} [{}]".format(
self.base.format_for_error(), ", ".join(sorted(self.extras))
@@ -540,6 +551,13 @@ def name(self) -> str:
def version(self) -> CandidateVersion:
return self._version
+ def as_serializable_requirement(self) -> PkgRequirement:
+ raise NotImplementedError()
+
+ @property
+ def is_editable(self) -> bool:
+ return False
+
def format_for_error(self) -> str:
return f"Python {self.version}"
diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py
index e6ec9594f62..23988bf2712 100644
--- a/src/pip/_internal/resolution/resolvelib/provider.py
+++ b/src/pip/_internal/resolution/resolvelib/provider.py
@@ -13,8 +13,9 @@
from pip._vendor.resolvelib.providers import AbstractProvider
+from pip._internal.resolution.base import REQUIRES_PYTHON_IDENTIFIER
+
from .base import Candidate, Constraint, Requirement
-from .candidates import REQUIRES_PYTHON_IDENTIFIER
from .factory import Factory
if TYPE_CHECKING:
diff --git a/src/pip/_internal/resolution/resolvelib/reporter.py b/src/pip/_internal/resolution/resolvelib/reporter.py
index 6ced5329b81..7739b6d84a0 100644
--- a/src/pip/_internal/resolution/resolvelib/reporter.py
+++ b/src/pip/_internal/resolution/resolvelib/reporter.py
@@ -1,9 +1,27 @@
from collections import defaultdict
+from dataclasses import dataclass, field
from logging import getLogger
-from typing import Any, DefaultDict
+from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Tuple
+from pip._vendor.packaging.requirements import Requirement as PkgRequirement
+from pip._vendor.packaging.specifiers import SpecifierSet
from pip._vendor.resolvelib.reporters import BaseReporter
+from pip._internal.models.link import LinkWithSource, URLDownloadInfo
+from pip._internal.req.req_install import (
+ InstallRequirement,
+ produce_exact_version_specifier,
+)
+from pip._internal.resolution.base import RequirementSetWithCandidates
+from pip._internal.resolution.resolvelib.candidates import (
+ LinkCandidate,
+ RequiresPythonCandidate,
+)
+from pip._internal.resolution.resolvelib.requirements import (
+ ExplicitRequirement,
+ RequiresPythonRequirement,
+)
+
from .base import Candidate, Requirement
logger = getLogger(__name__)
@@ -66,3 +84,173 @@ def backtracking(self, candidate: Candidate) -> None:
def pinning(self, candidate: Candidate) -> None:
logger.info("Reporter.pinning(%r)", candidate)
+
+
+@dataclass(frozen=True)
+class ResolvedCandidate:
+ """Coalesce all the information pip's resolver retains about an
+ installation candidate."""
+
+ req: PkgRequirement
+ download_info: URLDownloadInfo
+ dependencies: Tuple[PkgRequirement, ...]
+ requires_python: Optional[SpecifierSet]
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a JSON-serializable representation of this install candidate."""
+ return {
+ "requirement": str(self.req),
+ "download_info": self.download_info.to_dict(),
+ "dependencies": {dep.name: str(dep) for dep in self.dependencies},
+ "requires_python": str(self.requires_python)
+ if self.requires_python
+ else None,
+ }
+
+
+@dataclass
+class ResolutionResult:
+ """The inputs and outputs of a pip internal resolve process."""
+
+ input_requirements: Tuple[str, ...]
+ python_version: Optional[SpecifierSet] = None
+ candidates: Dict[str, ResolvedCandidate] = field(default_factory=dict)
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return a JSON-serializable representation of the resolve process."""
+ return {
+ "experimental": True,
+ "input_requirements": [str(req) for req in self.input_requirements],
+ "python_version": str(self.python_version),
+ "candidates": {
+ name: info.to_dict() for name, info in self.candidates.items()
+ },
+ }
+
+ @classmethod
+ def _extract_hashable_resolve_input(
+ cls,
+ reqs: Iterable[InstallRequirement],
+ ) -> Tuple[str, ...]:
+ """Reconstruct the input requirements provided to the resolve.
+
+ In theory, pip should be able to be re-run with these arguments to get the same
+ resolve output. Because pip can accept URLs as well as parseable requirement
+ strings on the command line, this method returns a list of strings instead of
+ `PkgRequirement` instances.
+
+ These strings are sorted so that they can be hashed and compared efficiently.
+ """
+ input_requirements: List[str] = []
+ for ireq in reqs:
+ if ireq.req:
+ # If the initial requirement string contained a url (retained in
+ # InstallRequirement.link), add it back to the requirement string
+ # included in the JSON report.
+ if ireq.link:
+ req_string = f"{ireq.req}@{ireq.link.url}"
+ else:
+ req_string = str(ireq.req)
+ else:
+ # If the InstallRequirement has no Requirement information, don't
+ # produce a Requirement string, but simply reproduce the URL.
+ assert ireq.link
+ req_string = ireq.link.url
+
+ input_requirements.append(req_string)
+
+ return tuple(sorted(input_requirements))
+
+ @classmethod
+ def generate_resolve_report(
+ cls,
+ input_requirements: Iterable[InstallRequirement],
+ resolved_requirement_set: RequirementSetWithCandidates,
+ ) -> "ResolutionResult":
+ """Process the resolve to obtain a JSON-serializable/pretty-printable report."""
+ hashable_input = cls._extract_hashable_resolve_input(input_requirements)
+ resolution_result = cls(input_requirements=hashable_input)
+
+ # (1) Scan all the install candidates from `.candidates`.
+ for candidate in resolved_requirement_set.candidates.mapping.values():
+
+ # (2) Map each install candidate back to a specific install requirement from
+ # `.requirements`.
+ req = resolved_requirement_set.requirements.get(candidate.name, None)
+ if req is None:
+ # Pip will impose an implicit `Requires-Python` constraint upon the
+ # whole resolve corresponding to the value of the `--python-version`
+ # argument. This shows up as an installation candidate which does not
+ # correspond to any requirement from the requirement set.
+ if isinstance(candidate, RequiresPythonCandidate):
+ # This candidate should only appear once.
+ assert resolution_result.python_version is None
+ # Generate a serializable `SpecifierSet` instance.
+ resolution_result.python_version = produce_exact_version_specifier(
+ str(candidate.version)
+ )
+ continue
+
+ # All other types of installation candidates are expected to be found
+ # within the resolved requirement set.
+ raise TypeError(
+ f"unknown candidate not found in requirement set: {candidate}"
+ )
+ assert req.name is not None
+ assert req.link is not None
+ # Each project name should only be fulfilled by a single
+ # installation candidate.
+ assert req.name not in resolution_result.candidates
+
+ # (3) Scan the dependencies of the installation candidates, which cover both
+ # normal dependencies as well as Requires-Python information.
+ requires_python: Optional[SpecifierSet] = None
+ dependencies: List[PkgRequirement] = []
+ for maybe_dep in candidate.iter_dependencies(with_requires=True):
+ # It's unclear why `.iter_dependencies()` may occasionally yield `None`.
+ if maybe_dep is None:
+ continue
+
+ # There will only ever be one python version constraint for each
+ # candidate, if any. We extract the version specifier.
+ if isinstance(maybe_dep, RequiresPythonRequirement):
+ requires_python = maybe_dep.specifier
+ continue
+
+ # Convert the 2020 resolver-internal Requirement subclass instance into
+ # a `packaging.requirements.Requirement` instance.
+ maybe_req = maybe_dep.as_serializable_requirement()
+ if maybe_req is None:
+ continue
+
+ # For `ExplicitRequirement`s only, we want to make sure we propagate any
+ # source URL into a dependency's `packaging.requirements.Requirement`
+ # instance.
+ if isinstance(maybe_dep, ExplicitRequirement):
+ dep_candidate = maybe_dep.candidate
+ if maybe_req.url is None and isinstance(
+ dep_candidate, LinkCandidate
+ ):
+ assert dep_candidate.source_link is not None
+ maybe_req = PkgRequirement(
+ f"{maybe_req}@{dep_candidate.source_link.url}"
+ )
+
+ dependencies.append(maybe_req)
+
+ # Mutate the candidates dictionary to add this candidate after processing
+ # any dependencies and python version requirement.
+ resolution_result.candidates[req.name] = ResolvedCandidate(
+ req=candidate.as_serializable_requirement(),
+ download_info=URLDownloadInfo.from_link_with_source(
+ LinkWithSource(
+ req.link,
+ source_dir=req.source_dir,
+ link_is_in_wheel_cache=req.original_link_is_in_wheel_cache,
+ )
+ ),
+ dependencies=tuple(dependencies),
+ requires_python=requires_python,
+ )
+
+ return resolution_result
diff --git a/src/pip/_internal/resolution/resolvelib/requirements.py b/src/pip/_internal/resolution/resolvelib/requirements.py
index f561f1f1e27..816d5a5709c 100644
--- a/src/pip/_internal/resolution/resolvelib/requirements.py
+++ b/src/pip/_internal/resolution/resolvelib/requirements.py
@@ -1,7 +1,11 @@
+from typing import Optional
+
+from pip._vendor.packaging.requirements import Requirement as PkgRequirement
from pip._vendor.packaging.specifiers import SpecifierSet
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._internal.req.req_install import InstallRequirement
+from pip._internal.resolution.base import REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER
from .base import Candidate, CandidateLookup, Requirement, format_name
@@ -29,6 +33,9 @@ def name(self) -> str:
# No need to canonicalize - the candidate did this
return self.candidate.name
+ def as_serializable_requirement(self) -> PkgRequirement:
+ return self.candidate.as_serializable_requirement()
+
def format_for_error(self) -> str:
return self.candidate.format_for_error()
@@ -77,6 +84,9 @@ def format_for_error(self) -> str:
return ", ".join(parts[:-1]) + " and " + parts[-1]
+ def as_serializable_requirement(self) -> Optional[PkgRequirement]:
+ return self._ireq.req
+
def get_candidate_lookup(self) -> CandidateLookup:
return None, self._ireq
@@ -120,6 +130,11 @@ def name(self) -> str:
def format_for_error(self) -> str:
return str(self)
+ def as_serializable_requirement(self) -> PkgRequirement:
+ return PkgRequirement(
+ f"{REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER}{self.specifier}",
+ )
+
def get_candidate_lookup(self) -> CandidateLookup:
if self.specifier.contains(self._candidate.version, prereleases=True):
return self._candidate, None
@@ -159,6 +174,9 @@ def name(self) -> str:
def format_for_error(self) -> str:
return str(self)
+ def as_serializable_requirement(self) -> Optional[PkgRequirement]:
+ raise NotImplementedError()
+
def get_candidate_lookup(self) -> CandidateLookup:
return None, None
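
`REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER` is chosen so that the synthesized Requires-Python constraint still parses as an ordinary PEP 508 requirement; a quick check:

    from pip._vendor.packaging.requirements import Requirement

    req = Requirement("Requires-Python>=3.6")
    assert req.name == "Requires-Python"
    assert str(req.specifier) == ">=3.6"
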
diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py
index 47e3defd8cd..6b8ed47033c 100644
--- a/src/pip/_internal/resolution/resolvelib/resolver.py
+++ b/src/pip/_internal/resolution/resolvelib/resolver.py
@@ -13,7 +13,11 @@
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req.req_install import InstallRequirement
from pip._internal.req.req_set import RequirementSet
-from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider
+from pip._internal.resolution.base import (
+ BaseResolver,
+ InstallRequirementProvider,
+ RequirementSetWithCandidates,
+)
from pip._internal.resolution.resolvelib.provider import PipProvider
from pip._internal.resolution.resolvelib.reporter import (
PipDebuggingReporter,
@@ -67,13 +71,20 @@ def __init__(
py_version_info=py_version_info,
)
self.ignore_dependencies = ignore_dependencies
+ # TODO: for performance, try to decouple extracting sdist metadata from
+ # actually building the sdist. See https://github.com/pypa/pip/issues/8929.
+ # As mentioned in that issue, PEP 658 support on PyPI would address many cases,
+ # but it would drastically improve performance for many existing packages if we
+ # attempted to extract PKG-INFO or .egg-info from non-wheel files, falling back
+ # to the slower setup.py invocation if not found. LazyZipOverHTTP and
+ # MemoryWheel already implement such a hack for wheel files specifically.
self.dry_run = dry_run
self.upgrade_strategy = upgrade_strategy
self._result: Optional[Result] = None
def resolve(
self, root_reqs: List[InstallRequirement], check_supported_wheels: bool
- ) -> RequirementSet:
+ ) -> RequirementSetWithCandidates:
collected = self.factory.collect_root_requirements(root_reqs)
provider = PipProvider(
factory=self.factory,
@@ -104,7 +115,9 @@ def resolve(
)
raise error from e
- req_set = RequirementSet(check_supported_wheels=check_supported_wheels)
+ req_set = RequirementSetWithCandidates(
+ candidates=result, check_supported_wheels=check_supported_wheels
+ )
for candidate in result.mapping.values():
ireq = candidate.get_install_requirement()
if ireq is None:
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index af55276d1ba..7551248321e 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1,12 +1,22 @@
+import json
import os.path
import shutil
import textwrap
+import uuid
from hashlib import sha256
-from typing import List
+from typing import Any, Callable, Dict, List, Optional, Tuple
import pytest
+from pip._vendor.packaging.requirements import Requirement
from pip._internal.cli.status_codes import ERROR
+from pip._internal.models.direct_url import (
+ ArchiveInfo,
+ DirectUrl,
+ DirInfo,
+ InfoType,
+ VcsInfo,
+)
from pip._internal.utils.urls import path_to_url
from tests.conftest import MockServer, ScriptFactory
from tests.lib import PipTestEnvironment, TestData, create_really_basic_wheel
@@ -1174,3 +1184,284 @@ def test_download_editable(
downloads = os.listdir(download_dir)
assert len(downloads) == 1
assert downloads[0].endswith(".zip")
+
+
+@pytest.fixture(scope="function")
+def json_report(
+ shared_script: PipTestEnvironment, tmpdir: Path
+) -> Callable[..., Dict[str, Any]]:
+ """Execute `pip download --report` and parse the JSON file it writes out."""
+ download_dir = tmpdir / "report"
+ download_dir.mkdir()
+ downloaded_path = download_dir / "report.json"
+
+ def execute_pip_for_report_json(*args: str) -> Dict[str, Any]:
+ shared_script.pip(
+ "download",
+ "--dry-run",
+ f"--report={downloaded_path}",
+ *args,
+ )
+
+ assert downloaded_path.exists()
+
+ with open(downloaded_path, "r") as f:
+ report = json.load(f)
+
+ return report
+
+ return execute_pip_for_report_json
+
+
+@pytest.mark.network
+@pytest.mark.parametrize(
+ "package_name, package_filename, requirement, url_no_fragment, info",
+ [
+ ("simple", "simple-1.0.tar.gz", "simple==1.0", None, ArchiveInfo(hash=None)),
+ (
+ "simplewheel",
+ "simplewheel-1.0-py2.py3-none-any.whl",
+ "simplewheel==1.0",
+ None,
+ ArchiveInfo(hash=None),
+ ),
+ (
+ "pip-test-package",
+ "git+https://github.com/pypa/pip-test-package.git",
+ "pip-test-package==0.1.1",
+ "https://github.com/pypa/pip-test-package.git",
+ VcsInfo(vcs="git", commit_id="5547fa909e83df8bd743d3978d6667497983a4b7"),
+ ),
+ ("symlinks", "symlinks", "symlinks==0.1.dev0", None, DirInfo(editable=False)),
+ (
+ "pex",
+ "https://files.pythonhosted.org/packages/6f/7f/6b1e56fc291df523a02769ebe9b432f63f294475012c2c1f76d4cbb5321f/pex-2.1.61-py2.py3-none-any.whl#sha256=c09fda0f0477f3894f7a7a464b7e4c03d44734de46caddd25291565eed32a882", # noqa: E501
+ "pex==2.1.61",
+ "https://files.pythonhosted.org/packages/6f/7f/6b1e56fc291df523a02769ebe9b432f63f294475012c2c1f76d4cbb5321f/pex-2.1.61-py2.py3-none-any.whl", # noqa: E501
+ ArchiveInfo(
+ hash="sha256=c09fda0f0477f3894f7a7a464b7e4c03d44734de46caddd25291565eed32a882" # noqa: E501
+ ),
+ ),
+ ],
+)
+def test_download_report_direct_url_top_level(
+ json_report: Callable[..., Dict[str, Any]],
+ shared_data: TestData,
+ package_name: str,
+ package_filename: str,
+ requirement: str,
+ url_no_fragment: Optional[str],
+ info: InfoType,
+) -> None:
+ """Test `pip download --report`'s "download_info" JSON field."""
+ # If we are not referring to an explicit URL in our test parameterization, assume we
+ # are referring to one of our test packages.
+ if "://" in package_filename:
+ simple_pkg = package_filename
+ else:
+ simple_pkg = path_to_url(str(shared_data.packages / package_filename))
+
+ report = json_report("--no-index", simple_pkg)
+
+ assert len(report["input_requirements"]) == 1
+ # Wheel file paths provided as inputs will be converted into an equivalent
+ # Requirement string 'a==x.y@scheme://path/to/wheel' instead of just the wheel path.
+ assert report["input_requirements"][0].endswith(simple_pkg)
+
+ candidate = report["candidates"][package_name]
+ assert requirement == candidate["requirement"]
+ direct_url = DirectUrl.from_dict(candidate["download_info"]["direct_url"])
+ assert direct_url == DirectUrl(
+ url_no_fragment or simple_pkg,
+ info=info,
+ )
+
+
+@pytest.mark.network
+def test_download_report_dependencies(
+ json_report: Callable[..., Dict[str, Any]],
+) -> None:
+ """Test the result of a pinned resolve against PyPI."""
+ report = json_report("cryptography==36.0.1", "cffi==1.15.0", "pycparser==2.21")
+ assert sorted(report["input_requirements"]) == [
+ "cffi==1.15.0",
+ "cryptography==36.0.1",
+ "pycparser==2.21",
+ ]
+
+ cryptography = report["candidates"]["cryptography"]
+ assert cryptography["requirement"] == "cryptography==36.0.1"
+ assert cryptography["requires_python"] == ">=3.6"
+ assert cryptography["dependencies"] == {"cffi": "cffi>=1.12"}
+
+ cffi = report["candidates"]["cffi"]
+ assert cffi["requirement"] == "cffi==1.15.0"
+ assert cffi["requires_python"] is None
+ assert cffi["dependencies"] == {"pycparser": "pycparser"}
+
+ pycparser = report["candidates"]["pycparser"]
+ assert pycparser["requirement"] == "pycparser==2.21"
+ assert pycparser["dependencies"] == {}
+ assert pycparser["requires_python"] == "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
+
+
+@pytest.mark.network
+@pytest.mark.parametrize(
+ "python_version",
+ [
+ "3.10.0",
+ "3.10.1",
+ "3.7.0",
+ "3.8.0",
+ "3.9.0",
+ ],
+)
+def test_download_report_python_version(
+ json_report: Callable[..., Dict[str, Any]],
+ python_version: str,
+) -> None:
+ """Ensure the --python-version variable is respected in the --report JSON output."""
+ report = json_report(
+ f"--python-version={python_version}", "--only-binary=:all:", "wheel"
+ )
+ assert report["python_version"] == f"=={python_version}"
+
+
+@pytest.fixture(scope="function")
+def index_html_content(tmpdir: Path) -> Callable[..., Path]:
+ """Generate a PyPI package index.html within a temporary local directory."""
+ html_dir = tmpdir / "index_html_content"
+ html_dir.mkdir()
+
+ def generate_index_html_subdir(index_html: str) -> Path:
+ """Create a new subdirectory after a UUID and write an index.html."""
+ new_subdir = html_dir / uuid.uuid4().hex
+ new_subdir.mkdir()
+
+ with open(new_subdir / "index.html", "w") as f:
+ f.write(index_html)
+
+ return new_subdir
+
+ return generate_index_html_subdir
+
+
+@pytest.fixture(scope="function")
+def json_report_for_index_content(
+ shared_data: TestData,
+ index_html_content: Callable[..., Path],
+ json_report: Callable[..., Dict[str, Any]],
+) -> Callable[..., Dict[str, Any]]:
+ """Generate a PyPI package index within a local directory pointing to test data."""
+
+ def generate_index_and_report_for_some_packages(
+ packages: Dict[str, List[Tuple[str, str]]], *args: str
+ ) -> Dict[str, Any]:
+ """
+ Produce a PyPI directory structure pointing to a subset of packages in
+ test data, then execute `pip download --report ... -i ...` pointing to our
+ generated index.
+ """
+ # (1) Generate the content for a PyPI index.html.
+ pkg_links = "\n".join(
+            f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in packages.keys()
+ )
+        index_html = f"""\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="pypi:repository-version" content="1.0">
+    <title>Simple index</title>
+  </head>
+  <body>
+{pkg_links}
+  </body>
+</html>"""
+ # (2) Generate the index.html in a new subdirectory of the temp directory.
+ index_html_subdir = index_html_content(index_html)
+
+ # (3) Generate subdirectories for individual packages, each with their own
+ # index.html.
+ for pkg, links in packages.items():
+ pkg_subdir = index_html_subdir / pkg
+ pkg_subdir.mkdir()
+
+ download_links: List[str] = []
+ for relative_path, additional_tag in links:
+ # For each link to be added to the generated index.html for this
+ # package, copy over the corresponding file in `shared_data.packages`.
+                download_links.append(
+                    f'    <a href="{relative_path}" {additional_tag}>{relative_path}</a><br/>'  # noqa: E501
+                )
+ shutil.copy(
+ shared_data.packages / relative_path, pkg_subdir / relative_path
+ )
+
+ # After collating all the download links and copying over the files, write
+ # an index.html with the generated download links for each copied file.
+ download_links_str = "\n".join(download_links)
+            pkg_index_content = f"""\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="pypi:repository-version" content="1.0">
+    <title>Links for {pkg}</title>
+  </head>
+  <body>
+    <h1>Links for {pkg}</h1>
+{download_links_str}
+  </body>
+</html>"""
+ with open(pkg_subdir / "index.html", "w") as f:
+ f.write(pkg_index_content)
+
+ return json_report("-i", path_to_url(index_html_subdir), *args)
+
+ return generate_index_and_report_for_some_packages
+
+
+_simple_packages: Dict[str, List[Tuple[str, str]]] = {
+ "simple": [
+ ("simple-1.0.tar.gz", ""),
+ ("simple-2.0.tar.gz", 'data-dist-info-metadata="true"'),
+ ("simple-3.0.tar.gz", 'data-dist-info-metadata="sha256=aabe42af"'),
+ ]
+}
+
+
+@pytest.mark.parametrize(
+ "requirement_to_download, dist_info_metadata",
+ [
+ (
+ "simple==1.0",
+ None,
+ ),
+ (
+ "simple==2.0",
+ ArchiveInfo(hash=None),
+ ),
+ (
+ "simple==3.0",
+ ArchiveInfo(hash="sha256=aabe42af"),
+ ),
+ ],
+)
+def test_download_report_dist_info_metadata(
+ json_report_for_index_content: Callable[..., Dict[str, Any]],
+ requirement_to_download: str,
+ dist_info_metadata: Optional[ArchiveInfo],
+) -> None:
+ """Ensure `pip download --report` reflects PEP 658 metadata."""
+ report = json_report_for_index_content(
+ _simple_packages,
+ requirement_to_download,
+ )
+ project_name = Requirement(requirement_to_download).name
+ direct_url_json = report["candidates"][project_name]["download_info"][
+ "dist_info_metadata"
+ ]
+ if dist_info_metadata is None:
+ assert direct_url_json is None
+ else:
+ direct_url = DirectUrl.from_dict(direct_url_json)
+ assert direct_url.info == dist_info_metadata
diff --git a/tests/functional/test_freeze.py b/tests/functional/test_freeze.py
index bae9eadbd30..83d1f959ee6 100644
--- a/tests/functional/test_freeze.py
+++ b/tests/functional/test_freeze.py
@@ -1,3 +1,4 @@
+import dataclasses
import os
import re
import sys
@@ -1015,7 +1016,13 @@ def test_freeze_pep610_editable(script: PipTestEnvironment) -> None:
with open(direct_url_path) as f:
direct_url = DirectUrl.from_json(f.read())
assert isinstance(direct_url.info, DirInfo)
- direct_url.info.editable = True
+ direct_url = dataclasses.replace(
+ direct_url,
+ info=dataclasses.replace(
+ direct_url.info,
+ editable=True,
+ ),
+ )
with open(direct_url_path, "w") as f:
f.write(direct_url.to_json())
result = script.pip("freeze")
diff --git a/tests/functional/test_list.py b/tests/functional/test_list.py
index b9d0f0fa340..be6ff5f0d9a 100644
--- a/tests/functional/test_list.py
+++ b/tests/functional/test_list.py
@@ -1,3 +1,4 @@
+import dataclasses
import json
import os
@@ -744,7 +745,9 @@ def test_list_pep610_editable(script: PipTestEnvironment) -> None:
with open(direct_url_path) as f:
direct_url = DirectUrl.from_json(f.read())
assert isinstance(direct_url.info, DirInfo)
- direct_url.info.editable = True
+ direct_url = dataclasses.replace(
+ direct_url, info=dataclasses.replace(direct_url.info, editable=True)
+ )
with open(direct_url_path, "w") as f:
f.write(direct_url.to_json())
result = script.pip("list", "--format=json")
diff --git a/tests/lib/server.py b/tests/lib/server.py
index 95cc6a23e34..39da62ca36b 100644
--- a/tests/lib/server.py
+++ b/tests/lib/server.py
@@ -150,14 +150,6 @@ def html5_page(text: str) -> str:
)
-def index_page(spec: Dict[str, str]) -> "WSGIApplication":
- def link(name: str, value: str) -> str:
-        return '<a href="{}">{}</a>'.format(value, name)
-
- links = "".join(link(*kv) for kv in spec.items())
- return text_html_response(html5_page(links))
-
-
def package_page(spec: Dict[str, str]) -> "WSGIApplication":
def link(name: str, value: str) -> str:
         return '<a href="{}">{}</a>'.format(value, name)
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index f77794b55b9..b7e5a5bd9ce 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -2,6 +2,7 @@
import logging
import os.path
import re
+import urllib.parse
import urllib.request
import uuid
from textwrap import dedent
@@ -10,13 +11,12 @@
import pytest
from pip._vendor import html5lib, requests
+from pip._vendor.packaging.requirements import Requirement
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.index.collector import (
HTMLPage,
LinkCollector,
- _clean_link,
- _clean_url_path,
_determine_base_url,
_get_html_page,
_get_html_response,
@@ -27,13 +27,35 @@
)
from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource
from pip._internal.models.candidate import InstallationCandidate
+from pip._internal.models.direct_url import ArchiveInfo, DirectUrl
from pip._internal.models.index import PyPI
-from pip._internal.models.link import Link
+from pip._internal.models.link import (
+ Link,
+ LinkHash,
+ LinkWithSource,
+ URLDownloadInfo,
+ _clean_url_path,
+)
from pip._internal.network.session import PipSession
from tests.lib import TestData, make_test_link_collector
from tests.lib.path import Path
+def _clean_link(url: str) -> str:
+ """
+ Make sure a link is fully quoted.
+ For example, if ' ' occurs in the URL, it will be replaced with "%20",
+ and without double-quoting other characters.
+ """
+ # Split the URL into parts according to the general structure
+ # `scheme://netloc/path;parameters?query#fragment`.
+ result = urllib.parse.urlparse(url)
+ # If the netloc is empty, then the URL refers to a local filesystem path.
+ is_local_path = not result.netloc
+ path = _clean_url_path(result.path, is_local_path=is_local_path)
+ return urllib.parse.urlunparse(result._replace(path=path))
+
+
@pytest.mark.parametrize(
"url",
[
@@ -420,7 +442,7 @@ def test_clean_link(url: str, clean_url: str) -> None:
def _test_parse_links_data_attribute(
anchor_html: str, attr: str, expected: Optional[str]
-) -> None:
+) -> Link:
html = (
""
''
@@ -438,6 +460,7 @@ def _test_parse_links_data_attribute(
(link,) = links
actual = getattr(link, attr)
assert actual == expected
+ return link
@pytest.mark.parametrize(
@@ -494,6 +517,78 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -
_test_parse_links_data_attribute(anchor_html, "yanked_reason", expected)
+# Requirement objects do not == each other unless they point to the same instance!
+_pkg1_requirement = Requirement("pkg1==1.0")
+
+
+@pytest.mark.parametrize(
+ "anchor_html, expected, download_info",
+ [
+ # Test not present.
+ (
+            '<a href="/pkg1-1.0.tar.gz"></a>',
+ None,
+ URLDownloadInfo(
+ DirectUrl(
+ "https://example.com/pkg1-1.0.tar.gz", ArchiveInfo(hash=None)
+ ),
+ None,
+ ),
+ ),
+ # Test with value "true".
+ (
+            '<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="true"></a>',
+ "true",
+ URLDownloadInfo(
+ DirectUrl(
+ "https://example.com/pkg1-1.0.tar.gz", ArchiveInfo(hash=None)
+ ),
+ DirectUrl(
+ url="https://example.com/pkg1-1.0.tar.gz.metadata",
+ info=ArchiveInfo(hash=None),
+ ),
+ ),
+ ),
+ # Test with a provided hash value.
+ (
+            '<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="sha256=aa113592bbe"></a>',  # noqa: E501
+ "sha256=aa113592bbe",
+ URLDownloadInfo(
+ DirectUrl(
+ "https://example.com/pkg1-1.0.tar.gz", ArchiveInfo(hash=None)
+ ),
+ DirectUrl(
+ url="https://example.com/pkg1-1.0.tar.gz.metadata",
+ info=ArchiveInfo(hash="sha256=aa113592bbe"),
+ ),
+ ),
+ ),
+ # Test with a provided hash value for both the requirement as well as metadata.
+ (
+            '<a href="/pkg1-1.0.tar.gz#sha512=abc132409cb" data-dist-info-metadata="sha256=aa113592bbe"></a>',  # noqa: E501
+ "sha256=aa113592bbe",
+ URLDownloadInfo(
+ DirectUrl(
+ "https://example.com/pkg1-1.0.tar.gz",
+ ArchiveInfo(hash="sha512=abc132409cb"),
+ ),
+ DirectUrl(
+ url="https://example.com/pkg1-1.0.tar.gz.metadata",
+ info=ArchiveInfo(hash="sha256=aa113592bbe"),
+ ),
+ ),
+ ),
+ ],
+)
+def test_parse_links__dist_info_metadata(
+ anchor_html: str,
+ expected: Optional[str],
+ download_info: URLDownloadInfo,
+) -> None:
+ link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
+ assert URLDownloadInfo.from_link_with_source(LinkWithSource(link)) == download_info
+
+
def test_parse_links_caches_same_page_by_url() -> None:
html = (
""
@@ -933,3 +1028,23 @@ def expand_path(path: str) -> str:
expected_temp2_dir = os.path.normcase(temp2_dir)
assert search_scope.find_links == ["~/temp1", expected_temp2_dir]
assert search_scope.index_urls == ["default_url"]
+
+
+@pytest.mark.parametrize(
+ "url, result",
+ [
+ (
+ "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe",
+ LinkHash("sha256", "aa113592bbe"),
+ ),
+ (
+ "https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe",
+ LinkHash("md5", "aa113592bbe"),
+ ),
+ ("https://pypi.org/pip-18.0.tar.gz#sha256=gaa113592bbe", None),
+ ("https://pypi.org/pip-18.0.tar.gz", None),
+ ("https://pypi.org/pip-18.0.tar.gz#sha500=aa113592bbe", None),
+ ],
+)
+def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
+ assert LinkHash.split_hash_name_and_value(url) == result
diff --git a/tests/unit/test_direct_url_helpers.py b/tests/unit/test_direct_url_helpers.py
index 8d94aeb50b6..08af0b01fd2 100644
--- a/tests/unit/test_direct_url_helpers.py
+++ b/tests/unit/test_direct_url_helpers.py
@@ -1,3 +1,4 @@
+import dataclasses
from functools import partial
from unittest import mock
@@ -22,14 +23,20 @@ def test_as_pep440_requirement_archive() -> None:
direct_url_as_pep440_direct_reference(direct_url, "pkg")
== "pkg @ file:///home/user/archive.tgz"
)
- direct_url.subdirectory = "subdir"
+ direct_url = dataclasses.replace(direct_url, subdirectory="subdir")
direct_url.validate()
assert (
direct_url_as_pep440_direct_reference(direct_url, "pkg")
== "pkg @ file:///home/user/archive.tgz#subdirectory=subdir"
)
assert isinstance(direct_url.info, ArchiveInfo)
- direct_url.info.hash = "sha1=1b8c5bc61a86f377fea47b4276c8c8a5842d2220"
+ assert direct_url.info.hash is None
+ direct_url = dataclasses.replace(
+ direct_url,
+ info=dataclasses.replace(
+ direct_url.info, hash="sha1=1b8c5bc61a86f377fea47b4276c8c8a5842d2220"
+ ),
+ )
direct_url.validate()
assert (
direct_url_as_pep440_direct_reference(direct_url, "pkg")
@@ -76,7 +83,7 @@ def test_as_pep440_requirement_vcs() -> None:
== "pkg @ git+https:///g.c/u/p.git"
"@1b8c5bc61a86f377fea47b4276c8c8a5842d2220"
)
- direct_url.subdirectory = "subdir"
+ direct_url = dataclasses.replace(direct_url, subdirectory="subdir")
direct_url.validate()
assert (
direct_url_as_pep440_direct_reference(direct_url, "pkg")
From 76520a5cab28213b39fbdded6e881ff487830994 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Fri, 7 Jan 2022 07:02:19 -0500
Subject: [PATCH 3/4] Add the JSON report to html/man documentation and add
NEWS entry!
---
docs/html/cli/pip_download.rst | 20 ++
docs/html/topics/dependency-resolution.md | 6 +-
docs/html/user_guide.rst | 290 ++++++++++++++++++++++
news/10748.feature.rst | 1 +
4 files changed, 314 insertions(+), 3 deletions(-)
create mode 100644 news/10748.feature.rst
diff --git a/docs/html/cli/pip_download.rst b/docs/html/cli/pip_download.rst
index 4f15314d765..f7deded340f 100644
--- a/docs/html/cli/pip_download.rst
+++ b/docs/html/cli/pip_download.rst
@@ -45,6 +45,12 @@ them. Generic dependencies (e.g. universal wheels, or dependencies with no
platform, abi, or implementation constraints) will still match an over-
constrained download requirement.
+``pip download --report output.json`` is an experimental feature which writes a :ref:`JSON report` of the
+inputs and outputs of pip's internal resolution process to ``output.json``. This can be useful to
+generate a lockfile, check whether transitive dependencies would introduce a conflict, or download
+packages directly from download URLs without having to traverse PyPI again. The ``--dry-run`` option
+can be combined with ``--report`` to produce just the JSON report without actually downloading any
+packages, which is faster.
Options
@@ -224,3 +230,17 @@ Examples
--implementation cp ^
--abi cp36m --abi cp36 --abi abi3 --abi none ^
SomePackage
+
+#. Use ``--report`` to write a JSON report of the inputs and outputs of pip's internal resolution process to ``pip-resolve.json``. See the documentation for :ref:`the JSON report <JSON report>`.
+
+ .. tab:: Unix/macOS
+
+ .. code-block:: shell
+
+ $ python -m pip download --dry-run --report pip-resolve.json SomePackage
+
+ .. tab:: Windows
+
+ .. code-block:: shell
+
+ C:> py -m pip download --dry-run --report pip-resolve.json SomePackage
diff --git a/docs/html/topics/dependency-resolution.md b/docs/html/topics/dependency-resolution.md
index 7dd9848b021..52fbf891f61 100644
--- a/docs/html/topics/dependency-resolution.md
+++ b/docs/html/topics/dependency-resolution.md
@@ -163,9 +163,9 @@ will avoid performing dependency resolution during deployment.
## Dealing with dependency conflicts
-This section provides practical suggestions to pip users who encounter
-a `ResolutionImpossible` error, where pip cannot install their specified
-packages due to conflicting dependencies.
+This section provides practical suggestions to pip users who encounter a `ResolutionImpossible`
+error, where pip cannot install their specified packages due to conflicting dependencies. Note that
+the {ref}`JSON report` may offer more debugging information.
### Understanding your error message
diff --git a/docs/html/user_guide.rst b/docs/html/user_guide.rst
index 6a25a6e6ae3..5b21d066af4 100644
--- a/docs/html/user_guide.rst
+++ b/docs/html/user_guide.rst
@@ -865,6 +865,296 @@ of ability. Some examples that you could consider include:
* ``distlib`` - Packaging and distribution utilities (including functions for
interacting with PyPI).
+Pip now supports an experimental feature that dumps the output of its resolve process into a :ref:`JSON report`, which can then be processed using the ``packaging`` library.
+
+.. _`JSON report`:
+
+JSON report
+===========
+
+Pip exposes an experimental feature that writes a JSON report of the dependency resolution process's inputs and outputs via ``pip download --dry-run --report pip-output.json``. This report is intended to be consumed as part of automated pip execution pipelines, but it can also be used as a debugging tool.
+
+Example command execution:
+
+.. tab:: Unix/macOS
+
+ .. code-block:: console
+
+ $ pip download --dry-run --report pip-output.json tensorboard
+ Collecting tensorboard
+ Obtaining dependency information from tensorboard 2.7.0
+ Collecting tensorboard-plugin-wit>=1.6.0
+ Obtaining dependency information from tensorboard-plugin-wit 1.8.1
+ Collecting google-auth-oauthlib<0.5,>=0.4.1
+ Obtaining dependency information from google-auth-oauthlib 0.4.6
+ Collecting absl-py>=0.4
+ Obtaining dependency information from absl-py 1.0.0
+ Collecting protobuf>=3.6.0
+ Obtaining dependency information from protobuf 3.19.1
+ Collecting setuptools>=41.0.0
+ Obtaining dependency information from setuptools 60.3.1
+ Collecting wheel>=0.26
+ Obtaining dependency information from wheel 0.37.1
+ Collecting werkzeug>=0.11.15
+ Obtaining dependency information from werkzeug 2.0.2
+ Collecting tensorboard-data-server<0.7.0,>=0.6.0
+ Obtaining dependency information from tensorboard-data-server 0.6.1
+ Collecting markdown>=2.6.8
+ Obtaining dependency information from markdown 3.3.6
+ Collecting grpcio>=1.24.3
+ Using cached grpcio-1.43.0.tar.gz (21.5 MB)
+ Preparing metadata (setup.py) ... done
+ Collecting numpy>=1.12.0
+ Using cached numpy-1.22.0.zip (11.3 MB)
+ Installing build dependencies ... done
+ Getting requirements to build wheel ... done
+ Preparing metadata (pyproject.toml) ... done
+ Collecting requests<3,>=2.21.0
+ Obtaining dependency information from requests 2.27.1
+ Collecting google-auth<3,>=1.6.3
+ Obtaining dependency information from google-auth 2.3.3
+ Collecting six
+ Obtaining dependency information from six 1.16.0
+ Collecting pyasn1-modules>=0.2.1
+ Obtaining dependency information from pyasn1-modules 0.2.8
+ Collecting rsa<5,>=3.1.4
+ Obtaining dependency information from rsa 4.8
+ Collecting cachetools<5.0,>=2.0.0
+ Obtaining dependency information from cachetools 4.2.4
+ Collecting requests-oauthlib>=0.7.0
+ Obtaining dependency information from requests-oauthlib 1.3.0
+ Collecting charset-normalizer~=2.0.0
+ Obtaining dependency information from charset-normalizer 2.0.10
+ Collecting certifi>=2017.4.17
+ Obtaining dependency information from certifi 2021.10.8
+ Collecting idna<4,>=2.5
+ Obtaining dependency information from idna 3.3
+ Collecting urllib3<1.27,>=1.21.1
+ Obtaining dependency information from urllib3 1.26.7
+ Collecting pyasn1<0.5.0,>=0.4.6
+ Obtaining dependency information from pyasn1 0.4.8
+ Collecting oauthlib>=3.0.0
+ Obtaining dependency information from oauthlib 3.1.1
+ Python version: '==3.10.1'
+ Input requirements: 'tensorboard'
+ Resolution: 'tensorboard==2.7.0' 'absl-py==1.0.0' 'google-auth==2.3.3' 'google-auth-oauthlib==0.4.6' 'grpcio==1.43.0' 'markdown==3.3.6' 'numpy==1.22.0' 'protobuf==3.19.1' 'requests==2.27.1' 'tensorboard-data-server==0.6.1' 'tensorboard-plugin-wit==1.8.1' 'werkzeug==2.0.2' 'wheel==0.37.1' 'cachetools==4.2.4' 'certifi==2021.10.8' 'charset-normalizer==2.0.10' 'idna==3.3' 'pyasn1-modules==0.2.8' 'requests-oauthlib==1.3.0' 'rsa==4.8' 'six==1.16.0' 'urllib3==1.26.7' 'oauthlib==3.1.1' 'pyasn1==0.4.8' 'setuptools==60.3.1'
+ JSON report written to 'pip-output.json'.
+
+.. tab:: Windows
+
+ .. code-block:: console
+
+ C:\> pip download --dry-run --report pip-output.json tensorboard
+ Collecting tensorboard
+ Obtaining dependency information from tensorboard 2.7.0
+ Collecting tensorboard-plugin-wit>=1.6.0
+ Obtaining dependency information from tensorboard-plugin-wit 1.8.1
+ Collecting google-auth-oauthlib<0.5,>=0.4.1
+ Obtaining dependency information from google-auth-oauthlib 0.4.6
+ Collecting absl-py>=0.4
+ Obtaining dependency information from absl-py 1.0.0
+ Collecting protobuf>=3.6.0
+ Obtaining dependency information from protobuf 3.19.1
+ Collecting setuptools>=41.0.0
+ Obtaining dependency information from setuptools 60.3.1
+ Collecting wheel>=0.26
+ Obtaining dependency information from wheel 0.37.1
+ Collecting werkzeug>=0.11.15
+ Obtaining dependency information from werkzeug 2.0.2
+ Collecting tensorboard-data-server<0.7.0,>=0.6.0
+ Obtaining dependency information from tensorboard-data-server 0.6.1
+ Collecting markdown>=2.6.8
+ Obtaining dependency information from markdown 3.3.6
+ Collecting grpcio>=1.24.3
+ Using cached grpcio-1.43.0.tar.gz (21.5 MB)
+ Preparing metadata (setup.py) ... done
+ Collecting numpy>=1.12.0
+ Using cached numpy-1.22.0.zip (11.3 MB)
+ Installing build dependencies ... done
+ Getting requirements to build wheel ... done
+ Preparing metadata (pyproject.toml) ... done
+ Collecting requests<3,>=2.21.0
+ Obtaining dependency information from requests 2.27.1
+ Collecting google-auth<3,>=1.6.3
+ Obtaining dependency information from google-auth 2.3.3
+ Collecting six
+ Obtaining dependency information from six 1.16.0
+ Collecting pyasn1-modules>=0.2.1
+ Obtaining dependency information from pyasn1-modules 0.2.8
+ Collecting rsa<5,>=3.1.4
+ Obtaining dependency information from rsa 4.8
+ Collecting cachetools<5.0,>=2.0.0
+ Obtaining dependency information from cachetools 4.2.4
+ Collecting requests-oauthlib>=0.7.0
+ Obtaining dependency information from requests-oauthlib 1.3.0
+ Collecting charset-normalizer~=2.0.0
+ Obtaining dependency information from charset-normalizer 2.0.10
+ Collecting certifi>=2017.4.17
+ Obtaining dependency information from certifi 2021.10.8
+ Collecting idna<4,>=2.5
+ Obtaining dependency information from idna 3.3
+ Collecting urllib3<1.27,>=1.21.1
+ Obtaining dependency information from urllib3 1.26.7
+ Collecting pyasn1<0.5.0,>=0.4.6
+ Obtaining dependency information from pyasn1 0.4.8
+ Collecting oauthlib>=3.0.0
+ Obtaining dependency information from oauthlib 3.1.1
+ Python version: '==3.10.1'
+ Input requirements: 'tensorboard'
+ Resolution: 'tensorboard==2.7.0' 'absl-py==1.0.0' 'google-auth==2.3.3' 'google-auth-oauthlib==0.4.6' 'grpcio==1.43.0' 'markdown==3.3.6' 'numpy==1.22.0' 'protobuf==3.19.1' 'requests==2.27.1' 'tensorboard-data-server==0.6.1' 'tensorboard-plugin-wit==1.8.1' 'werkzeug==2.0.2' 'wheel==0.37.1' 'cachetools==4.2.4' 'certifi==2021.10.8' 'charset-normalizer==2.0.10' 'idna==3.3' 'pyasn1-modules==0.2.8' 'requests-oauthlib==1.3.0' 'rsa==4.8' 'six==1.16.0' 'urllib3==1.26.7' 'oauthlib==3.1.1' 'pyasn1==0.4.8' 'setuptools==60.3.1'
+ JSON report written to 'pip-output.json'.
+
+The contents of ``pip-output.json`` will look like:
+
+.. code-block::
+
+ {
+ "experimental": true,
+ "input_requirements": [
+ "tensorboard"
+ ],
+ "python_version": "==3.10.1",
+ "candidates": {
+ "tensorboard": {
+ "requirement": "tensorboard==2.7.0",
+ "download_info": {
+ "direct_url": {
+ "url": "https://files.pythonhosted.org/packages/2d/eb/80f75ab480cfbd032442f06ec7c15ef88376c5ef7fd6f6bf2e0e03b47e31/tensorboard-2.7.0-py3-none-any.whl",
+ "archive_info": {
+ "hash": "sha256=239f78a4a8dff200ce585a030c787773a8c1184d5c159252f5f85bac4e3c3b38"
+ }
+ },
+ "dist_info_metadata": null
+ },
+ "dependencies": {
+ "tensorboard-plugin-wit": "tensorboard-plugin-wit>=1.6.0",
+ "google-auth-oauthlib": "google-auth-oauthlib<0.5,>=0.4.1",
+ "absl-py": "absl-py>=0.4",
+ "protobuf": "protobuf>=3.6.0",
+ "setuptools": "setuptools>=41.0.0",
+ "wheel": "wheel>=0.26",
+ "werkzeug": "werkzeug>=0.11.15",
+ "tensorboard-data-server": "tensorboard-data-server<0.7.0,>=0.6.0",
+ "markdown": "markdown>=2.6.8",
+ "grpcio": "grpcio>=1.24.3",
+ "numpy": "numpy>=1.12.0",
+ "requests": "requests<3,>=2.21.0",
+ "google-auth": "google-auth<3,>=1.6.3"
+ },
+ "requires_python": ">=3.6"
+ },
+ "absl-py": {
+ "requirement": "absl-py==1.0.0",
+ "download_info": {
+ "direct_url": {
+ "url": "https://files.pythonhosted.org/packages/2c/03/e3e19d3faf430ede32e41221b294e37952e06acc96781c417ac25d4a0324/absl_py-1.0.0-py3-none-any.whl",
+ "archive_info": {
+ "hash": "sha256=84e6dcdc69c947d0c13e5457d056bd43cade4c2393dce00d684aedea77ddc2a3"
+ }
+ },
+ "dist_info_metadata": null
+ },
+ "dependencies": {
+ "six": "six"
+ },
+ "requires_python": ">=3.6"
+ },
+ (...truncated)
+
+The output can be processed with `jq <https://stedolan.github.io/jq/>`_ to produce, e.g., a requirements file that pins the hash of each dependency which provides one:
+
+.. tab:: Unix/macOS
+
+ .. code-block:: console
+
+ $ jq -r <./pip-output.json '.candidates[] | {req: .requirement, hash: .download_info.direct_url.archive_info.hash} | .req + ((.hash | " --hash " + sub("="; ":")) // "")'
+ tensorboard==2.7.0 --hash sha256:239f78a4a8dff200ce585a030c787773a8c1184d5c159252f5f85bac4e3c3b38
+ absl-py==1.0.0 --hash sha256:84e6dcdc69c947d0c13e5457d056bd43cade4c2393dce00d684aedea77ddc2a3
+ google-auth==2.3.3 --hash sha256:a348a50b027679cb7dae98043ac8dbcc1d7951f06d8387496071a1e05a2465c0
+ google-auth-oauthlib==0.4.6 --hash sha256:3f2a6e802eebbb6fb736a370fbf3b055edcb6b52878bf2f26330b5e041316c73
+ grpcio==1.43.0 --hash sha256:735d9a437c262ab039d02defddcb9f8f545d7009ae61c0114e19dda3843febe5
+ markdown==3.3.6 --hash sha256:9923332318f843411e9932237530df53162e29dc7a4e2b91e35764583c46c9a3
+ numpy==1.22.0 --hash sha256:a955e4128ac36797aaffd49ab44ec74a71c11d6938df83b1285492d277db5397
+ protobuf==3.19.1 --hash sha256:e813b1c9006b6399308e917ac5d298f345d95bb31f46f02b60cd92970a9afa17
+ requests==2.27.1 --hash sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d
+ tensorboard-data-server==0.6.1 --hash sha256:809fe9887682d35c1f7d1f54f0f40f98bb1f771b14265b453ca051e2ce58fca7
+ tensorboard-plugin-wit==1.8.1 --hash sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe
+ werkzeug==2.0.2 --hash sha256:63d3dc1cf60e7b7e35e97fa9861f7397283b75d765afcaefd993d6046899de8f
+ wheel==0.37.1 --hash sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a
+ cachetools==4.2.4 --hash sha256:92971d3cb7d2a97efff7c7bb1657f21a8f5fb309a37530537c71b1774189f2d1
+ certifi==2021.10.8 --hash sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569
+ charset-normalizer==2.0.10 --hash sha256:cb957888737fc0bbcd78e3df769addb41fd1ff8cf950dc9e7ad7793f1bf44455
+ idna==3.3 --hash sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff
+ pyasn1-modules==0.2.8 --hash sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74
+ requests-oauthlib==1.3.0 --hash sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d
+ rsa==4.8 --hash sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb
+ six==1.16.0 --hash sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
+ urllib3==1.26.7 --hash sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844
+ oauthlib==3.1.1 --hash sha256:42bf6354c2ed8c6acb54d971fce6f88193d97297e18602a3a886603f9d7730cc
+ pyasn1==0.4.8 --hash sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d
+ setuptools==60.3.1 --hash sha256:2932bfeb248c648dc411ea9714d5a6de7a33ef1a0db2f0fce644d8172b0479e8
+
+.. tab:: Windows
+
+ .. code-block:: console
+
+ C:\> jq -r <./pip-output.json '.candidates[] | {req: .requirement, hash: .download_info.direct_url.archive_info.hash} | .req + ((.hash | " --hash " + sub("="; ":")) // "")'
+ tensorboard==2.7.0 --hash sha256:239f78a4a8dff200ce585a030c787773a8c1184d5c159252f5f85bac4e3c3b38
+ absl-py==1.0.0 --hash sha256:84e6dcdc69c947d0c13e5457d056bd43cade4c2393dce00d684aedea77ddc2a3
+ google-auth==2.3.3 --hash sha256:a348a50b027679cb7dae98043ac8dbcc1d7951f06d8387496071a1e05a2465c0
+ google-auth-oauthlib==0.4.6 --hash sha256:3f2a6e802eebbb6fb736a370fbf3b055edcb6b52878bf2f26330b5e041316c73
+ grpcio==1.43.0 --hash sha256:735d9a437c262ab039d02defddcb9f8f545d7009ae61c0114e19dda3843febe5
+ markdown==3.3.6 --hash sha256:9923332318f843411e9932237530df53162e29dc7a4e2b91e35764583c46c9a3
+ numpy==1.22.0 --hash sha256:a955e4128ac36797aaffd49ab44ec74a71c11d6938df83b1285492d277db5397
+ protobuf==3.19.1 --hash sha256:e813b1c9006b6399308e917ac5d298f345d95bb31f46f02b60cd92970a9afa17
+ requests==2.27.1 --hash sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d
+ tensorboard-data-server==0.6.1 --hash sha256:809fe9887682d35c1f7d1f54f0f40f98bb1f771b14265b453ca051e2ce58fca7
+ tensorboard-plugin-wit==1.8.1 --hash sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe
+ werkzeug==2.0.2 --hash sha256:63d3dc1cf60e7b7e35e97fa9861f7397283b75d765afcaefd993d6046899de8f
+ wheel==0.37.1 --hash sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a
+ cachetools==4.2.4 --hash sha256:92971d3cb7d2a97efff7c7bb1657f21a8f5fb309a37530537c71b1774189f2d1
+ certifi==2021.10.8 --hash sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569
+ charset-normalizer==2.0.10 --hash sha256:cb957888737fc0bbcd78e3df769addb41fd1ff8cf950dc9e7ad7793f1bf44455
+ idna==3.3 --hash sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff
+ pyasn1-modules==0.2.8 --hash sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74
+ requests-oauthlib==1.3.0 --hash sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d
+ rsa==4.8 --hash sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb
+ six==1.16.0 --hash sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
+ urllib3==1.26.7 --hash sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844
+ oauthlib==3.1.1 --hash sha256:42bf6354c2ed8c6acb54d971fce6f88193d97297e18602a3a886603f9d7730cc
+ pyasn1==0.4.8 --hash sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d
+ setuptools==60.3.1 --hash sha256:2932bfeb248c648dc411ea9714d5a6de7a33ef1a0db2f0fce644d8172b0479e8
+
+JSON schema
+-----------
+
+The JSON report is described below, with fields annotated using illustrative mypy types corresponding to the output of `json.load() <https://docs.python.org/3/library/json.html#json.load>`_ upon reading the JSON report. The report contains multiple top-level fields:
+
+* ``experimental: bool``: set to ``True`` as the format is not yet stable.
+* ``input_requirements: List[str]``: strings describing the requirements provided to the pip resolver. Can be parsed with `packaging.requirements.Requirement <https://packaging.pypa.io/en/latest/requirements.html#packaging.requirements.Requirement>`_.
+* ``python_version: str``: a string describing the python interpreter version the resolve was performed for. Can be parsed with `packaging.specifiers.SpecifierSet <https://packaging.pypa.io/en/latest/specifiers.html#packaging.specifiers.SpecifierSet>`_. Currently always an exact ``==`` constraint.
+* ``candidates: Dict``: each package that would have been downloaded with ``pip download`` is represented in the ``candidates`` dict. Each key is the ``name`` of a requirement, since each dependency package name is satisfied by exactly one candidate in the final resolve. A short parsing sketch follows this list.
+
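+As a minimal sketch, assuming a ``pip-output.json`` produced as above (with named input
+requirements) and the ``packaging`` library available, the top-level fields could be consumed
+like so:
+
+.. code-block:: python
+
+    import json
+
+    from packaging.requirements import Requirement
+    from packaging.specifiers import SpecifierSet
+
+    with open("pip-output.json") as f:
+        report = json.load(f)
+
+    # The report format is experimental and may change between pip releases.
+    assert report["experimental"]
+
+    # The requirements provided to the pip resolver.
+    input_requirements = [Requirement(req) for req in report["input_requirements"]]
+    # Currently always an exact == constraint on the interpreter version.
+    python_version = SpecifierSet(report["python_version"])
+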
+Candidates
+----------
+
+Each element of the ``candidates`` dict has the following fields:
+
+* ``requirement: str``: an ``==`` requirement string for the exact version of the candidate that would have been fetched by ``pip download``. Can be parsed with `packaging.requirements.Requirement <https://packaging.pypa.io/en/latest/requirements.html#packaging.requirements.Requirement>`_.
+* ``requires_python: Optional[str]``: a constraint on the executing python interpreter version exerted by this candidate. Can be parsed with `packaging.specifiers.SpecifierSet <https://packaging.pypa.io/en/latest/specifiers.html#packaging.specifiers.SpecifierSet>`_.
+* ``dependencies: Dict[str, str]``: all the dependencies required by this candidate, keyed by package name, with each value a requirement string which can be parsed with `packaging.requirements.Requirement <https://packaging.pypa.io/en/latest/requirements.html#packaging.requirements.Requirement>`_. Each such requirement will have been satisfied by another member of the overall ``candidates`` dict. A sketch of walking the candidates follows this list.
+* ``download_info: Dict``: describes where this candidate can be downloaded from directly, along with any available metadata.
+
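+As a sketch, assuming ``report`` was loaded as in the snippet above, the pinned versions and
+the dependency edges between candidates can be walked directly:
+
+.. code-block:: python
+
+    from packaging.requirements import Requirement
+    from packaging.specifiers import SpecifierSet
+
+    for name, candidate in report["candidates"].items():
+        # The exact pinned version selected for this package.
+        pin = Requirement(candidate["requirement"])
+        # Any constraint exerted on the executing interpreter, when present.
+        if candidate["requires_python"] is not None:
+            requires_python = SpecifierSet(candidate["requires_python"])
+        # Each dependency is itself satisfied by another key of "candidates".
+        for dep_name, dep_req in candidate["dependencies"].items():
+            assert dep_name in report["candidates"]
+            dep = Requirement(dep_req)
+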
+Download Info
+-------------
+
+The ``download_info`` object has the following fields:
+
+* ``direct_url: Dict``: a deserializable representation of a Direct URL as per :pep:`610` for this package's location, which may be a remote URL or local directory.
+* ``dist_info_metadata: Optional[Dict]``: a deserializable representation of a Direct URL as per :pep:`610` for this package's *metadata*, which may be served for individual package downloads by a package index (passed with ``-i``) that implements :pep:`658`.
+
+  If this field's value is non-``None``, it will only ever provide the ``archive_info`` key of the Direct URL JSON schema from :pep:`610`. That key's ``hash`` field may be empty if the anchor tag parsed to obtain this candidate's download info carried :pep:`658`'s bare ``data-dist-info-metadata="true"``, instead of providing a specific checksum value for the candidate's metadata with e.g. ``data-dist-info-metadata="sha256=e8413ab19..."``. A sketch of consuming ``download_info`` follows.
+
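+As a sketch of consuming ``download_info`` (assuming ``report`` was loaded as in the snippets
+above; note that per :pep:`610` only archive downloads carry an ``archive_info`` key, while VCS
+checkouts and local directories carry ``vcs_info`` or ``dir_info`` instead), the hash pins
+produced by the ``jq`` example can also be generated in Python:
+
+.. code-block:: python
+
+    for candidate in report["candidates"].values():
+        direct_url = candidate["download_info"]["direct_url"]
+        archive_info = direct_url.get("archive_info")
+        if archive_info is not None and archive_info.get("hash") is not None:
+            # Hashes are serialized as "<name>=<value>", while pip's --hash
+            # option expects "<name>:<value>".
+            hash_name, hash_value = archive_info["hash"].split("=", 1)
+            print(f"{candidate['requirement']} --hash {hash_name}:{hash_value}")
+        else:
+            print(candidate["requirement"])
+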
.. _changes-to-the-pip-dependency-resolver-in-20-2-2020:
.. _`Resolver changes 2020`:
diff --git a/news/10748.feature.rst b/news/10748.feature.rst
new file mode 100644
index 00000000000..20578ed6735
--- /dev/null
+++ b/news/10748.feature.rst
@@ -0,0 +1 @@
+Add ``--dry-run`` and ``--report`` to ``pip download`` to get a JSON resolution report.
From c068eb5e838f4c5b83f29587a379a708bd99739f Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Fri, 14 Jan 2022 23:19:45 -0500
Subject: [PATCH 4/4] add LinkHash.{from_archive_info(),__post_init__()}
these two objects are intended to be fungible, but play different roles, since LinkHash actually
parses its input and ArchiveInfo does not. ArchiveInfo's JSON (de)serialization does not employ
hash name or value validation, while LinkHash does not offer a JSON serialization.
---
src/pip/_internal/models/link.py | 17 +++++++++++++++-
tests/unit/test_collector.py | 33 ++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index 6f1aa62e83a..a51f9598fd5 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -37,7 +37,12 @@
class LinkHash:
"""Links to content may have embedded hash values. This class parses those.
- `name` must be any member of `_SUPPORTED_HASHES`."""
+ `name` must be any member of `_SUPPORTED_HASHES`.
+
+ This class can be converted to and from `ArchiveInfo`. While ArchiveInfo intends to
+ be JSON-serializable to conform to PEP 610, this class contains the logic for
+ parsing a hash name and value for correctness, and then checking whether that hash
+ conforms to a schema with `.is_hash_allowed()`."""
name: str
value: str
@@ -52,6 +57,9 @@ class LinkHash:
)
)
+ def __post_init__(self) -> None:
+ assert self._hash_re.match(f"{self.name}={self.value}")
+
@classmethod
@functools.lru_cache(maxsize=None)
def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
@@ -66,6 +74,13 @@ def to_archive_info(self) -> ArchiveInfo:
"""Convert to ArchiveInfo to form a DirectUrl instance (see PEP 610)."""
return ArchiveInfo(hash=f"{self.name}={self.value}")
+ @classmethod
+ def from_archive_info(cls, info: ArchiveInfo) -> Optional["LinkHash"]:
+ """Parse an ArchiveInfo hash into a LinkHash instance."""
+ if info.hash is None:
+ return None
+ return cls.split_hash_name_and_value(info.hash)
+
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
"""
Return True if the current hash is allowed by `hashes`.
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index b7e5a5bd9ce..e0d7277403d 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -1048,3 +1048,36 @@ def expand_path(path: str) -> str:
)
def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
assert LinkHash.split_hash_name_and_value(url) == result
+
+
+@pytest.mark.parametrize(
+ "archive_info, link_hash",
+ [
+ (
+ ArchiveInfo(hash=None),
+ None,
+ ),
+ (
+ ArchiveInfo(hash="sha256=aabe42af"),
+ LinkHash(name="sha256", value="aabe42af"),
+ ),
+ # Test invalid hash strings, which ArchiveInfo doesn't validate.
+ (
+ # Invalid hash name.
+ ArchiveInfo(hash="sha500=aabe42af"),
+ None,
+ ),
+ (
+ # Invalid hash value.
+ ArchiveInfo(hash="sha256=g42afbe"),
+ None,
+ ),
+ ],
+)
+def test_link_hash_archive_info_fungibility(
+ archive_info: ArchiveInfo,
+ link_hash: Optional[LinkHash],
+) -> None:
+ assert LinkHash.from_archive_info(archive_info) == link_hash
+ if link_hash is not None:
+ assert link_hash.to_archive_info() == archive_info