
Commit 3ec5889

implement --report
- create LinkWithSource to retain attrs from InstallRequirement
- add tests for report output for top-level requirements
- add tests for more of the report JSON format
- add passing tests for JSON report output including PEP 658!
- add docstrings to several classes and functions, including tests!
- move the --report implementation into resolvelib
- use an abstract base class instead of a Union for InfoType
- use frozen dataclasses for InfoType subclasses
1 parent 39ccd3f commit 3ec5889

20 files changed, +1107 −237 lines

src/pip/_internal/commands/download.py

+41 −1
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 from optparse import Values
@@ -7,7 +8,10 @@
 from pip._internal.cli.cmdoptions import make_target_python
 from pip._internal.cli.req_command import RequirementCommand, with_cleanup
 from pip._internal.cli.status_codes import SUCCESS
+from pip._internal.exceptions import CommandError
 from pip._internal.operations.build.build_tracker import get_build_tracker
+from pip._internal.resolution.base import RequirementSetWithCandidates
+from pip._internal.resolution.resolvelib.reporter import ResolutionResult
 from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
 from pip._internal.utils.temp_dir import TempDirectory
 
@@ -66,7 +70,22 @@ def add_options(self) -> None:
             "--dry-run",
             dest="dry_run",
             action="store_true",
-            help="Avoid actually downloading wheels.",
+            help=(
+                "Avoid actually downloading wheels or sdists. "
+                "Intended to be used with --report."
+            ),
+        )
+
+        self.cmd_opts.add_option(
+            "--report",
+            "--resolution-report",
+            dest="json_report_file",
+            metavar="file",
+            default=None,
+            help=(
+                "Print a JSON object representing the resolve into <file>. "
+                "Often used with --dry-run."
+            ),
         )
 
         cmdoptions.add_target_python_options(self.cmd_opts)
@@ -146,4 +165,25 @@ def run(self, options: Values, args: List[str]) -> int:
         if downloaded:
             write_output("Successfully downloaded %s", " ".join(downloaded))
 
+        # The rest of this method pertains to generating the ResolutionReport
+        # with --report.
+        if not options.json_report_file:
+            return SUCCESS
+        if not isinstance(requirement_set, RequirementSetWithCandidates):
+            raise CommandError(
+                "The legacy resolver is being used via "
+                "--use-deprecated=legacy-resolver. "
+                "The legacy resolver does not retain detailed dependency information, "
+                "so `pip download --report` cannot be used with it."
+            )
+
+        resolution_result = ResolutionResult.generate_resolve_report(
+            reqs, requirement_set
+        )
+
+        # Write the full report data to the JSON output file.
+        with open(options.json_report_file, "w") as f:
+            json.dump(resolution_result.to_dict(), f, indent=4)
+        write_output(f"JSON report written to '{options.json_report_file}'.")
+
         return SUCCESS
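Since the diff shows the report being serialized but not its schema, here is a minimal sketch of consuming the output file downstream. It assumes only what the hunk above guarantees: `json.dump(resolution_result.to_dict(), f, indent=4)` leaves a single JSON object in the file named by --report; the exact key layout is defined by ResolutionResult.to_dict(), which is not part of this file's diff.

import json

# Read back the report produced by e.g.:
#   pip download --dry-run --report report.json <requirements...>
# We assume only that the file holds one JSON object; its keys are
# determined by ResolutionResult.to_dict() and are not shown here.
with open("report.json") as f:
    report = json.load(f)

print(sorted(report))  # inspect the top-level keys of the resolve report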

src/pip/_internal/index/collector.py

+4 −98
@@ -8,7 +8,6 @@
 import itertools
 import logging
 import os
-import re
 import urllib.parse
 import urllib.request
 import xml.etree.ElementTree
@@ -33,12 +32,12 @@
 from pip._vendor.requests.exceptions import RetryError, SSLError
 
 from pip._internal.exceptions import NetworkConnectionError
-from pip._internal.models.link import Link
+from pip._internal.models.link import HTMLElement, Link
 from pip._internal.models.search_scope import SearchScope
 from pip._internal.network.session import PipSession
 from pip._internal.network.utils import raise_for_status
 from pip._internal.utils.filetypes import is_archive_file
-from pip._internal.utils.misc import pairwise, redact_auth_from_url
+from pip._internal.utils.misc import redact_auth_from_url
 from pip._internal.vcs import vcs
 
 from .sources import CandidatesFromPage, LinkSource, build_source
@@ -50,7 +49,6 @@
 
 logger = logging.getLogger(__name__)
 
-HTMLElement = xml.etree.ElementTree.Element
 ResponseHeaders = MutableMapping[str, str]
 
 
@@ -182,94 +180,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str:
     return page_url
 
 
-def _clean_url_path_part(part: str) -> str:
-    """
-    Clean a "part" of a URL path (i.e. after splitting on "@" characters).
-    """
-    # We unquote prior to quoting to make sure nothing is double quoted.
-    return urllib.parse.quote(urllib.parse.unquote(part))
-
-
-def _clean_file_url_path(part: str) -> str:
-    """
-    Clean the first part of a URL path that corresponds to a local
-    filesystem path (i.e. the first part after splitting on "@" characters).
-    """
-    # We unquote prior to quoting to make sure nothing is double quoted.
-    # Also, on Windows the path part might contain a drive letter which
-    # should not be quoted. On Linux where drive letters do not
-    # exist, the colon should be quoted. We rely on urllib.request
-    # to do the right thing here.
-    return urllib.request.pathname2url(urllib.request.url2pathname(part))
-
-
-# percent-encoded: /
-_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
-
-
-def _clean_url_path(path: str, is_local_path: bool) -> str:
-    """
-    Clean the path portion of a URL.
-    """
-    if is_local_path:
-        clean_func = _clean_file_url_path
-    else:
-        clean_func = _clean_url_path_part
-
-    # Split on the reserved characters prior to cleaning so that
-    # revision strings in VCS URLs are properly preserved.
-    parts = _reserved_chars_re.split(path)
-
-    cleaned_parts = []
-    for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
-        cleaned_parts.append(clean_func(to_clean))
-        # Normalize %xx escapes (e.g. %2f -> %2F)
-        cleaned_parts.append(reserved.upper())
-
-    return "".join(cleaned_parts)
-
-
-def _clean_link(url: str) -> str:
-    """
-    Make sure a link is fully quoted.
-    For example, if ' ' occurs in the URL, it will be replaced with "%20",
-    and without double-quoting other characters.
-    """
-    # Split the URL into parts according to the general structure
-    # `scheme://netloc/path;parameters?query#fragment`.
-    result = urllib.parse.urlparse(url)
-    # If the netloc is empty, then the URL refers to a local filesystem path.
-    is_local_path = not result.netloc
-    path = _clean_url_path(result.path, is_local_path=is_local_path)
-    return urllib.parse.urlunparse(result._replace(path=path))
-
-
-def _create_link_from_element(
-    element_attribs: Dict[str, Optional[str]],
-    page_url: str,
-    base_url: str,
-) -> Optional[Link]:
-    """
-    Convert an anchor element's attributes in a simple repository page to a Link.
-    """
-    href = element_attribs.get("href")
-    if not href:
-        return None
-
-    url = _clean_link(urllib.parse.urljoin(base_url, href))
-    pyrequire = element_attribs.get("data-requires-python")
-    yanked_reason = element_attribs.get("data-yanked")
-
-    link = Link(
-        url,
-        comes_from=page_url,
-        requires_python=pyrequire,
-        yanked_reason=yanked_reason,
-    )
-
-    return link
-
-
 class CacheablePageContent:
     def __init__(self, page: "HTMLPage") -> None:
         assert page.cache_link_parsing
@@ -326,7 +236,7 @@ def _parse_links_html5lib(page: "HTMLPage") -> Iterable[Link]:
     url = page.url
     base_url = _determine_base_url(document, url)
     for anchor in document.findall(".//a"):
-        link = _create_link_from_element(
+        link = Link.from_element(
            anchor.attrib,
            page_url=url,
            base_url=base_url,
@@ -353,11 +263,7 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Link]:
     url = page.url
     base_url = parser.base_url or url
     for anchor in parser.anchors:
-        link = _create_link_from_element(
-            anchor,
-            page_url=url,
-            base_url=base_url,
-        )
+        link = Link.from_element(anchor, page_url=url, base_url=base_url)
         if link is None:
             continue
         yield link
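The models/link.py side of this refactor is not included in the hunks above, but the deleted _create_link_from_element suggests what the new Link.from_element classmethod looks like. A sketch reconstructed from the removed helper — the committed version may differ, e.g. in where _clean_link now lives:

@classmethod
def from_element(
    cls,
    anchor_attribs: Dict[str, Optional[str]],
    page_url: str,
    base_url: str,
) -> Optional["Link"]:
    """Convert an anchor element in a simple repository page to a Link."""
    href = anchor_attribs.get("href")
    if not href:
        return None

    # _clean_link() is assumed to have moved to models/link.py along with
    # this helper; the diff only shows the call sites in collector.py.
    url = _clean_link(urllib.parse.urljoin(base_url, href))
    return cls(
        url,
        comes_from=page_url,
        requires_python=anchor_attribs.get("data-requires-python"),
        yanked_reason=anchor_attribs.get("data-yanked"),
    )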

src/pip/_internal/metadata/base.py

+3
@@ -120,6 +120,9 @@ def __repr__(self) -> str:
     def __str__(self) -> str:
         return f"{self.raw_name} {self.version}"
 
+    def as_serializable_requirement(self) -> Requirement:
+        raise NotImplementedError()
+
     @property
     def location(self) -> Optional[str]:
         """Where the distribution is loaded from.

src/pip/_internal/metadata/pkg_resources.py

+3
@@ -113,6 +113,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
         )
         return cls(dist)
 
+    def as_serializable_requirement(self) -> Requirement:
+        return self._dist.as_requirement()
+
     @property
     def location(self) -> Optional[str]:
         return self._dist.location
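as_serializable_requirement() follows the usual pattern in pip's metadata package: an abstract default on BaseDistribution (previous file) with a backend-specific override here. A hedged usage sketch — get_default_environment() and get_distribution() are pre-existing pip internals, not part of this diff, and the caller shown is hypothetical:

from pip._internal.metadata import get_default_environment

# Hypothetical caller: turn an installed distribution back into a
# requirement via the new as_serializable_requirement() hook.
dist = get_default_environment().get_distribution("pip")
if dist is not None:
    # Prints something like "pip==<installed version>".
    print(str(dist.as_serializable_requirement()))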

src/pip/_internal/models/direct_url.py

+57 −52
@@ -1,8 +1,10 @@
 """ PEP 610 """
+import abc
 import json
 import re
 import urllib.parse
-from typing import Any, Dict, Iterable, Optional, Type, TypeVar, Union
+from dataclasses import dataclass
+from typing import Any, ClassVar, Dict, Iterable, Optional, Type, TypeVar
 
 __all__ = [
     "DirectUrl",
@@ -47,8 +49,39 @@ def _get_required(
     return value
 
 
-def _exactly_one_of(infos: Iterable[Optional["InfoType"]]) -> "InfoType":
-    infos = [info for info in infos if info is not None]
+def _filter_none(**kwargs: Any) -> Dict[str, Any]:
+    """Make dict excluding None values."""
+    return {k: v for k, v in kwargs.items() if v is not None}
+
+
+class InfoType(metaclass=abc.ABCMeta):
+    """Superclass for the types of metadata that can be stored within a "direct URL"."""
+
+    name: ClassVar[str]
+
+    @classmethod
+    @abc.abstractmethod
+    def _from_dict(cls: Type[T], d: Optional[Dict[str, Any]]) -> Optional[T]:
+        """Parse an instance of this class from a JSON-serializable dict."""
+
+    @abc.abstractmethod
+    def _to_dict(self) -> Dict[str, Any]:
+        """Produce a JSON-serializable dict which can be parsed with `._from_dict()`."""
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, Any]) -> "InfoType":
+        """Parse exactly one of the known subclasses from the dict `d`."""
+        return _exactly_one_of(
+            [
+                ArchiveInfo._from_dict(_get(d, dict, "archive_info")),
+                DirInfo._from_dict(_get(d, dict, "dir_info")),
+                VcsInfo._from_dict(_get(d, dict, "vcs_info")),
+            ]
+        )
+
+
+def _exactly_one_of(infos: Iterable[Optional[InfoType]]) -> InfoType:
+    infos = list(filter(None, infos))
     if not infos:
         raise DirectUrlValidationError(
             "missing one of archive_info, dir_info, vcs_info"
@@ -61,23 +94,15 @@ def _exactly_one_of(infos: Iterable[Optional[InfoType]]) -> InfoType:
     return infos[0]
 
 
-def _filter_none(**kwargs: Any) -> Dict[str, Any]:
-    """Make dict excluding None values."""
-    return {k: v for k, v in kwargs.items() if v is not None}
-
-
-class VcsInfo:
-    name = "vcs_info"
+@dataclass(frozen=True)
+class VcsInfo(InfoType):
+    vcs: str
+    commit_id: str
+    requested_revision: Optional[str] = None
+    resolved_revision: Optional[str] = None
+    resolved_revision_type: Optional[str] = None
 
-    def __init__(
-        self,
-        vcs: str,
-        commit_id: str,
-        requested_revision: Optional[str] = None,
-    ) -> None:
-        self.vcs = vcs
-        self.requested_revision = requested_revision
-        self.commit_id = commit_id
+    name: ClassVar[str] = "vcs_info"
 
     @classmethod
     def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]:
@@ -97,14 +122,11 @@ def _to_dict(self) -> Dict[str, Any]:
         )
 
 
-class ArchiveInfo:
-    name = "archive_info"
+@dataclass(frozen=True)
+class ArchiveInfo(InfoType):
+    hash: Optional[str] = None
 
-    def __init__(
-        self,
-        hash: Optional[str] = None,
-    ) -> None:
-        self.hash = hash
+    name: ClassVar[str] = "archive_info"
 
     @classmethod
     def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]:
@@ -116,14 +138,11 @@ def _to_dict(self) -> Dict[str, Any]:
         return _filter_none(hash=self.hash)
 
 
-class DirInfo:
-    name = "dir_info"
+@dataclass(frozen=True)
+class DirInfo(InfoType):
+    editable: bool = False
 
-    def __init__(
-        self,
-        editable: bool = False,
-    ) -> None:
-        self.editable = editable
+    name: ClassVar[str] = "dir_info"
 
     @classmethod
     def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["DirInfo"]:
@@ -135,19 +154,11 @@ def _to_dict(self) -> Dict[str, Any]:
         return _filter_none(editable=self.editable or None)
 
 
-InfoType = Union[ArchiveInfo, DirInfo, VcsInfo]
-
-
+@dataclass(frozen=True)
 class DirectUrl:
-    def __init__(
-        self,
-        url: str,
-        info: InfoType,
-        subdirectory: Optional[str] = None,
-    ) -> None:
-        self.url = url
-        self.info = info
-        self.subdirectory = subdirectory
+    url: str
+    info: InfoType
+    subdirectory: Optional[str] = None
 
     def _remove_auth_from_netloc(self, netloc: str) -> str:
         if "@" not in netloc:
@@ -184,13 +195,7 @@ def from_dict(cls, d: Dict[str, Any]) -> "DirectUrl":
         return DirectUrl(
             url=_get_required(d, str, "url"),
            subdirectory=_get(d, str, "subdirectory"),
-            info=_exactly_one_of(
-                [
-                    ArchiveInfo._from_dict(_get(d, dict, "archive_info")),
-                    DirInfo._from_dict(_get(d, dict, "dir_info")),
-                    VcsInfo._from_dict(_get(d, dict, "vcs_info")),
-                ]
-            ),
+            info=InfoType.from_dict(d),
         )
 
     def to_dict(self) -> Dict[str, Any]:
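With InfoType now an abstract base class and its subclasses frozen dataclasses, instances gain value-based equality for free, so the DirectUrl.from_dict/to_dict round-trip becomes directly checkable. A small sketch using only names visible in this diff — the URL and commit id are placeholders, and we assume to_dict() mirrors from_dict() as it did before this commit:

# Construct a PEP 610 direct URL for a VCS requirement and round-trip it.
direct_url = DirectUrl(
    url="https://github.com/pypa/pip",  # placeholder URL
    info=VcsInfo(vcs="git", commit_id="a" * 40),  # placeholder commit hash
)

d = direct_url.to_dict()
# Frozen dataclasses compare by field values, so equality holds after parsing.
assert DirectUrl.from_dict(d) == direct_url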
