Skip to content

Commit 353b8b0

Browse files
create LinkHash and check out dist-info-metadata (PEP 658)
- use PEP 658 dist-info-metadata when --use-feature=fast-deps is on - add testing
1 parent c86f9f1 commit 353b8b0

File tree

10 files changed

+684
-142
lines changed

10 files changed

+684
-142
lines changed

src/pip/_internal/index/collector.py

+4-99
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88
import itertools
99
import logging
1010
import os
11-
import re
1211
import urllib.parse
1312
import urllib.request
14-
import xml.etree.ElementTree
1513
from html.parser import HTMLParser
1614
from optparse import Values
1715
from typing import (
@@ -33,12 +31,12 @@
3331
from pip._vendor.requests.exceptions import RetryError, SSLError
3432

3533
from pip._internal.exceptions import NetworkConnectionError
36-
from pip._internal.models.link import Link
34+
from pip._internal.models.link import HTMLElement, Link
3735
from pip._internal.models.search_scope import SearchScope
3836
from pip._internal.network.session import PipSession
3937
from pip._internal.network.utils import raise_for_status
4038
from pip._internal.utils.filetypes import is_archive_file
41-
from pip._internal.utils.misc import pairwise, redact_auth_from_url
39+
from pip._internal.utils.misc import redact_auth_from_url
4240
from pip._internal.vcs import vcs
4341

4442
from .sources import CandidatesFromPage, LinkSource, build_source
@@ -50,7 +48,6 @@
5048

5149
logger = logging.getLogger(__name__)
5250

53-
HTMLElement = xml.etree.ElementTree.Element
5451
ResponseHeaders = MutableMapping[str, str]
5552

5653

@@ -182,94 +179,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str:
182179
return page_url
183180

184181

185-
def _clean_url_path_part(part: str) -> str:
186-
"""
187-
Clean a "part" of a URL path (i.e. after splitting on "@" or "%2F").
188-
"""
189-
# We unquote prior to quoting to make sure nothing is double quoted.
190-
return urllib.parse.quote(urllib.parse.unquote(part))
191-
192-
193-
def _clean_file_url_path(part: str) -> str:
194-
"""
195-
Clean the first part of a URL path that corresponds to a local
196-
filesystem path (i.e. the first part after splitting on "@" or "%2F").
197-
"""
198-
# We unquote prior to quoting to make sure nothing is double quoted.
199-
# Also, on Windows the path part might contain a drive letter which
200-
# should not be quoted. On Linux where drive letters do not
201-
# exist, the colon should be quoted. We rely on urllib.request
202-
# to do the right thing here.
203-
return urllib.request.pathname2url(urllib.request.url2pathname(part))
204-
205-
206-
# Reserved separators: "@" and "%2F" (the percent-encoded form of "/").
207-
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
208-
209-
210-
def _clean_url_path(path: str, is_local_path: bool) -> str:
211-
"""
212-
Clean the path portion of a URL.
213-
"""
214-
if is_local_path:
215-
clean_func = _clean_file_url_path
216-
else:
217-
clean_func = _clean_url_path_part
218-
219-
# Split on the reserved characters prior to cleaning so that
220-
# revision strings in VCS URLs are properly preserved.
221-
parts = _reserved_chars_re.split(path)
222-
223-
cleaned_parts = []
224-
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
225-
cleaned_parts.append(clean_func(to_clean))
226-
# Normalize %xx escapes (e.g. %2f -> %2F)
227-
cleaned_parts.append(reserved.upper())
228-
229-
return "".join(cleaned_parts)
230-
231-
232-
def _clean_link(url: str) -> str:
233-
"""
234-
Make sure a link is fully quoted.
235-
For example, if ' ' occurs in the URL, it will be replaced with "%20",
236-
and without double-quoting other characters.
237-
"""
238-
# Split the URL into parts according to the general structure
239-
# `scheme://netloc/path;parameters?query#fragment`.
240-
result = urllib.parse.urlparse(url)
241-
# If the netloc is empty, then the URL refers to a local filesystem path.
242-
is_local_path = not result.netloc
243-
path = _clean_url_path(result.path, is_local_path=is_local_path)
244-
return urllib.parse.urlunparse(result._replace(path=path))
245-
246-
247-
def _create_link_from_element(
248-
element_attribs: Dict[str, Optional[str]],
249-
page_url: str,
250-
base_url: str,
251-
) -> Optional[Link]:
252-
"""
253-
Convert an anchor element's attributes in a simple repository page to a Link.
254-
"""
255-
href = element_attribs.get("href")
256-
if not href:
257-
return None
258-
259-
url = _clean_link(urllib.parse.urljoin(base_url, href))
260-
pyrequire = element_attribs.get("data-requires-python")
261-
yanked_reason = element_attribs.get("data-yanked")
262-
263-
link = Link(
264-
url,
265-
comes_from=page_url,
266-
requires_python=pyrequire,
267-
yanked_reason=yanked_reason,
268-
)
269-
270-
return link
271-
272-
273182
class CacheablePageContent:
274183
def __init__(self, page: "HTMLPage") -> None:
275184
assert page.cache_link_parsing
@@ -326,7 +235,7 @@ def _parse_links_html5lib(page: "HTMLPage") -> Iterable[Link]:
326235
url = page.url
327236
base_url = _determine_base_url(document, url)
328237
for anchor in document.findall(".//a"):
329-
link = _create_link_from_element(
238+
link = Link.from_element(
330239
anchor.attrib,
331240
page_url=url,
332241
base_url=base_url,
@@ -353,11 +262,7 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin
353262
url = page.url
354263
base_url = parser.base_url or url
355264
for anchor in parser.anchors:
356-
link = _create_link_from_element(
357-
anchor,
358-
page_url=url,
359-
base_url=base_url,
360-
)
265+
link = Link.from_element(anchor, page_url=url, base_url=base_url)
361266
if link is None:
362267
continue
363268
yield link

src/pip/_internal/metadata/__init__.py

+21
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,24 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio
103103
:param canonical_name: Normalized project name of the given wheel.
104104
"""
105105
return select_backend().Distribution.from_wheel(wheel, canonical_name)
106+
107+
108+
def get_metadata_distribution(
109+
metadata_path: str,
110+
filename: str,
111+
canonical_name: str,
112+
) -> BaseDistribution:
113+
"""Get the representation of the specified METADATA file.
114+
115+
This returns a Distribution instance from the chosen backend based on the contents
116+
of the file at ``metadata_path``.
117+
118+
:param metadata_path: Path to the METADATA file.
119+
:param filename: Filename for the dist this metadata represents.
120+
:param canonical_name: Normalized project name of the given dist.
121+
"""
122+
return select_backend().Distribution.from_metadata_file(
123+
metadata_path,
124+
filename,
125+
canonical_name,
126+
)

src/pip/_internal/metadata/base.py

+15
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,21 @@ def from_directory(cls, directory: str) -> "BaseDistribution":
100100
"""
101101
raise NotImplementedError()
102102

103+
@classmethod
104+
def from_metadata_file(
105+
cls,
106+
metadata_path: str,
107+
filename: str,
108+
project_name: str,
109+
) -> "BaseDistribution":
110+
"""Load the distribution from the contents of a METADATA file.
111+
112+
:param metadata_path: The path to a METADATA file.
113+
:param filename: File name for the dist with this metadata.
114+
:param project_name: Name of the project this dist represents.
115+
"""
116+
raise NotImplementedError()
117+
103118
@classmethod
104119
def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution":
105120
"""Load the distribution from a given wheel.

src/pip/_internal/metadata/importlib/_dists.py

+11
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ def from_directory(cls, directory: str) -> BaseDistribution:
115115
dist = importlib.metadata.Distribution.at(info_location)
116116
return cls(dist, info_location, info_location.parent)
117117

118+
@classmethod
119+
def from_metadata_file(
120+
cls,
121+
metadata_path: str,
122+
filename: str,
123+
project_name: str,
124+
) -> BaseDistribution:
125+
metadata_location = pathlib.Path(metadata_path)
126+
dist = importlib.metadata.Distribution.at(metadata_location.parent)
127+
return cls(dist, metadata_location.parent, None)
128+
118129
@classmethod
119130
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
120131
try:

src/pip/_internal/metadata/pkg_resources.py

+19
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,25 @@ def from_directory(cls, directory: str) -> BaseDistribution:
9292
dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
9393
return cls(dist)
9494

95+
@classmethod
96+
def from_metadata_file(
97+
cls,
98+
metadata_path: str,
99+
filename: str,
100+
project_name: str,
101+
) -> BaseDistribution:
102+
with open(metadata_path, "rb") as f:
103+
metadata = f.read()
104+
metadata_text = {
105+
"METADATA": metadata,
106+
}
107+
dist = pkg_resources.DistInfoDistribution(
108+
location=filename,
109+
metadata=WheelMetadata(metadata_text, filename),
110+
project_name=project_name,
111+
)
112+
return cls(dist)
113+
95114
@classmethod
96115
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
97116
try:

0 commit comments

Comments
 (0)