Skip to content

Commit cb9636e

Browse files
use a nice dataclass to decouple hash parsing from Link
1 parent 6d3c6d3 commit cb9636e

File tree

3 files changed

+65
-66
lines changed

3 files changed

+65
-66
lines changed

src/pip/_internal/commands/download.py

+12-45
Original file line numberDiff line numberDiff line change
@@ -11,65 +11,32 @@
1111
from pip._internal.cli.cmdoptions import make_target_python
1212
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
1313
from pip._internal.cli.status_codes import SUCCESS
14-
from pip._internal.models.link import Link
14+
from pip._internal.models.link import Link, LinkHash
1515
from pip._internal.req.req_tracker import get_requirement_tracker
1616
from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
1717
from pip._internal.utils.temp_dir import TempDirectory
1818

1919
logger = logging.getLogger(__name__)
2020

2121

22-
@dataclass
23-
class RequirementHash:
24-
hash_name: str
25-
hash_value: str
26-
27-
@classmethod
28-
def from_dist_info_metadata(
29-
cls,
30-
dist_info_metadata: Optional[str],
31-
) -> Optional["RequirementHash"]:
32-
if dist_info_metadata is None:
33-
return None
34-
if dist_info_metadata == "true":
35-
return None
36-
# FIXME: don't use private `_hash_re`!
37-
hash_match = Link._hash_re.match(dist_info_metadata)
38-
if hash_match is None:
39-
return None
40-
hash_name, hash_value = hash_match.groups()
41-
return cls(hash_name=hash_name, hash_value=hash_value)
42-
43-
@classmethod
44-
def from_link(cls, link: Link) -> Optional["RequirementHash"]:
45-
if not link.is_hash_allowed:
46-
return None
47-
hash_name = link.hash_name
48-
hash_value = link.hash
49-
assert hash_name is not None
50-
assert hash_value is not None
51-
return cls(hash_name=hash_name, hash_value=hash_value)
52-
53-
def as_json(self) -> Dict[str, str]:
54-
return {
55-
"hash_name": self.hash_name,
56-
"hash_value": self.hash_value,
57-
}
58-
59-
60-
@dataclass
22+
@dataclass(frozen=True)
6123
class DistInfoMetadata:
6224
"""???/From PEP 658"""
6325

6426
metadata_url: str
65-
metadata_hash: Optional[RequirementHash]
27+
metadata_hash: Optional[LinkHash]
6628

6729
@classmethod
6830
def from_link(cls, link: Link) -> Optional["DistInfoMetadata"]:
6931
if link.dist_info_metadata is None:
7032
return None
33+
7134
metadata_url = f"{link.url_without_fragment}.metadata"
72-
metadata_hash = RequirementHash.from_dist_info_metadata(link.dist_info_metadata)
35+
if link.dist_info_metadata == "true":
36+
metadata_hash = None
37+
else:
38+
metadata_hash = LinkHash.split_hash_name_and_value(link.dist_info_metadata)
39+
7340
return cls(metadata_url=metadata_url, metadata_hash=metadata_hash)
7441

7542
def as_json(self) -> Dict[str, Union[str, Optional[Dict[str, str]]]]:
@@ -81,11 +48,11 @@ def as_json(self) -> Dict[str, Union[str, Optional[Dict[str, str]]]]:
8148
}
8249

8350

84-
@dataclass
51+
@dataclass(frozen=True)
8552
class RequirementDownloadInfo:
8653
req: Requirement
8754
url: str
88-
file_hash: Optional[RequirementHash]
55+
file_hash: Optional[LinkHash]
8956
dist_info_metadata: Optional[DistInfoMetadata]
9057

9158
@classmethod
@@ -97,7 +64,7 @@ def from_req_and_link(
9764
return cls(
9865
req=req,
9966
url=link.url,
100-
file_hash=RequirementHash.from_link(link),
67+
file_hash=link.get_link_hash(),
10168
dist_info_metadata=DistInfoMetadata.from_link(link),
10269
)
10370

src/pip/_internal/models/link.py

+50-17
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import re
77
import urllib.parse
88
import xml.etree.ElementTree
9+
from dataclasses import dataclass
910
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union
1011

1112
from pip._internal.utils.filetypes import WHEEL_EXTENSION
@@ -31,6 +32,42 @@
3132
_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")
3233

3334

35+
# FIXME: test this!!
36+
@dataclass(frozen=True)
37+
class LinkHash:
38+
name: str
39+
value: str
40+
41+
# TODO: consider beginning/ending this with \b?
42+
# TODO: consider re.IGNORECASE?
43+
_hash_re = re.compile(
44+
r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
45+
)
46+
47+
@classmethod
48+
@functools.lru_cache(maxsize=None)
49+
def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
50+
match = cls._hash_re.search(url)
51+
if match is None:
52+
return None
53+
name, value = match.groups()
54+
return cls(name=name, value=value)
55+
56+
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
57+
"""
58+
Return True if the current hash is allowed by `hashes`.
59+
"""
60+
if hashes is None:
61+
return False
62+
return hashes.is_hash_allowed(self.name, hex_digest=self.value)
63+
64+
def as_json(self) -> Dict[str, str]:
65+
return {
66+
"name": self.name,
67+
"value": self.value,
68+
}
69+
70+
3471
def _clean_url_path_part(part: str) -> str:
3572
"""
3673
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@@ -266,22 +303,21 @@ def subdirectory_fragment(self) -> Optional[str]:
266303
return None
267304
return match.group(1)
268305

269-
_hash_re = re.compile(
270-
r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
271-
)
306+
def get_link_hash(self) -> Optional[LinkHash]:
307+
return LinkHash.split_hash_name_and_value(self._url)
272308

273309
@property
274310
def hash(self) -> Optional[str]:
275-
match = self._hash_re.search(self._url)
276-
if match:
277-
return match.group(2)
311+
link_hash = self.get_link_hash()
312+
if link_hash is not None:
313+
return link_hash.value
278314
return None
279315

280316
@property
281317
def hash_name(self) -> Optional[str]:
282-
match = self._hash_re.search(self._url)
283-
if match:
284-
return match.group(1)
318+
link_hash = self.get_link_hash()
319+
if link_hash is not None:
320+
return link_hash.name
285321
return None
286322

287323
@property
@@ -311,19 +347,16 @@ def is_yanked(self) -> bool:
311347

312348
@property
313349
def has_hash(self) -> bool:
314-
return self.hash_name is not None
350+
return self.get_link_hash() is not None
315351

316352
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
317353
"""
318-
Return True if the link has a hash and it is allowed.
354+
Return True if the link has a hash and it is allowed by `hashes`.
319355
"""
320-
if hashes is None or not self.has_hash:
356+
link_hash = self.get_link_hash()
357+
if link_hash is None:
321358
return False
322-
# Assert non-None so mypy knows self.hash_name and self.hash are str.
323-
assert self.hash_name is not None
324-
assert self.hash is not None
325-
326-
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
359+
return link_hash.is_hash_allowed(hashes)
327360

328361

329362
class _CleanResult(NamedTuple):

tests/unit/test_collector.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33
import os.path
44
import re
5+
import urllib.parse
56
import urllib.request
67
import uuid
78
from textwrap import dedent
@@ -15,8 +16,6 @@
1516
from pip._internal.index.collector import (
1617
HTMLPage,
1718
LinkCollector,
18-
_clean_link,
19-
_clean_url_path,
2019
_determine_base_url,
2120
_get_html_page,
2221
_get_html_response,
@@ -28,7 +27,7 @@
2827
from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource
2928
from pip._internal.models.candidate import InstallationCandidate
3029
from pip._internal.models.index import PyPI
31-
from pip._internal.models.link import Link
30+
from pip._internal.models.link import Link, _clean_link, _clean_url_path
3231
from pip._internal.network.session import PipSession
3332
from tests.lib import TestData, make_test_link_collector
3433
from tests.lib.path import Path
@@ -415,7 +414,7 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None:
415414
],
416415
)
417416
def test_clean_link(url: str, clean_url: str) -> None:
418-
assert _clean_link(url) == clean_url
417+
assert _clean_link(Link(url)).parsed == urllib.parse.urlsplit(clean_url)
419418

420419

421420
def _test_parse_links_data_attribute(

0 commit comments

Comments
 (0)