diff --git a/news/6272.bugfix b/news/6272.bugfix new file mode 100644 index 00000000000..401b7b344a3 --- /dev/null +++ b/news/6272.bugfix @@ -0,0 +1 @@ +Parse text in HTML anchors when collecting links to conform with PEP 503. diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 8bdf041fdf0..7614f6b6d99 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -34,7 +34,7 @@ from pip._internal.utils.logging import indent_log from pip._internal.utils.misc import ( ARCHIVE_EXTENSIONS, SUPPORTED_EXTENSIONS, WHEEL_EXTENSION, normalize_path, - redact_password_from_url, + splitext, redact_password_from_url, ) from pip._internal.utils.packaging import check_requires_python from pip._internal.utils.typing import MYPY_CHECK_RUNNING @@ -54,6 +54,7 @@ SecureOrigin = Tuple[str, str, Optional[str]] BuildTag = Tuple[Any, ...] # either emply tuple or Tuple[int, str] CandidateSortingKey = Tuple[int, _BaseVersion, BuildTag, Optional[int]] + Anchor = Tuple[str, Link] __all__ = ['FormatControl', 'PackageFinder'] @@ -568,7 +569,7 @@ def find_all_candidates(self, project_name): This checks index_urls and find_links. All versions found are returned as an InstallationCandidate list. - See _link_package_versions for details on which files are accepted + See _anchor_package_versions for details on which files are accepted """ index_locations = self._get_index_urls_locations(project_name) index_file_loc, index_url_loc = self._sort_locations(index_locations) @@ -576,10 +577,6 @@ def find_all_candidates(self, project_name): self.find_links, expand_dir=True, ) - file_locations = (Link(url) for url in itertools.chain( - index_file_loc, fl_file_loc, - )) - # We trust every url that the user has given us whether it was given # via --index-url or --find-links. # We want to filter out any thing which does not have a secure origin. @@ -600,9 +597,11 @@ def find_all_candidates(self, project_name): canonical_name = canonicalize_name(project_name) formats = self.format_control.get_allowed_formats(canonical_name) search = Search(project_name, canonical_name, formats) + + find_links_locations = (Link(url, '-f') for url in self.find_links) find_links_versions = self._package_versions( # We trust every directly linked archive in find_links - (Link(url, '-f') for url in self.find_links), + ((link.filename, link) for link in find_links_locations), search ) @@ -611,10 +610,15 @@ def find_all_candidates(self, project_name): logger.debug('Analyzing links from page %s', page.url) with indent_log(): page_versions.extend( - self._package_versions(page.iter_links(), search) + self._package_versions(page.iter_anchors(), search) ) - file_versions = self._package_versions(file_locations, search) + file_locations = ( + Link(url) for url in itertools.chain(index_file_loc, fl_file_loc) + ) + file_versions = self._package_versions( + ((link.filename, link) for link in file_locations), search, + ) if file_versions: file_versions.sort(reverse=True) logger.debug( @@ -748,32 +752,32 @@ def _get_pages(self, locations, project_name): _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$') - def _sort_links(self, links): - # type: (Iterable[Link]) -> List[Link] + def _sort_anchors(self, anchors): + # type: (Iterable[Anchor]) -> List[Anchor] """ Returns elements of links in order, non-egg links first, egg links second, while eliminating duplicates """ eggs, no_eggs = [], [] seen = set() # type: Set[Link] - for link in links: + for text, link in anchors: if link not in seen: seen.add(link) if link.egg_fragment: - eggs.append(link) + eggs.append((text, link)) else: - no_eggs.append(link) + no_eggs.append((text, link)) return no_eggs + eggs def _package_versions( self, - links, # type: Iterable[Link] - search # type: Search + anchors, # type: Iterable[Anchor] + search, # type: Search ): # type: (...) -> List[Optional[InstallationCandidate]] result = [] - for link in self._sort_links(links): - v = self._link_package_versions(link, search) + for anchor in self._sort_anchors(anchors): + v = self._anchor_package_versions(anchor, search) if v is not None: result.append(v) return result @@ -784,15 +788,16 @@ def _log_skipped_link(self, link, reason): logger.debug('Skipping link %s; %s', link, reason) self.logged_links.add(link) - def _link_package_versions(self, link, search): - # type: (Link, Search) -> Optional[InstallationCandidate] + def _anchor_package_versions(self, anchor, search): + # type: (Anchor, Search) -> Optional[InstallationCandidate] """Return an InstallationCandidate or None""" + text, link = anchor version = None if link.egg_fragment: egg_info = link.egg_fragment - ext = link.ext + _, ext = splitext(text) else: - egg_info, ext = link.splitext() + egg_info, ext = splitext(text) if not ext: self._log_skipped_link(link, 'not a file') return None @@ -806,12 +811,12 @@ def _link_package_versions(self, link, search): link, 'No binaries permitted for %s' % search.supplied, ) return None - if "macosx10" in link.path and ext == '.zip': + if "macosx10" in text and ext == '.zip': self._log_skipped_link(link, 'macosx10 one') return None if ext == WHEEL_EXTENSION: try: - wheel = Wheel(link.filename) + wheel = Wheel(text) except InvalidWheelFilename: self._log_skipped_link(link, 'invalid wheel filename') return None @@ -853,7 +858,7 @@ def _link_package_versions(self, link, search): support_this_python = check_requires_python(link.requires_python) except specifiers.InvalidSpecifier: logger.debug("Package %s has an invalid Requires-Python entry: %s", - link.filename, link.requires_python) + text, link.requires_python) support_this_python = True if not support_this_python: @@ -962,9 +967,9 @@ def __init__(self, content, url, headers=None): def __str__(self): return redact_password_from_url(self.url) - def iter_links(self): - # type: () -> Iterable[Link] - """Yields all links in the page""" + def iter_anchors(self): + # type: () -> Iterable[Anchor] + """Yields all anchor information in the page""" document = html5lib.parse( self.content, transport_encoding=_get_encoding_from_headers(self.headers), @@ -977,7 +982,8 @@ def iter_links(self): url = _clean_link(urllib_parse.urljoin(base_url, href)) pyrequire = anchor.get('data-requires-python') pyrequire = unescape(pyrequire) if pyrequire else None - yield Link(url, self.url, requires_python=pyrequire) + link = Link(url, self.url, requires_python=pyrequire) + yield (anchor.text, link) Search = namedtuple('Search', 'supplied canonical formats') diff --git a/tests/unit/test_finder.py b/tests/unit/test_finder.py index c8ae693dd50..b537fb829c3 100644 --- a/tests/unit/test_finder.py +++ b/tests/unit/test_finder.py @@ -478,13 +478,13 @@ def setup(self): ) @pytest.mark.parametrize( - 'url', + 'name, url', [ - 'http:/yo/pytest-1.0.tar.gz', - 'http:/yo/pytest-1.0-py2.py3-none-any.whl', + ('pytest-1.0.tar.gz', 'does-not-matter-tar-gz-url'), + ('pytest-1.0-py2.py3-none-any.whl', 'does-not-matter-whl-url'), ], ) - def test_link_package_versions_match(self, url): + def test_anchor_package_versions_match(self, name, url): """Test that 'pytest' archives match for 'pytest'""" link = Link(url) search = Search( @@ -492,28 +492,28 @@ def test_link_package_versions_match(self, url): canonical=self.canonical_name, formats=['source', 'binary'], ) - result = self.finder._link_package_versions(link, search) + result = self.finder._anchor_package_versions((name, link), search) expected = InstallationCandidate(self.search_name, self.version, link) assert result == expected, result @pytest.mark.parametrize( - 'url', + 'name', [ # TODO: Uncomment this test case when #1217 is fixed. - # 'http:/yo/pytest-xdist-1.0.tar.gz', - 'http:/yo/pytest2-1.0.tar.gz', - 'http:/yo/pytest_xdist-1.0-py2.py3-none-any.whl', + # 'pytest-xdist-1.0.tar.gz', + 'pytest2-1.0.tar.gz', + 'pytest_xdist-1.0-py2.py3-none-any.whl', ], ) - def est_link_package_versions_substring_fails(self, url): + def test_anchor_package_versions_substring_fails(self, name): """Test that 'pytest archives won't match for 'pytest'.""" - link = Link(url) + anchor = (name, Link('does-not-matter')) search = Search( supplied=self.search_name, canonical=self.canonical_name, formats=['source', 'binary'], ) - result = self.finder._link_package_versions(link, search) + result = self.finder._anchor_package_versions(anchor, search) assert result is None, result