diff --git a/.gitignore b/.gitignore index d2d5eadfbb1..b139a392213 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,8 @@ tests/data/common_wheels/ # Profiling related artifacts *.prof + +# Custom PR/Issue body files +ISSUE_BODY.md +PR_BODY.md +PR_FIX_PLAN.md diff --git a/docs/html/cli/pip_install.rst b/docs/html/cli/pip_install.rst index 7baeb28f032..354368838b7 100644 --- a/docs/html/cli/pip_install.rst +++ b/docs/html/cli/pip_install.rst @@ -215,11 +215,22 @@ found. pip looks for packages in a number of places: on PyPI (or the index given as ``--index-url``, if not disabled via ``--no-index``), in the local filesystem, and in any additional repositories specified via ``--find-links`` or -``--extra-index-url``. There is no priority in the locations that are searched. +``--extra-index-url``. + +By default, there is no priority in the locations that are searched. Rather they are all checked, and the "best" match for the requirements (in terms of version number - see the :ref:`specification ` for details) is selected. +This behavior can be modified using the ``--index-strategy`` option: + +- ``best-match`` (default): Searches all indexes and picks the version that + best matches the requirement. +- ``first-match``: Always searches every ``--find-links`` location, then consults + the indexes in order (``--index-url``, then each ``--extra-index-url``) and stops + at the first index that provides any candidate for the project, even if none of its + versions satisfy the requirement. This is useful for mitigating dependency confusion attacks. + See the :ref:`pip install Examples`. .. _`0-ssl certificate verification`: @@ -497,6 +508,20 @@ Examples py -m pip install --extra-index-url http://my.package.repo/simple SomePackage + Mitigate dependency confusion by listing the trusted index first and stopping at the first index that provides the package: + + .. tab:: Unix/macOS + + .. code-block:: shell + + python -m pip install --index-strategy first-match --index-url http://my.package.repo/simple --extra-index-url https://pypi.org/simple SomePackage + + .. tab:: Windows + + .. 
code-block:: shell + + py -m pip install --index-strategy first-match --index-url http://my.package.repo/simple --extra-index-url https://pypi.org/simple SomePackage + #. Find pre-release and development versions, in addition to stable versions. By default, pip only finds stable versions. diff --git a/news/8606.feature.rst b/news/8606.feature.rst new file mode 100644 index 00000000000..6c9e8cd615a --- /dev/null +++ b/news/8606.feature.rst @@ -0,0 +1 @@ +Implement ``--index-strategy`` to allow users to prioritize package indexes in the order they are provided. This helps mitigate dependency confusion attacks by stopping the search after the first index that yields a match. diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index 48ebde3971a..f026162fc38 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -402,6 +402,21 @@ def extra_index_url() -> Option: ) +def index_strategy() -> Option: + return Option( + "--index-strategy", + dest="index_strategy", + choices=["first-match", "best-match"], + default="best-match", + help="Select the strategy used to select packages from indexes. " + "Choices: first-match, best-match. " + "Default: best-match. " + "first-match: stop searching indexes after finding the package in the " + "first index (respecting order of --index-url and --extra-index-url). 
" + "best-match: search all indexes for the best version.", + ) + + no_index: Callable[..., Option] = partial( Option, "--no-index", @@ -1249,6 +1264,7 @@ def check_list_path_option(options: Values) -> None: index_url, extra_index_url, no_index, + index_strategy, find_links, uploaded_prior_to, ], diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index 60b82fb9928..82932c8e519 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -437,6 +437,7 @@ def _build_package_finder( release_control=options.release_control, prefer_binary=options.prefer_binary, ignore_requires_python=ignore_requires_python, + index_strategy=options.index_strategy, ) return PackageFinder.create( diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index aa7c2ebd48e..5a5a9e3b13b 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -5,7 +5,6 @@ import datetime import enum import functools -import itertools import logging import re from collections.abc import Iterable @@ -382,6 +381,7 @@ class CandidatePreferences: prefer_binary: bool = False release_control: ReleaseControl | None = None + index_strategy: str = "best-match" @dataclass(frozen=True) @@ -698,6 +698,7 @@ def create( candidate_prefs = CandidatePreferences( prefer_binary=selection_prefs.prefer_binary, release_control=selection_prefs.release_control, + index_strategy=selection_prefs.index_strategy, ) return cls( @@ -899,23 +900,35 @@ def find_all_candidates(self, project_name: str) -> list[InstallationCandidate]: ), ) - page_candidates_it = itertools.chain.from_iterable( - source.page_candidates() - for sources in collected_sources - for source in sources - if source is not None - ) - page_candidates = list(page_candidates_it) + page_candidates: list[InstallationCandidate] = [] + file_links: list[Link] = [] + + if self._candidate_prefs.index_strategy == 
"first-match": + # 1. find-links: collect ALL of them (they are prioritized) + for source in collected_sources.find_links: + if source is not None: + page_candidates.extend(source.page_candidates()) + file_links.extend(source.file_links()) + + # 2. index-urls: stop at the first one that PROVIDES candidates + for source in collected_sources.index_urls: + if source is not None: + curr_pages = list(source.page_candidates()) + curr_files = list(source.file_links()) + if curr_pages or curr_files: + page_candidates.extend(curr_pages) + file_links.extend(curr_files) + break + else: + for sources in collected_sources: + for source in sources: + if source is not None: + page_candidates.extend(source.page_candidates()) + file_links.extend(source.file_links()) - file_links_it = itertools.chain.from_iterable( - source.file_links() - for sources in collected_sources - for source in sources - if source is not None - ) file_candidates = self.evaluate_links( link_evaluator, - sorted(file_links_it, reverse=True), + sorted(file_links, reverse=True), ) if logger.isEnabledFor(logging.DEBUG) and file_candidates: diff --git a/src/pip/_internal/models/selection_prefs.py b/src/pip/_internal/models/selection_prefs.py index 04ef63ab543..52acd2b22e7 100644 --- a/src/pip/_internal/models/selection_prefs.py +++ b/src/pip/_internal/models/selection_prefs.py @@ -18,6 +18,7 @@ class SelectionPreferences: "format_control", "prefer_binary", "ignore_requires_python", + "index_strategy", ] # Don't include an allow_yanked default value to make sure each call @@ -31,6 +32,7 @@ def __init__( format_control: FormatControl | None = None, prefer_binary: bool = False, ignore_requires_python: bool | None = None, + index_strategy: str = "best-match", ) -> None: """Create a SelectionPreferences object. @@ -45,6 +47,9 @@ def __init__( dist over a new source dist. :param ignore_requires_python: Whether to ignore incompatible "Requires-Python" values in links. Defaults to False. 
+ :param index_strategy: Strategy used to select packages from indexes. + "first-match" stops searching after the first index that has candidates. + "best-match" (the default) searches all indexes for the best version. """ if ignore_requires_python is None: ignore_requires_python = False @@ -54,3 +59,4 @@ def __init__( self.format_control = format_control self.prefer_binary = prefer_binary self.ignore_requires_python = ignore_requires_python + self.index_strategy = index_strategy diff --git a/tests/functional/test_index_strategy.py b/tests/functional/test_index_strategy.py new file mode 100644 index 00000000000..157154dc059 --- /dev/null +++ b/tests/functional/test_index_strategy.py @@ -0,0 +1,74 @@ +from tests.lib import PipTestEnvironment, TestData + + +def test_index_strategy_first_match_functional( + script: PipTestEnvironment, data: TestData +) -> None: + """ + Functional test for --index-strategy first-match. + Index 1: data.index_url("simple") -> contains simple 1.0 + Index 2: data.index_url("yanked") -> contains simple 1.0, 2.0, 3.0 + + Note: We use --no-build-isolation to avoid needing setuptools from + the test indexes, since we're only testing index selection logic. 
+ """ + # Verify best-match (default) picks 2.0 (3.0 is yanked) + result = script.pip( + "install", + "simple", + "--dry-run", + "--no-build-isolation", + "--index-url", + data.index_url("simple"), + "--extra-index-url", + data.index_url("yanked"), + ) + assert ( + "Would install simple-2.0" in result.stdout + ), f"Actual output: {result.stdout}" + + # Verify first-match picks 1.0 from the first index (index-url) + result = script.pip( + "install", + "simple", + "--dry-run", + "--no-build-isolation", + "--index-strategy", + "first-match", + "--index-url", + data.index_url("simple"), + "--extra-index-url", + data.index_url("yanked"), + ) + assert ( + "Would install simple-1.0" in result.stdout + ), f"Actual output: {result.stdout}" + + +def test_index_strategy_find_links_combo( + script: PipTestEnvironment, data: TestData +) -> None: + """ + Verify that find-links are still collected in first-match mode. + Find-links: data.find_links -> contains 3.0 + Index-url: data.index_url("simple") -> contains 1.0 + Even in first-match mode, find-links should be searched first and 3.0 picked. + + Note: We use --no-build-isolation to avoid needing setuptools from + the test indexes, since we're only testing index selection logic. + """ + result = script.pip( + "install", + "simple", + "--dry-run", + "--no-build-isolation", + "--index-strategy", + "first-match", + "--find-links", + data.find_links, + "--index-url", + data.index_url("simple"), + ) + assert ( + "Would install simple-3.0" in result.stdout + ), f"Actual output: {result.stdout}" diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py index 50c68f7f008..aeda00c5988 100644 --- a/tests/lib/__init__.py +++ b/tests/lib/__init__.py @@ -96,6 +96,7 @@ def make_test_finder( session: PipSession | None = None, target_python: TargetPython | None = None, uploaded_prior_to: datetime.datetime | None = None, + index_strategy: str = "best-match", ) -> PackageFinder: """ Create a PackageFinder for testing purposes. 
@@ -114,6 +115,7 @@ def make_test_finder( selection_prefs = SelectionPreferences( allow_yanked=True, release_control=release_control, + index_strategy=index_strategy, ) return PackageFinder.create( diff --git a/tests/unit/test_index_strategy.py b/tests/unit/test_index_strategy.py new file mode 100644 index 00000000000..5089e2b1da3 --- /dev/null +++ b/tests/unit/test_index_strategy.py @@ -0,0 +1,69 @@ +from tests.lib import TestData, make_test_finder + + +def test_index_strategy_best_match(data: TestData) -> None: + """Test the default 'best-match' strategy searches all indexes.""" + finder = make_test_finder( + index_urls=[data.index_url("simple"), data.index_url("yanked")], + index_strategy="best-match", + ) + # data.index_url("simple") has simple 1.0 + # data.index_url("yanked") has simple 1.0, 2.0, 3.0 + versions = finder.find_all_candidates("simple") + + # Best match should return versions from all indexes + version_strs = [str(v.version) for v in versions] + assert "1.0" in version_strs + assert "2.0" in version_strs + assert "3.0" in version_strs + # We expect 4 candidates: + # 1.0 from simple index, and 1.0, 2.0, 3.0 from yanked index + assert len(version_strs) == 4 + + +def test_index_strategy_first_match(data: TestData) -> None: + """Test the 'first-match' strategy stops after the first index with hits.""" + # Order: Index 1 (v1.0) then Index 2 (v1.0, v2.0, v3.0) + finder = make_test_finder( + index_urls=[data.index_url("simple"), data.index_url("yanked")], + index_strategy="first-match", + ) + + versions = finder.find_all_candidates("simple") + + # Should stop after Index 1 + version_strs = [str(v.version) for v in versions] + assert version_strs == ["1.0"] + + +def test_index_strategy_first_match_reversed(data: TestData) -> None: + """Test first-match stops at the first index even if it contains better versions.""" + # Order: Index 1 (v1.0, v2.0, v3.0) then Index 2 (v1.0) + finder = make_test_finder( + index_urls=[data.index_url("yanked"), 
data.index_url("simple")], + index_strategy="first-match", + ) + + versions = finder.find_all_candidates("simple") + + # Should stop after Index 1 + version_strs = sorted([str(v.version) for v in versions]) + assert version_strs == ["1.0", "2.0", "3.0"] + # Should not have versions from Index 2 (even though 1.0 is duplicate) + assert len(versions) == 3 + + +def test_index_strategy_find_links_priority(data: TestData) -> None: + """Test that find-links are always collected even in first-match mode.""" + finder = make_test_finder( + find_links=[data.find_links], + index_urls=[data.index_url("simple")], + index_strategy="first-match", + ) + + versions = finder.find_all_candidates("simple") + + # Should collect find-links PLUS the first matching index + version_strs = sorted([str(v.version) for v in versions]) + # find_links (1.0, 2.0, 3.0) + index_url (1.0) + assert version_strs == ["1.0", "1.0", "2.0", "3.0"]