Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,8 @@ tests/data/common_wheels/

# Profiling related artifacts
*.prof

# Custom PR/Issue body files
ISSUE_BODY.md
PR_BODY.md
PR_FIX_PLAN.md
Comment on lines +54 to +58
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't make changes to the repo's .gitignore, if you need to git ignore local files you should create your own user level git ignore, e.g. https://dev.to/fronkan/a-personal-gitignore-even-for-a-single-repository-4o7h

27 changes: 26 additions & 1 deletion docs/html/cli/pip_install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,22 @@ found.
pip looks for packages in a number of places: on PyPI (or the index given as
``--index-url``, if not disabled via ``--no-index``), in the local filesystem,
and in any additional repositories specified via ``--find-links`` or
``--extra-index-url``. There is no priority in the locations that are searched.
``--extra-index-url``.

By default, there is no priority in the locations that are searched.
Rather they are all checked, and the "best" match for the requirements (in
terms of version number - see the
:ref:`specification <pypug:version-specifiers>` for details) is selected.

This behavior can be modified using the ``--index-strategy`` option:

- ``best-match`` (default): Searches all indexes and picks the version that
best matches the requirement.
- ``first-match``: All ``--find-links`` locations are always searched. Indexes are
then consulted in the order they are provided (``--index-url`` first, then each
``--extra-index-url``), and the search stops at the first index that provides
the package. This is useful for mitigating dependency confusion attacks.

See the :ref:`pip install Examples<pip install Examples>`.

.. _`0-ssl certificate verification`:
Expand Down Expand Up @@ -497,6 +508,20 @@ Examples

py -m pip install --extra-index-url http://my.package.repo/simple SomePackage

#. Mitigate dependency confusion by stopping at the first matching index:

.. tab:: Unix/macOS

.. code-block:: shell

python -m pip install --index-strategy first-match --extra-index-url http://my.package.repo/simple SomePackage

.. tab:: Windows

.. code-block:: shell

py -m pip install --index-strategy first-match --extra-index-url http://my.package.repo/simple SomePackage


#. Find pre-release and development versions, in addition to stable versions. By default, pip only finds stable versions.

Expand Down
1 change: 1 addition & 0 deletions news/8606.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implement ``--index-strategy`` to allow users to prioritize package indexes in the order they are provided. This helps mitigate dependency confusion attacks by stopping the search after the first index that yields a match.
16 changes: 16 additions & 0 deletions src/pip/_internal/cli/cmdoptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,21 @@ def extra_index_url() -> Option:
)


def index_strategy() -> Option:
    """Build the ``--index-strategy`` command-line option.

    The parsed value is stored under ``index_strategy``. Only
    ``first-match`` and ``best-match`` are accepted, and ``best-match``
    is used when the option is not given.
    """
    strategies = ["first-match", "best-match"]
    help_text = (
        "Select the strategy used to select packages from indexes. "
        "Choices: first-match, best-match. "
        "Default: best-match. "
        "first-match: stop searching indexes after finding the package in the "
        "first index (respecting order of --index-url and --extra-index-url). "
        "best-match: search all indexes for the best version."
    )
    return Option(
        "--index-strategy",
        dest="index_strategy",
        choices=strategies,
        default="best-match",
        help=help_text,
    )


no_index: Callable[..., Option] = partial(
Option,
"--no-index",
Expand Down Expand Up @@ -1249,6 +1264,7 @@ def check_list_path_option(options: Values) -> None:
index_url,
extra_index_url,
no_index,
index_strategy,
find_links,
uploaded_prior_to,
],
Expand Down
1 change: 1 addition & 0 deletions src/pip/_internal/cli/req_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ def _build_package_finder(
release_control=options.release_control,
prefer_binary=options.prefer_binary,
ignore_requires_python=ignore_requires_python,
index_strategy=options.index_strategy,
)

return PackageFinder.create(
Expand Down
43 changes: 28 additions & 15 deletions src/pip/_internal/index/package_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import datetime
import enum
import functools
import itertools
import logging
import re
from collections.abc import Iterable
Expand Down Expand Up @@ -382,6 +381,7 @@ class CandidatePreferences:

prefer_binary: bool = False
release_control: ReleaseControl | None = None
index_strategy: str = "best-match"


@dataclass(frozen=True)
Expand Down Expand Up @@ -698,6 +698,7 @@ def create(
candidate_prefs = CandidatePreferences(
prefer_binary=selection_prefs.prefer_binary,
release_control=selection_prefs.release_control,
index_strategy=selection_prefs.index_strategy,
)

return cls(
Expand Down Expand Up @@ -899,23 +900,35 @@ def find_all_candidates(self, project_name: str) -> list[InstallationCandidate]:
),
)

page_candidates_it = itertools.chain.from_iterable(
source.page_candidates()
for sources in collected_sources
for source in sources
if source is not None
)
page_candidates = list(page_candidates_it)
page_candidates: list[InstallationCandidate] = []
file_links: list[Link] = []

if self._candidate_prefs.index_strategy == "first-match":
# 1. find-links: collect ALL of them (they are prioritized)
for source in collected_sources.find_links:
if source is not None:
page_candidates.extend(source.page_candidates())
file_links.extend(source.file_links())

# 2. index-urls: stop at the first one that PROVIDES candidates
for source in collected_sources.index_urls:
if source is not None:
curr_pages = list(source.page_candidates())
curr_files = list(source.file_links())
if curr_pages or curr_files:
page_candidates.extend(curr_pages)
file_links.extend(curr_files)
break
else:
for sources in collected_sources:
for source in sources:
if source is not None:
page_candidates.extend(source.page_candidates())
file_links.extend(source.file_links())

file_links_it = itertools.chain.from_iterable(
source.file_links()
for sources in collected_sources
for source in sources
if source is not None
)
file_candidates = self.evaluate_links(
link_evaluator,
sorted(file_links_it, reverse=True),
sorted(file_links, reverse=True),
)

if logger.isEnabledFor(logging.DEBUG) and file_candidates:
Expand Down
6 changes: 6 additions & 0 deletions src/pip/_internal/models/selection_prefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class SelectionPreferences:
"format_control",
"prefer_binary",
"ignore_requires_python",
"index_strategy",
]

# Don't include an allow_yanked default value to make sure each call
Expand All @@ -31,6 +32,7 @@ def __init__(
format_control: FormatControl | None = None,
prefer_binary: bool = False,
ignore_requires_python: bool | None = None,
index_strategy: str = "best-match",
) -> None:
"""Create a SelectionPreferences object.

Expand All @@ -45,6 +47,9 @@ def __init__(
dist over a new source dist.
:param ignore_requires_python: Whether to ignore incompatible
"Requires-Python" values in links. Defaults to False.
:param index_strategy: Strategies for how to select packages from indexes.
"first-match" stops searching after the first index with hits.
"best-match" searches all indexes for the best version.
"""
if ignore_requires_python is None:
ignore_requires_python = False
Expand All @@ -54,3 +59,4 @@ def __init__(
self.format_control = format_control
self.prefer_binary = prefer_binary
self.ignore_requires_python = ignore_requires_python
self.index_strategy = index_strategy
74 changes: 74 additions & 0 deletions tests/functional/test_index_strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from tests.lib import PipTestEnvironment, TestData


def test_index_strategy_first_match_functional(
    script: PipTestEnvironment, data: TestData
) -> None:
    """
    Exercise ``--index-strategy first-match`` end to end via ``pip install``.

    Expected fixture layout:
      * ``data.index_url("simple")`` (used as --index-url) offers simple 1.0
      * ``data.index_url("yanked")`` (used as --extra-index-url) offers
        simple 1.0, 2.0 and 3.0

    ``--no-build-isolation`` is passed so that setuptools does not have to be
    resolvable from the test indexes; only index selection is under test.
    """
    install_args = ["install", "simple", "--dry-run", "--no-build-isolation"]
    index_args = [
        "--index-url",
        data.index_url("simple"),
        "--extra-index-url",
        data.index_url("yanked"),
    ]

    # Default (best-match) strategy: 2.0 is expected, since 3.0 is yanked.
    default_run = script.pip(*install_args, *index_args)
    default_out = default_run.stdout
    assert "Would install simple-2.0" in default_out, f"Actual output: {default_out}"

    # first-match strategy: only the first index (--index-url) is used, so 1.0.
    first_match_run = script.pip(
        *install_args, "--index-strategy", "first-match", *index_args
    )
    first_match_out = first_match_run.stdout
    assert (
        "Would install simple-1.0" in first_match_out
    ), f"Actual output: {first_match_out}"


def test_index_strategy_find_links_combo(
    script: PipTestEnvironment, data: TestData
) -> None:
    """
    Check that find-links locations still contribute in first-match mode.

    Expected fixture layout:
      * ``data.find_links`` (used as --find-links) offers simple 3.0
      * ``data.index_url("simple")`` (used as --index-url) offers simple 1.0

    With first-match enabled, the find-links candidates must still be
    considered, so 3.0 is the version expected to be selected.

    ``--no-build-isolation`` is passed so that setuptools does not have to be
    resolvable from the test indexes; only index selection is under test.
    """
    command = [
        "install",
        "simple",
        "--dry-run",
        "--no-build-isolation",
        "--index-strategy",
        "first-match",
        "--find-links",
        data.find_links,
        "--index-url",
        data.index_url("simple"),
    ]
    outcome = script.pip(*command)
    output = outcome.stdout
    assert "Would install simple-3.0" in output, f"Actual output: {output}"
2 changes: 2 additions & 0 deletions tests/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def make_test_finder(
session: PipSession | None = None,
target_python: TargetPython | None = None,
uploaded_prior_to: datetime.datetime | None = None,
index_strategy: str = "best-match",
) -> PackageFinder:
"""
Create a PackageFinder for testing purposes.
Expand All @@ -114,6 +115,7 @@ def make_test_finder(
selection_prefs = SelectionPreferences(
allow_yanked=True,
release_control=release_control,
index_strategy=index_strategy,
)

return PackageFinder.create(
Expand Down
69 changes: 69 additions & 0 deletions tests/unit/test_index_strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from tests.lib import TestData, make_test_finder


def test_index_strategy_best_match(data: TestData) -> None:
    """The default 'best-match' strategy gathers candidates from every index."""
    # Expected fixture layout:
    #   "simple" index -> simple 1.0
    #   "yanked" index -> simple 1.0, 2.0, 3.0
    index_urls = [data.index_url("simple"), data.index_url("yanked")]
    finder = make_test_finder(index_urls=index_urls, index_strategy="best-match")

    candidates = finder.find_all_candidates("simple")
    found_versions = [str(candidate.version) for candidate in candidates]

    # Every version offered by either index must be present.
    for expected in ("1.0", "2.0", "3.0"):
        assert expected in found_versions

    # One candidate from the "simple" index plus three from the "yanked" index.
    assert len(found_versions) == 4


def test_index_strategy_first_match(data: TestData) -> None:
    """The 'first-match' strategy ignores indexes after the first one with hits."""
    # Index order: "simple" (expected: 1.0) before "yanked" (expected: 1.0-3.0).
    index_urls = [data.index_url("simple"), data.index_url("yanked")]
    finder = make_test_finder(index_urls=index_urls, index_strategy="first-match")

    candidates = finder.find_all_candidates("simple")

    # Only the first index may contribute candidates.
    found_versions = [str(candidate.version) for candidate in candidates]
    assert found_versions == ["1.0"]


def test_index_strategy_first_match_reversed(data: TestData) -> None:
    """first-match uses only the first index, even when it holds more versions."""
    # Index order: "yanked" (expected: 1.0-3.0) before "simple" (expected: 1.0).
    index_urls = [data.index_url("yanked"), data.index_url("simple")]
    finder = make_test_finder(index_urls=index_urls, index_strategy="first-match")

    candidates = finder.find_all_candidates("simple")

    # All versions come from the first index only.
    found_versions = sorted(str(candidate.version) for candidate in candidates)
    assert found_versions == ["1.0", "2.0", "3.0"]
    # The duplicate 1.0 from the second index must not have been added.
    assert len(candidates) == 3


def test_index_strategy_find_links_priority(data: TestData) -> None:
    """find-links sources are always included, even under 'first-match'."""
    finder = make_test_finder(
        index_strategy="first-match",
        find_links=[data.find_links],
        index_urls=[data.index_url("simple")],
    )

    candidates = finder.find_all_candidates("simple")

    # Expected: find-links candidates (1.0, 2.0, 3.0) plus the first matching
    # index's candidate (1.0), hence the duplicated "1.0".
    found_versions = sorted(str(candidate.version) for candidate in candidates)
    assert found_versions == ["1.0", "1.0", "2.0", "3.0"]