Skip to content

Commit 54de6f9

Browse files
duckinatorichard26notatallshaw
authored
Make pip cache {purge, remove} delete additional unneeded files (#9058)
These commands now remove:
- wheel cache folders without `.whl` files.
- empty folders in the HTTP cache.
- `selfcheck.json`, which pip does not use anymore.

Co-authored-by: Richard Si <sichard26@gmail.com>
Co-authored-by: Damian Shaw <damian.peter.shaw@gmail.com>
1 parent 9548a41 commit 54de6f9

File tree

5 files changed

+381
-3
lines changed

5 files changed

+381
-3
lines changed

news/9058.feature.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
``pip cache purge`` and ``pip cache remove`` now clean up empty directories
2+
and legacy files left by older pip versions.

src/pip/_internal/commands/cache.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,31 @@ def remove_cache_items(self, options: Values, args: list[str]) -> None:
189189
bytes_removed += os.stat(filename).st_size
190190
os.unlink(filename)
191191
logger.verbose("Removed %s", filename)
192+
193+
http_dirs = filesystem.subdirs_without_files(self._cache_dir(options, "http"))
194+
wheel_dirs = filesystem.subdirs_without_wheels(
195+
self._cache_dir(options, "wheels")
196+
)
197+
dirs = [*http_dirs, *wheel_dirs]
198+
199+
for subdir in dirs:
200+
try:
201+
for file in subdir.iterdir():
202+
file.unlink(missing_ok=True)
203+
subdir.rmdir()
204+
except FileNotFoundError:
205+
# If the directory is already gone, that's fine.
206+
pass
207+
logger.verbose("Removed %s", subdir)
208+
209+
# selfcheck.json is no longer used by pip.
210+
selfcheck_json = self._cache_dir(options, "selfcheck.json")
211+
if os.path.isfile(selfcheck_json):
212+
os.remove(selfcheck_json)
213+
logger.verbose("Removed legacy selfcheck.json file")
214+
192215
logger.info("Files removed: %s (%s)", len(files), format_size(bytes_removed))
216+
logger.info("Directories removed: %s", len(dirs))
193217

194218
def purge_cache(self, options: Values, args: list[str]) -> None:
195219
if args:

src/pip/_internal/utils/filesystem.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import sys
88
from collections.abc import Generator
99
from contextlib import contextmanager
10+
from pathlib import Path
1011
from tempfile import NamedTemporaryFile
11-
from typing import Any, BinaryIO, cast
12+
from typing import Any, BinaryIO, Callable, cast
1213

1314
from pip._internal.utils.compat import get_path_uid
1415
from pip._internal.utils.misc import format_size
@@ -162,3 +163,41 @@ def copy_directory_permissions(directory: str, target_file: BinaryIO) -> None:
162163
os.chmod(target_file.fileno(), mode)
163164
elif os.chmod in os.supports_follow_symlinks:
164165
os.chmod(target_file.name, mode, follow_symlinks=False)
166+
167+
168+
def _subdirs_without_generic(
    path: str, predicate: Callable[[str, list[str]], bool]
) -> Generator[Path]:
    """Yield directories under ``path`` holding no files matching ``predicate``.

    ``path`` is resolved and walked with ``os.walk``. ``predicate`` is called
    once per visited directory with that directory's path (as a string) and
    the names of the files directly inside it. A true result marks that
    directory, and each of its ancestors up to ``path``, as one to keep.

    Every other visited directory is yielded in reverse-sorted order, so a
    nested directory always comes before its parent and callers can delete
    the directories in the order received. ``path`` itself is yielded too
    when nothing beneath it matched. Nothing is yielded if ``path`` does not
    exist, because ``os.walk`` ignores listing errors by default.
    """
    base = Path(path).resolve()
    visited = []
    kept = set()

    for root_str, _, filenames in os.walk(base):
        root = Path(root_str)
        visited.append(root)
        if not predicate(root_str, filenames):
            continue
        # Keep this directory and its ancestors. Whenever a directory is in
        # ``kept`` its ancestors are as well, so the climb can stop early.
        # Only directories at or below ``base`` are ever yielded, so there
        # is no need to climb past it.
        current = root
        while current not in kept:
            kept.add(current)
            if current == base:
                break
            current = current.parent

    # Reverse-sorted paths place children ahead of their parents.
    for candidate in sorted(visited, reverse=True):
        if candidate not in kept:
            yield candidate
192+
193+
194+
def subdirs_without_files(path: str) -> Generator[Path]:
    """Yield every directory under ``path`` that has no files beneath it.

    A directory is skipped as soon as it, or any directory nested inside it,
    directly contains at least one file. Directories are yielded children
    first, and ``path`` itself is included when its whole tree is file-free.
    """
    return _subdirs_without_generic(path, lambda root, filenames: bool(filenames))
197+
198+
199+
def subdirs_without_wheels(path: str) -> Generator[Path]:
    """Yield every directory under ``path`` that has no .whl files beneath it.

    A directory is skipped as soon as it, or any directory nested inside it,
    directly contains a file whose name ends with ``.whl``. Directories that
    only hold other files are still yielded. Directories are yielded children
    first, and ``path`` itself is included when its tree has no wheels.
    """

    def contains_wheel(root: str, filenames: list[str]) -> bool:
        return any(name.endswith(".whl") for name in filenames)

    return _subdirs_without_generic(path, contains_wheel)

tests/functional/test_cache.py

Lines changed: 110 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import json
12
import os
3+
import re
24
import shutil
35
from glob import glob
46
from typing import Callable
@@ -256,7 +258,7 @@ def test_cache_purge_with_empty_cache(script: PipTestEnvironment) -> None:
256258
and exit without an error code."""
257259
result = script.pip("cache", "purge", allow_stderr_warning=True)
258260
assert result.stderr == "WARNING: No matching packages\n"
259-
assert result.stdout == "Files removed: 0 (0 bytes)\n"
261+
assert result.stdout == "Files removed: 0 (0 bytes)\nDirectories removed: 0\n"
260262

261263

262264
@pytest.mark.usefixtures("populate_wheel_cache")
@@ -265,7 +267,7 @@ def test_cache_remove_with_bad_pattern(script: PipTestEnvironment) -> None:
265267
and exit without an error code."""
266268
result = script.pip("cache", "remove", "aaa", allow_stderr_warning=True)
267269
assert result.stderr == 'WARNING: No matching packages for pattern "aaa"\n'
268-
assert result.stdout == "Files removed: 0 (0 bytes)\n"
270+
assert result.stdout == "Files removed: 0 (0 bytes)\nDirectories removed: 0\n"
269271

270272

271273
def test_cache_list_too_many_args(script: PipTestEnvironment) -> None:
@@ -413,3 +415,109 @@ def test_cache_abort_when_no_cache_dir(
413415
"ERROR: pip cache commands can not function"
414416
" since cache is disabled." in result.stderr.splitlines()
415417
)
418+
419+
420+
@pytest.fixture
def populate_wheel_cache_with_empty_dirs(wheel_cache_dir: str) -> None:
    """Create wheel-cache directories that contain no ``.whl`` files.

    Three layouts are created under ``wheel_cache_dir``:
    - ``metadata_only/`` holding a single empty ``metadata.json``
    - ``completely_empty/`` with nothing inside
    - ``nested/empty/dirs/``, a chain of otherwise empty directories
    """
    metadata_dir = os.path.join(wheel_cache_dir, "metadata_only")
    os.makedirs(metadata_dir)
    # Only the file's existence matters, so it is left empty.
    with open(os.path.join(metadata_dir, "metadata.json"), "w"):
        pass

    empty_dir = os.path.join(wheel_cache_dir, "completely_empty")
    os.makedirs(empty_dir)

    nested_empty = os.path.join(wheel_cache_dir, "nested", "empty", "dirs")
    os.makedirs(nested_empty)
432+
433+
434+
@pytest.fixture
def populate_http_cache_with_empty_dirs(cache_dir: str) -> None:
    """Create empty directories inside the ``http`` folder of ``cache_dir``.

    Creates ``http/empty1/`` and the nested ``http/empty2/nested/``; no files
    are written.
    """
    http_cache_dir = os.path.join(cache_dir, "http")
    empty1 = os.path.join(http_cache_dir, "empty1")
    empty2 = os.path.join(http_cache_dir, "empty2", "nested")

    os.makedirs(empty1)
    os.makedirs(empty2)
442+
443+
444+
@pytest.fixture
def create_selfcheck_json(cache_dir: str) -> None:
    """Write a ``selfcheck.json`` file directly inside ``cache_dir``.

    The content is a small JSON mapping from a prefix to a ``last_check``
    timestamp and a ``pypi_version``.
    """
    selfcheck_path = os.path.join(cache_dir, "selfcheck.json")
    with open(selfcheck_path, "w") as statefile:
        json.dump(
            {
                "/some/prefix": {
                    "last_check": "2020-01-01T00:00:00",
                    "pypi_version": "20.0.1",
                }
            },
            statefile,
        )
457+
458+
459+
@pytest.mark.usefixtures(
    "populate_wheel_cache_with_empty_dirs",
    "populate_http_cache_with_empty_dirs",
    "create_selfcheck_json",
)
def test_cache_purge_removes_empty_dirs_and_legacy_files(
    script: PipTestEnvironment,
    cache_dir: str,
    wheel_cache_dir: str,
) -> None:
    """Test that ``pip cache purge`` cleans up empty dirs and legacy files.

    Verifies purge removes:
    - Wheel cache directories without .whl files (including nested ones)
    - HTTP cache empty directories (including nested ones)
    - Legacy selfcheck.json file
    and that it reports a positive number of removed directories.
    """
    selfcheck_path = os.path.join(cache_dir, "selfcheck.json")
    http_cache_dir = os.path.join(cache_dir, "http")
    metadata_dir = os.path.join(wheel_cache_dir, "metadata_only")

    # Verify setup
    assert os.path.exists(selfcheck_path)
    assert os.path.exists(metadata_dir)
    assert os.path.exists(os.path.join(http_cache_dir, "empty1"))

    result = script.pip("cache", "purge", "--verbose", allow_stderr_warning=True)

    # Verify all cleanup happened
    assert not os.path.exists(selfcheck_path)
    assert "Removed legacy selfcheck.json file" in result.stdout
    assert not os.path.exists(metadata_dir)
    assert not os.path.exists(os.path.join(wheel_cache_dir, "completely_empty"))
    assert not os.path.exists(os.path.join(wheel_cache_dir, "nested"))
    assert not os.path.exists(os.path.join(http_cache_dir, "empty1"))
    assert not os.path.exists(os.path.join(http_cache_dir, "empty2"))

    # Verify the reported directory count is positive. Checking the match
    # first gives a readable failure if the summary line is missing.
    match = re.search(r"Directories removed: (\d+)", result.stdout)
    assert match is not None, result.stdout
    assert int(match.group(1)) > 0
500+
501+
502+
def test_cache_purge_with_mixed_content(
    script: PipTestEnvironment,
    populate_wheel_cache: list[tuple[str, str]],
    wheel_cache_dir: str,
) -> None:
    """Test purge removes both wheel files and empty directories."""
    # Add an empty directory alongside the wheels
    empty_dir = os.path.join(wheel_cache_dir, "empty_subdir")
    os.makedirs(empty_dir)

    result = script.pip("cache", "purge", "--verbose")

    # Verify wheels and empty directory were removed
    for _name, filepath in populate_wheel_cache:
        assert not os.path.exists(filepath)
    assert not os.path.exists(empty_dir)

    # Verify counts in output
    assert "Directories removed:" in result.stdout
    # Checking the match first gives a readable failure if the summary line
    # is missing.
    match = re.search(r"Files removed: (\d+)", result.stdout)
    assert match is not None, result.stdout
    # NOTE(review): 4 presumably equals the number of wheels created by the
    # populate_wheel_cache fixture -- confirm against that fixture.
    assert int(match.group(1)) == 4

0 commit comments

Comments
 (0)