From b85a0715f6d4855ae19c4e06616f45caf9904961 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 1 Apr 2026 17:58:50 -0500 Subject: [PATCH 1/6] S3: delete directories in batches --- readthedocs/storage/s3_storage.py | 37 ++++++++++++------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/readthedocs/storage/s3_storage.py b/readthedocs/storage/s3_storage.py index d1b710e5635..f10b03e419f 100644 --- a/readthedocs/storage/s3_storage.py +++ b/readthedocs/storage/s3_storage.py @@ -17,6 +17,7 @@ from django.core.exceptions import SuspiciousFileOperation from storages.backends.s3boto3 import S3Boto3Storage from storages.backends.s3boto3 import S3ManifestStaticStorage +from storages.utils import clean_name from readthedocs.storage.mixins import RTDBaseStorage from readthedocs.storage.rclone import RCloneS3Remote @@ -47,42 +48,32 @@ def _rclone(self): def join(self, directory, filepath): return safe_join(directory, filepath) - @staticmethod - def _dirpath(path): - """ - Make the path to end with `/`. - - It may just be Azure, but for listdir to work correctly, this is needed. - """ - path = str(path) - if not path.endswith("/"): - path += "/" - - return path - def delete_directory(self, path): """ Delete all files under a certain path from storage. Many storage backends (S3, Azure storage) don't care about "directories". The directory effectively doesn't exist if there are no files in it. - However, in these backends, there is no "rmdir" operation so you have to recursively - delete all files. + However, in these backends, there is no "rmdir" operation, + but boto3 allows batch actions over a prefix, so we can delete + all files with a certain prefix to achieve the same result + with fewer API calls than deleting each file one by one. :param path: the path to the directory to remove """ + # S3Boto3Storage does this normalization before interacting with + # the path in all its methods, following the same pattern here. + path = self._normalize_name(clean_name(path)) + + # The path needs to end with a slash. + if not path.endswith("/"): + path += "/" + if path in ("", "/"): raise SuspiciousFileOperation("Deleting all storage cannot be right") log.debug("Deleting path from media storage", path=path) - folders, files = self.listdir(self._dirpath(path)) - for folder_name in folders: - if folder_name: - # Recursively delete the subdirectory - self.delete_directory(self.join(path, folder_name)) - for filename in files: - if filename: - self.delete(self.join(path, filename)) + self.bucket.objects.filter(Prefix=path).delete() class S3BuildMediaStorage(OverrideHostnameMixin, RTDS3Storage): From 411d03e96e52ac95ead63945900ac33f86856d26 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Apr 2026 23:23:26 +0000 Subject: [PATCH 2/6] Add tests for S3BuildMediaStorage.delete_directory Agent-Logs-Url: https://github.com/readthedocs/readthedocs.org/sessions/057a8084-523c-4b8e-8ec5-36a36d682330 Co-authored-by: stsewd <4975310+stsewd@users.noreply.github.com> --- readthedocs/storage/tests/test_s3_storage.py | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 readthedocs/storage/tests/test_s3_storage.py diff --git a/readthedocs/storage/tests/test_s3_storage.py b/readthedocs/storage/tests/test_s3_storage.py new file mode 100644 index 00000000000..6fa18fb437e --- /dev/null +++ b/readthedocs/storage/tests/test_s3_storage.py @@ -0,0 +1,40 @@ +from unittest import mock + +import pytest +from django.core.exceptions import SuspiciousFileOperation +from django.test import TestCase, override_settings + + +@override_settings( + S3_MEDIA_STORAGE_BUCKET="readthedocs-test", +) +class TestS3BuildMediaStorageDeleteDirectory(TestCase): + def setUp(self): + from readthedocs.storage.s3_storage import S3BuildMediaStorage + + self.storage = S3BuildMediaStorage() + self.mock_bucket = mock.MagicMock() + # Override the internal _bucket attribute (used by the bucket property) + self.storage._bucket = self.mock_bucket + + def test_delete_directory(self): + self.storage.delete_directory("projects/my-project/en/latest/") + self.mock_bucket.objects.filter.assert_called_once_with( + Prefix="projects/my-project/en/latest/" + ) + self.mock_bucket.objects.filter.return_value.delete.assert_called_once() + + def test_delete_directory_adds_trailing_slash(self): + self.storage.delete_directory("projects/my-project/en/latest") + self.mock_bucket.objects.filter.assert_called_once_with( + Prefix="projects/my-project/en/latest/" + ) + self.mock_bucket.objects.filter.return_value.delete.assert_called_once() + + def test_delete_directory_raises_for_root_path(self): + with pytest.raises(SuspiciousFileOperation): + self.storage.delete_directory("/") + + def test_delete_directory_raises_for_empty_path(self): + with pytest.raises(SuspiciousFileOperation): + self.storage.delete_directory("") From 2d9e45add8df4b30f8e56f11ab807ebb0a33bb83 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 2 Apr 2026 12:09:46 -0500 Subject: [PATCH 3/6] Clean after copilot --- readthedocs/storage/tests/test_s3_storage.py | 33 +++++++++++--------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/readthedocs/storage/tests/test_s3_storage.py b/readthedocs/storage/tests/test_s3_storage.py index 6fa18fb437e..99e90183298 100644 --- a/readthedocs/storage/tests/test_s3_storage.py +++ b/readthedocs/storage/tests/test_s3_storage.py @@ -2,34 +2,39 @@ import pytest from django.core.exceptions import SuspiciousFileOperation -from django.test import TestCase, override_settings +from django.test import TestCase + +from readthedocs.storage.s3_storage import RTDS3Storage -@override_settings( - S3_MEDIA_STORAGE_BUCKET="readthedocs-test", -) class TestS3BuildMediaStorageDeleteDirectory(TestCase): - def setUp(self): - from readthedocs.storage.s3_storage import S3BuildMediaStorage + """ + Test custom overrides on S3 storage backends. - self.storage = S3BuildMediaStorage() - self.mock_bucket = mock.MagicMock() - # Override the internal _bucket attribute (used by the bucket property) - self.storage._bucket = self.mock_bucket + Since the overrides are in the RTDBaseStorage class, + we test using the S3BuildMediaStorage, which inherits from RTDBaseStorage and + """ + + def setUp(self): + self.storage = RTDS3Storage() def test_delete_directory(self): + mock_bucket = mock.MagicMock() + self.storage._bucket = mock_bucket self.storage.delete_directory("projects/my-project/en/latest/") - self.mock_bucket.objects.filter.assert_called_once_with( + mock_bucket.objects.filter.assert_called_once_with( Prefix="projects/my-project/en/latest/" ) - self.mock_bucket.objects.filter.return_value.delete.assert_called_once() + mock_bucket.objects.filter.return_value.delete.assert_called_once() def test_delete_directory_adds_trailing_slash(self): + mock_bucket = mock.MagicMock() + self.storage._bucket = mock_bucket self.storage.delete_directory("projects/my-project/en/latest") - self.mock_bucket.objects.filter.assert_called_once_with( + mock_bucket.objects.filter.assert_called_once_with( Prefix="projects/my-project/en/latest/" ) - self.mock_bucket.objects.filter.return_value.delete.assert_called_once() + mock_bucket.objects.filter.return_value.delete.assert_called_once() def test_delete_directory_raises_for_root_path(self): with pytest.raises(SuspiciousFileOperation): From f890c5d832bb6b3930f9f575dd3c44f81d592640 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 2 Apr 2026 12:11:13 -0500 Subject: [PATCH 4/6] We have a common class to test with --- readthedocs/storage/tests/test_s3_storage.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/readthedocs/storage/tests/test_s3_storage.py b/readthedocs/storage/tests/test_s3_storage.py index 99e90183298..8e1375ee68c 100644 --- a/readthedocs/storage/tests/test_s3_storage.py +++ b/readthedocs/storage/tests/test_s3_storage.py @@ -8,12 +8,6 @@ class TestS3BuildMediaStorageDeleteDirectory(TestCase): - """ - Test custom overrides on S3 storage backends. - - Since the overrides are in the RTDBaseStorage class, - we test using the S3BuildMediaStorage, which inherits from RTDBaseStorage and - """ def setUp(self): self.storage = RTDS3Storage() From ccec37dff01a97434acb0e7c91d9ea7b72bde688 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 7 Apr 2026 17:36:54 -0500 Subject: [PATCH 5/6] Updates from review --- readthedocs/storage/s3_storage.py | 5 +++-- readthedocs/storage/tests/test_s3_storage.py | 12 +++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/readthedocs/storage/s3_storage.py b/readthedocs/storage/s3_storage.py index f10b03e419f..5b3f3f38de6 100644 --- a/readthedocs/storage/s3_storage.py +++ b/readthedocs/storage/s3_storage.py @@ -69,10 +69,11 @@ def delete_directory(self, path): if not path.endswith("/"): path += "/" - if path in ("", "/"): + location = self.location.rstrip("/") + "/" + if path == location: raise SuspiciousFileOperation("Deleting all storage cannot be right") - log.debug("Deleting path from media storage", path=path) + log.debug("Deleting path from storage", path=path) self.bucket.objects.filter(Prefix=path).delete() diff --git a/readthedocs/storage/tests/test_s3_storage.py b/readthedocs/storage/tests/test_s3_storage.py index 8e1375ee68c..6cf2504caf2 100644 --- a/readthedocs/storage/tests/test_s3_storage.py +++ b/readthedocs/storage/tests/test_s3_storage.py @@ -7,7 +7,7 @@ from readthedocs.storage.s3_storage import RTDS3Storage -class TestS3BuildMediaStorageDeleteDirectory(TestCase): +class TestRTDS3Storage(TestCase): def setUp(self): self.storage = RTDS3Storage() @@ -37,3 +37,13 @@ def test_delete_directory_raises_for_root_path(self): def test_delete_directory_raises_for_empty_path(self): with pytest.raises(SuspiciousFileOperation): self.storage.delete_directory("") + + def test_delete_directory_raises_for_root_path_with_location(self): + self.storage.location = "projects" + with pytest.raises(SuspiciousFileOperation): + self.storage.delete_directory("/") + + def test_delete_directory_raises_for_empty_path_with_location(self): + self.storage.location = "projects" + with pytest.raises(SuspiciousFileOperation): + self.storage.delete_directory("") From 85d36030f22a56af073fabb5309d5e19e34b0e18 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 7 Apr 2026 19:01:57 -0500 Subject: [PATCH 6/6] Fix test --- readthedocs/storage/tests/test_s3_storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/storage/tests/test_s3_storage.py b/readthedocs/storage/tests/test_s3_storage.py index 6cf2504caf2..42a80e7eaf5 100644 --- a/readthedocs/storage/tests/test_s3_storage.py +++ b/readthedocs/storage/tests/test_s3_storage.py @@ -1,7 +1,7 @@ from unittest import mock import pytest -from django.core.exceptions import SuspiciousFileOperation +from django.core.exceptions import SuspiciousFileOperation, SuspiciousOperation from django.test import TestCase from readthedocs.storage.s3_storage import RTDS3Storage @@ -40,7 +40,7 @@ def test_delete_directory_raises_for_empty_path(self): def test_delete_directory_raises_for_root_path_with_location(self): self.storage.location = "projects" - with pytest.raises(SuspiciousFileOperation): + with pytest.raises(SuspiciousOperation): self.storage.delete_directory("/") def test_delete_directory_raises_for_empty_path_with_location(self):