
Commit 83d1d17

Add S3OptimizedUploadStorage (#128)
1 parent 393647f commit 83d1d17

3 files changed: +139 -0 lines changed

README.rst (+20)

@@ -195,3 +195,23 @@ uploaded to AWS S3 directly and not to your Django application server.

    :target: https://codecov.io/gh/codingjoe/django-s3file
.. |GitHub license| image:: https://img.shields.io/badge/license-MIT-blue.svg
    :target: https://raw.githubusercontent.com/codingjoe/django-s3file/master/LICENSE

Using optimized S3Boto3Storage
------------------------------

Since ``S3Boto3Storage`` supports storing data from any other fileobj,
it uses a generalized ``_save`` function. As a result, the frontend uploads
the file to S3 and the object is then copied byte by byte to perform a move,
just to rename the uploaded object. For large files this means additional
waiting time for the user.

That's why S3File provides an optimized version of this method in
``storages_optimized.S3OptimizedUploadStorage``. It uses the more efficient
``copy`` method of S3, since we know that we only ever copy from one S3 location to another.

.. code:: python

    from s3file.storages_optimized import S3OptimizedUploadStorage

    class MyStorage(S3OptimizedUploadStorage):  # Subclass and use like any other storage
        default_acl = 'private'
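
To route uploads through such a subclass, it still has to be registered as the project's
file storage. A minimal sketch, assuming the subclass above lives in a hypothetical
``myapp/storages.py`` and the classic ``DEFAULT_FILE_STORAGE`` setting is in use:

.. code:: python

    # settings.py -- point Django's default file storage at the optimized subclass.
    # "myapp.storages.MyStorage" is a hypothetical dotted path; adjust it to your project.
    DEFAULT_FILE_STORAGE = "myapp.storages.MyStorage"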

s3file/storages_optimized.py (+43)

from storages.backends.s3boto3 import S3Boto3Storage


class S3OptimizedUploadStorage(S3Boto3Storage):
    """
    Class for an optimized S3 storage.

    This storage avoids the unnecessary copy through the general ``upload_fileobj``
    command when the object already is an S3 object, in which case the faster
    ``copy`` command can be used.

    The assumption is that ``content`` contains an S3 object from which we can copy.

    See also the discussion here: https://github.com/codingjoe/django-s3file/discussions/126
    """

    def _save(self, name, content):
        # Basically copy the implementation of _save of S3Boto3Storage
        # and replace obj.upload_fileobj with a copy operation.
        cleaned_name = self._clean_name(name)
        name = self._normalize_name(cleaned_name)
        params = self._get_write_parameters(name, content)

        if (
            self.gzip
            and params["ContentType"] in self.gzip_content_types
            and "ContentEncoding" not in params
        ):
            content = self._compress_content(content)
            params["ContentEncoding"] = "gzip"

        obj = self.bucket.Object(name)
        # content.seek(0, os.SEEK_SET)  # Disable unnecessary seek operation
        # obj.upload_fileobj(content, ExtraArgs=params)  # Disable upload function

        if not hasattr(content, "obj") or not hasattr(content.obj, "key"):
            raise TypeError(
                "The content object must be a S3 object and contain a valid key."
            )

        # Copy the file instead of uploading it.
        obj.copy({"Bucket": self.bucket.name, "Key": content.obj.key}, ExtraArgs=params)

        return cleaned_name
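
The gain comes from S3's server-side copy: boto3's ``Object.copy`` lets S3 duplicate the
object internally, so the file's bytes never travel back through the Django application
server. A minimal standalone sketch of that call, with hypothetical bucket and key names
used purely for illustration:

.. code:: python

    import boto3

    s3 = boto3.resource("s3")

    # Hypothetical source key and destination object, for illustration only.
    source = {"Bucket": "my-bucket", "Key": "tmp/s3file/upload.txt"}
    destination = s3.Object("my-bucket", "media/upload.txt")

    # S3 copies the object server-side; the data never passes through this process.
    destination.copy(source, ExtraArgs={"ACL": "private"})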

tests/test_storages.py (+76)

import pytest
from django.core.files.base import ContentFile

from s3file.storages_optimized import S3OptimizedUploadStorage


class S3OptimizedMockStorage(S3OptimizedUploadStorage):
    created_objects = {}

    def _compress_content(self, content):
        return content

    class bucket:
        name = "test-bucket"

        class Object:
            def __init__(self, key):
                self.key = key
                self.copy_from_bucket = None
                self.copy_from_key = None
                S3OptimizedMockStorage.created_objects[self.key] = self

            def copy(self, s3_object, ExtraArgs):
                self.copy_from_bucket = s3_object["Bucket"]
                self.copy_from_key = s3_object["Key"]


class TestStorages:
    url = "/__s3_mock__/"

    def test_post__save_optimized(self):
        storage = S3OptimizedMockStorage()
        obj = storage.bucket.Object("tmp/s3file/s3_file.txt")

        class Content:
            def __init__(self, obj):
                self.obj = obj

        content = Content(obj)
        key = storage._save("tmp/s3file/s3_file_copied.txt", content)
        stored_object = storage.created_objects[
            "custom/location/tmp/s3file/s3_file_copied.txt"
        ]

        assert key == "tmp/s3file/s3_file_copied.txt"
        assert stored_object.copy_from_bucket == storage.bucket.name
        assert stored_object.copy_from_key == "tmp/s3file/s3_file.txt"

    def test_post__save_optimized_gzip(self):
        storage = S3OptimizedMockStorage()
        obj = storage.bucket.Object("tmp/s3file/s3_file.css")
        storage.gzip = True

        class Content:
            def __init__(self, obj):
                self.obj = obj

        content = Content(obj)
        key = storage._save("tmp/s3file/s3_file_copied.css", content)
        stored_object = storage.created_objects[
            "custom/location/tmp/s3file/s3_file_copied.css"
        ]

        assert key == "tmp/s3file/s3_file_copied.css"
        assert stored_object.copy_from_bucket == storage.bucket.name
        assert stored_object.copy_from_key == "tmp/s3file/s3_file.css"

    def test_post__save_optimized_fail(self):
        storage = S3OptimizedMockStorage()

        with pytest.raises(TypeError) as excinfo:
            storage._save("tmp/s3file/s3_file_copied.txt", ContentFile(b"s3file"))

        assert "The content object must be a S3 object and contain a valid key." in str(
            excinfo.value
        )
