
Commit f955568

Actually use the ACLs with S3 + Add (half of) ITs for ACLs
1 parent e1664c1 commit f955568

3 files changed: 49 additions, 29 deletions


medusa/storage/s3_base_storage.py

Lines changed: 17 additions & 10 deletions
@@ -116,6 +116,7 @@ def __init__(self, config):
 
         self.connection_extra_args = self._make_connection_arguments(config)
         self.transfer_config = self._make_transfer_config(config)
+        self.canned_acl = config.canned_acl
 
         self.executor = concurrent.futures.ThreadPoolExecutor(int(config.concurrent_transfers))
 
@@ -259,14 +260,18 @@ async def _list_blobs(self, prefix=None) -> t.List[AbstractBlob]:
     @retry(stop_max_attempt_number=MAX_UP_DOWN_LOAD_RETRIES, wait_fixed=5000)
     async def _upload_object(self, data: io.BytesIO, object_key: str, headers: t.Dict[str, str]) -> AbstractBlob:
 
-        kms_args = {}
+        extra_args = {}
         if self.kms_id is not None:
-            kms_args['ServerSideEncryption'] = 'aws:kms'
-            kms_args['SSEKMSKeyId'] = self.kms_id
+            extra_args['ServerSideEncryption'] = 'aws:kms'
+            extra_args['SSEKMSKeyId'] = self.kms_id
 
         storage_class = self.get_storage_class()
         if storage_class is not None:
-            kms_args['StorageClass'] = storage_class
+            extra_args['StorageClass'] = storage_class
+
+        # doing this to a bucket w/o ACLS enabled causes AccessControlListNotSupported error
+        if self.canned_acl is not None:
+            extra_args['ACL'] = self.canned_acl
 
         logging.debug(
             '[S3 Storage] Uploading object from stream -> s3://{}/{}'.format(
@@ -281,7 +286,7 @@ async def _upload_object(self, data: io.BytesIO, object_key: str, headers: t.Dict[str, str]) -> AbstractBlob:
                 Bucket=self.bucket_name,
                 Key=object_key,
                 Body=data,
-                **kms_args,
+                **extra_args,
             )
         except Exception as e:
             logging.error(e)
@@ -352,14 +357,16 @@ async def _upload_blob(self, src: str, dest: str) -> ManifestObject:
         # check if objects resides in a sub-folder (e.g. secondary index). if it does, use the sub-folder in object path
         object_key = AbstractStorage.path_maybe_with_parent(dest, src_path)
 
-        kms_args = {}
+        extra_args = {}
         if self.kms_id is not None:
-            kms_args['ServerSideEncryption'] = 'aws:kms'
-            kms_args['SSEKMSKeyId'] = self.kms_id
+            extra_args['ServerSideEncryption'] = 'aws:kms'
+            extra_args['SSEKMSKeyId'] = self.kms_id
 
         storage_class = self.get_storage_class()
         if storage_class is not None:
-            kms_args['StorageClass'] = storage_class
+            extra_args['StorageClass'] = storage_class
+        if self.canned_acl is not None:
+            extra_args['ACL'] = self.canned_acl
 
         file_size = os.stat(src).st_size
         logging.debug(
@@ -373,7 +380,7 @@ async def _upload_blob(self, src: str, dest: str) -> ManifestObject:
             'Bucket': self.bucket_name,
             'Key': object_key,
             'Config': self.transfer_config,
-            'ExtraArgs': kms_args,
+            'ExtraArgs': extra_args,
         }
         # we are going to combine asyncio with boto's threading
         # we do this by submitting the upload into an executor
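Taken together, the change renames the kms_args dict to extra_args and conditionally adds the ACL before splatting the dict into the S3 call. A minimal standalone sketch of that pattern, assuming a plain boto3 client (the helper name and bucket/key values are hypothetical, while ACL, ServerSideEncryption, SSEKMSKeyId, and StorageClass are genuine put_object parameters):

import boto3

def upload_with_optional_extras(bucket, key, body, kms_id=None, storage_class=None, canned_acl=None):
    # Collect every optional upload parameter in one dict so the call site
    # stays the same whether or not KMS, a storage class, or an ACL is set.
    extra_args = {}
    if kms_id is not None:
        extra_args['ServerSideEncryption'] = 'aws:kms'
        extra_args['SSEKMSKeyId'] = kms_id
    if storage_class is not None:
        extra_args['StorageClass'] = storage_class
    # Only send an ACL when one was configured: a bucket whose Object Ownership
    # is "bucket owner enforced" rejects it with AccessControlListNotSupported.
    if canned_acl is not None:
        extra_args['ACL'] = canned_acl
    boto3.client('s3').put_object(Bucket=bucket, Key=key, Body=body, **extra_args)

upload_with_optional_extras('some-bucket', 'backups/sstable-0-Data.db', b'payload', canned_acl='bucket-owner-read')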

tests/integration/features/integration_tests.feature

Lines changed: 17 additions & 16 deletions
@@ -1151,36 +1151,37 @@ Feature: Integration tests
   Scenario Outline: Perform a differential backup with explicit storage class, then verify it
     Given I have a fresh ccm cluster "<client encryption>" running named "scenario32"
     Given I will use "<storage class>" as storage class in the storage
+    Given I will use "<canned ACL>" canned ACL when uploading objects
     Given I am using "<storage>" as storage provider in ccm cluster "<client encryption>" with gRPC server
     When I create the "test" table with secondary index in keyspace "medusa"
     When I load 100 rows in the "medusa.test" table
     When I run a "ccm node1 nodetool -- -Dcom.sun.jndi.rmiURLParsing=legacy flush" command
     When I perform a backup in "differential" mode of the node named "first_backup" with md5 checks "disabled"
     Then I can see the backup named "first_backup" when I list the backups
     Then I can verify the backup named "first_backup" with md5 checks "disabled" successfully
-    Then I can see 2 SSTables with "<storage class>" in the SSTable pool for the "test" table in keyspace "medusa"
+    Then I can see 2 SSTables with "<storage class>" and "<canned ACL>" in the SSTable pool for the "test" table in keyspace "medusa"
 
     @s3
     Examples: S3 storage
-      | storage | client encryption | storage class |
-      | s3_us_west_oregon | without_client_encryption | STANDARD |
-      | s3_us_west_oregon | without_client_encryption | REDUCED_REDUNDANCY |
-      | s3_us_west_oregon | without_client_encryption | STANDARD_IA |
-      | s3_us_west_oregon | without_client_encryption | ONEZONE_IA |
-      | s3_us_west_oregon | without_client_encryption | INTELLIGENT_TIERING |
+      | storage | client encryption | storage class | canned ACL |
+      | s3_us_west_oregon | without_client_encryption | STANDARD | bucket-owner-read |
+      | s3_us_west_oregon | without_client_encryption | REDUCED_REDUNDANCY | bucket-owner-read |
+      | s3_us_west_oregon | without_client_encryption | STANDARD_IA | bucket-owner-read |
+      | s3_us_west_oregon | without_client_encryption | ONEZONE_IA | bucket-owner-read |
+      | s3_us_west_oregon | without_client_encryption | INTELLIGENT_TIERING | bucket-owner-read |
 
     @gcs
     Examples: Google Cloud Storage
-      | storage | client encryption | storage class |
-      | google_storage | without_client_encryption | STANDARD |
+      | storage | client encryption | storage class | canned ACL |
+      | google_storage | without_client_encryption | STANDARD | None |
      # this is buggy for now, the library does not propagate the custom storage class headers
-      # | google_storage | without_client_encryption | NEARLINE |
-      # | google_storage | without_client_encryption | COLDLINE |
-      # | google_storage | without_client_encryption | ARCHIVE |
+      # | google_storage | without_client_encryption | NEARLINE | None |
+      # | google_storage | without_client_encryption | COLDLINE | None |
+      # | google_storage | without_client_encryption | ARCHIVE | None |
 
     @azure
     Examples: Azure Blob Storage
-      | storage | client encryption | storage class |
-      | azure_blobs | without_client_encryption | HOT |
-      | azure_blobs | without_client_encryption | COOL |
-      | azure_blobs | without_client_encryption | COLD |
+      | storage | client encryption | storage class | canned ACL |
+      | azure_blobs | without_client_encryption | HOT | None |
+      | azure_blobs | without_client_encryption | COOL | None |
+      | azure_blobs | without_client_encryption | COLD | None |
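A subtlety when reading the non-S3 example tables above: behave substitutes outline cells as raw strings, so a "None" cell reaches the step implementation as the literal string 'None', not Python's None. A hypothetical coercion helper (not part of this commit) that the step definitions could apply before stashing the value on the context:

def parse_canned_acl(cell):
    # Behave outline cells are always strings; treat an empty cell or the
    # literal "None" as "no canned ACL requested".
    if cell is None or cell.strip() in ('', 'None'):
        return None
    return cell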

tests/integration/features/steps/integration_steps.py

Lines changed: 15 additions & 3 deletions
@@ -395,6 +395,11 @@ def i_will_use_storage_class(context, storage_class):
     context.storage_class = storage_class
 
 
+@given(r'I will use "{canned_acl}" canned ACL when uploading objects')
+def i_will_use_canned_acl(context, canned_acl):
+    context.canned_acl = canned_acl
+
+
 @given(r'I am using "{storage_provider}" as storage provider in ccm cluster "{client_encryption}"')
 def i_am_using_storage_provider(context, storage_provider, client_encryption):
     context.storage_provider = storage_provider
@@ -538,6 +543,8 @@ def get_args(context, storage_provider, client_encryption, cassandra_url, use_mg
     storage_args = {"prefix": storage_prefix}
     if hasattr(context, "storage_class"):
         storage_args.update({"storage_class": context.storage_class})
+    if hasattr(context, "canned_acl"):
+        storage_args.update({"canned_acl": context.canned_acl})
 
     cassandra_args = {
         "is_ccm": str(is_ccm),
@@ -1280,11 +1287,10 @@ def _i_can_see_nb_sstables_in_the_sstable_pool(
     _i_can_see_nb_sstables_with_storage_class_in_the_sstable_pool(context, nb_sstables, None, table_name, keyspace)
 
 
-# Then I can see 2 SSTables with "<storage class>" in the SSTable pool for the "test" table in keyspace "medusa"
-@then(r'I can see {nb_sstables} SSTables with "{storage_class}" in the SSTable pool '
+@then(r'I can see {nb_sstables} SSTables with "{storage_class}" and "{canned_acl}" in the SSTable pool '
       r'for the "{table_name}" table in keyspace "{keyspace}"')
 def _i_can_see_nb_sstables_with_storage_class_in_the_sstable_pool(
-        context, nb_sstables, storage_class, table_name, keyspace
+        context, nb_sstables, storage_class, canned_acl, table_name, keyspace
 ):
     with Storage(config=context.medusa_config.storage) as storage:
         path = os.path.join(
@@ -1302,6 +1308,12 @@ def _i_can_see_nb_sstables_with_storage_class_in_the_sstable_pool(
             logging.info(f'{storage_class.upper()} vs {sstable.storage_class.upper()}')
             assert storage_class.upper() == sstable.storage_class.upper()
 
+    # to make checking ACLs work, we'd need to make the driver call
+    # response = s3.get_object_acl(Bucket=bucket_name, Key=object_key)
+    # but that assumes we have a bucket with ACLs enabled
+    if canned_acl is not None:
+        pass
+
 
 @then(
     r'backup named "{backup_name}" has {nb_files} files '
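The trailing pass branch is the missing half that the commit title admits to. A hedged sketch of what the full assertion could look like, assuming a bucket that still has ACLs enabled (get_object_acl is a real boto3 S3 client call; the helper name and the grant checks are assumptions):

import boto3

def assert_canned_acl_applied(bucket_name, object_key, canned_acl):
    s3 = boto3.client('s3')
    response = s3.get_object_acl(Bucket=bucket_name, Key=object_key)
    grants = response['Grants']
    if canned_acl == 'bucket-owner-read':
        # bucket-owner-read layers a READ grant for the bucket owner on top
        # of the object owner's FULL_CONTROL, so both should be present.
        assert any(g['Permission'] == 'READ' for g in grants)
        assert any(g['Permission'] == 'FULL_CONTROL' for g in grants)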
