Skip to content

Commit 9d945bf

Browse files
Alexandr Bogomyakov authored and arodrime committed
Fixing gsutil upload/download. Divide file list into chunks.
1 parent 6802d29 commit 9d945bf

File tree

4 files changed

+20
-5
lines changed

4 files changed

+20
-5
lines changed

medusa/backup.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131
from medusa.index import add_backup_start_to_index, add_backup_finish_to_index, set_latest_backup_in_index
3232
from medusa.monitoring import Monitoring
3333
from medusa.storage.s3_storage import is_aws_s3
34-
from medusa.storage import Storage, format_bytes_str, ManifestObject
34+
from medusa.storage.google_storage import GSUTIL_MAX_FILES_PER_CHUNK
35+
from medusa.storage import Storage, format_bytes_str, ManifestObject, divide_chunks
3536

3637

3738
class NodeBackupCache(object):
@@ -322,7 +323,11 @@ def backup_snapshots(storage, manifest, node_backup, node_backup_cache, snapshot
322323

323324
manifest_objects = list()
324325
if len(needs_backup) > 0:
325-
manifest_objects = storage.storage_driver.upload_blobs(needs_backup, dst_path)
326+
# If there are plenty of files to upload, the list should be
327+
# split into batches, because 'gsutil cp'
328+
# can't handle too many source files via STDIN.
329+
for src_batch in divide_chunks(needs_backup, GSUTIL_MAX_FILES_PER_CHUNK):
330+
manifest_objects += storage.storage_driver.upload_blobs(src_batch, dst_path)
326331

327332
# Reintroducing already backed up objects in the manifest in differential
328333
for obj in already_backed_up:

medusa/download.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
import pathlib
1919
import sys
2020

21-
from medusa.storage import Storage
21+
from medusa.storage import Storage, divide_chunks
22+
from medusa.storage.google_storage import GSUTIL_MAX_FILES_PER_CHUNK
2223

2324

2425
def download_data(storageconfig, backup, fqtns_to_restore, destination):
@@ -43,8 +44,9 @@ def download_data(storageconfig, backup, fqtns_to_restore, destination):
4344
subfolder.mkdir(parents=False)
4445

4546
if len(srcs) > 0 and (len(fqtns_to_restore) == 0 or fqtn in fqtns_to_restore):
46-
logging.info('Downloading backup data')
47-
storage.storage_driver.download_blobs(srcs, dst)
47+
logging.debug('Downloading %s files to %s', len(srcs), dst)
48+
for src_batch in divide_chunks(srcs, GSUTIL_MAX_FILES_PER_CHUNK):
49+
storage.storage_driver.download_blobs(src_batch, dst)
4850
elif len(srcs) == 0 and (len(fqtns_to_restore) == 0 or fqtn in fqtns_to_restore):
4951
logging.debug('There is nothing to download for {}'.format(fqtn))
5052
else:

medusa/storage/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@
4444
INDEX_BLOB_WITH_TIMESTAMP_PATTERN = re.compile('.*(started|finished)_(.*)_([0-9]+).timestamp$')
4545

4646

47+
def divide_chunks(values, step):
    """Yield consecutive slices of *values*, each holding at most *step* items.

    The last chunk may be shorter than *step*. Each chunk preserves the
    sequence type produced by slicing *values* (e.g. list in, list out).
    """
    # Step through the sequence at fixed offsets; slicing past the end
    # is safe and simply yields whatever remains.
    for offset in range(0, len(values), step):
        yield values[offset:offset + step]
50+
51+
4752
def format_bytes_str(value):
4853
for unit_shift, unit in enumerate(['B', 'KB', 'MB', 'GB', 'TB']):
4954
if value >> (unit_shift * 10) < 1024:

medusa/storage/google_storage.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
from medusa.storage.google_cloud_storage.gsutil import GSUtil
3131

3232

33+
# Upper bound on source files passed to a single 'gsutil cp' invocation:
# per this commit, gsutil cannot handle too many source files via STDIN,
# so callers split file lists into batches of this size.
GSUTIL_MAX_FILES_PER_CHUNK = 64
34+
35+
3336
class GoogleStorage(AbstractStorage):
3437

3538
def connect_storage(self):

0 commit comments

Comments
 (0)