Skip to content

Commit e3ff3ca

Browse files
committed
Add a convert option to zip the AM transfers
Fixes #13.
- Add a `--zip` / `-z` option flag to the convert command that zips the AM transfers after they are created
- Update the `make_read_only()` method so it can accept a file path or a directory path
- Update the check for an existing transfer to look for a zip file or a directory
1 parent 5dfd309 commit e3ff3ca

3 files changed

Lines changed: 132 additions & 49 deletions

File tree

vandockit/__main__.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ def _validate(path):
100100
return (validator.get_summary_msg(), validator.has_errors())
101101

102102

103-
def _convert(source_path, dest_path):
103+
def _convert(source_path, dest_path, **kwargs):
104104
try:
105105
converter = PackageConverter(source_path)
106-
converter.convert(dest_path)
106+
converter.convert(dest_path, **kwargs)
107107
except Exception as exception:
108108
# Log and re-raise exceptions
109109
logging.critical(exception)
@@ -145,7 +145,14 @@ def validate(path):
145145
"source_path", type=click.Path(exists=True, file_okay=False, readable=True)
146146
)
147147
@click.argument("dest_path", type=click.Path())
148-
def convert(source_path, dest_path):
148+
@click.option(
149+
"--zip",
150+
"-z",
151+
"zip_transfers",
152+
is_flag=True,
153+
help="Zip each AM standard transfer directory after conversion",
154+
)
155+
def convert(source_path, dest_path, zip_transfers):
149156
"""
150157
Convert the VanDocs transfer package at SOURCE_PATH to one Archivematica
151158
standard transfer directory per container in DEST_PATH. Validates the
@@ -160,7 +167,9 @@ def convert(source_path, dest_path):
160167
(summary_msg, has_errors) = _validate(source_path)
161168

162169
if not has_errors:
163-
(summary_msg, has_errors) = _convert(source_path, dest_path)
170+
(summary_msg, has_errors) = _convert(
171+
source_path, dest_path, zip=zip_transfers
172+
)
164173

165174
_print_summary(summary_msg, has_errors)
166175

vandockit/converters.py

Lines changed: 87 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -135,19 +135,20 @@ def create_subdirs(self, path, subdirs):
135135

136136
return path
137137

138-
def make_read_only(self, target_dir):
139-
"""Recursively remove write access from directories and files in
140-
directory dir to prevent modification"""
141-
142-
for item in target_dir.iterdir():
143-
if item.is_dir():
144-
# Recurse into sub-directories
138+
def make_read_only(self, path):
139+
"""
140+
Remove write permissions from the file or directory at path to prevent
141+
modification. If path is a directory, recursively remove write
142+
permissions from all of its contents.
143+
"""
144+
145+
if path.is_dir():
146+
for item in path.iterdir():
145147
self.make_read_only(item)
146-
else:
147-
item.chmod(0o444)
148148

149-
# Make target_dir itself read-only
150-
target_dir.chmod(0o555)
149+
path.chmod(0o555)
150+
else:
151+
path.chmod(0o444)
151152

152153

153154
class PackageConverter(BaseConverter):
@@ -198,12 +199,12 @@ def get_summary_msg(self):
198199

199200
return msg
200201

201-
def convert(self, dest_path):
202+
def convert(self, dest_path, **kwargs):
202203
self.timer["start"] = time.time()
203204
dest_path = Path(dest_path)
204205

205206
for container in self.get_containers():
206-
container.write_am_std_transfer(dest_path)
207+
container.write_am_std_transfer(dest_path, **kwargs)
207208

208209
if container.has_errors():
209210
self.errors += container.errors
@@ -223,25 +224,26 @@ def get_log_prefix(self):
223224
def get_am_transfer_name(self):
224225
return f"{self.parent.get_transfer_number()}_{self.name}"
225226

227+
def transfer_exists(self, dest_path):
    """
    Check whether a transfer with this container's name already exists in
    the destination path.

    A transfer counts as existing when dest_path contains either an entry
    named after the transfer (the unzipped transfer directory) or a
    "<transfer name>.zip" file (the zipped transfer).

    dest_path may be a pathlib.Path or a plain string path.

    Returns True if either entry exists, otherwise False.
    """

    # Coerce so string paths work as well as Path objects; Path instances
    # pass through unchanged
    dest_path = Path(dest_path)
    transfer_name = self.get_am_transfer_name()

    return any(
        (dest_path / candidate).exists()
        for candidate in (transfer_name, f"{transfer_name}.zip")
    )
239+
226240
def create_am_transfer_dir(self, dest_path):
227241
"""Create an Archivematica transfer directory using the name format
228242
[transfer_number]_[container_name]"""
229243

230244
am_transfer_name = self.get_am_transfer_name()
231245
am_transfer_dir = dest_path / am_transfer_name
232246

233-
if am_transfer_dir.exists():
234-
msg = (
235-
f'Transfer "{am_transfer_name}" already exists. Please move or'
236-
+ " delete the existing transfer directory to create a new"
237-
+ " transfer."
238-
)
239-
240-
self.errors += 1
241-
logging.error("\n".join(msg).format(am_transfer_name))
242-
243-
return
244-
245247
self.create_subdirs(dest_path, am_transfer_name)
246248

247249
logging.info(
@@ -427,25 +429,73 @@ def copy_desc_md_files(self, am_transfer_dir):
427429

428430
self.copy_files(dmd_files, subdoc_dir)
429431

430-
def write_am_std_transfer(self, dest_path):
431-
am_transfer_dir = self.create_am_transfer_dir(dest_path)
432+
def zip_dir(self, path):
    """
    Zip the directory at path and return the zip file's path.

    The archive is written next to the directory as "<path>.zip" and holds
    the directory itself as its top-level entry. The original directory is
    deleted after the zip file is created.

    path may be a pathlib.Path or a plain string path.

    If creating the archive fails, a partially written zip file is removed
    again (unless a zip file with that name already existed before the
    call), so that an incomplete archive is not mistaken for a finished
    transfer on the next run.

    Raises OSError if the archive can't be created or the directory can't
    be deleted. The failure is logged as critical before re-raising.
    """

    path = Path(path)
    zip_target = Path(f"{path}.zip")

    # Remember whether the zip was already there, so the cleanup below
    # never deletes a file this call didn't create
    zip_preexisted = zip_target.exists()

    try:
        zip_path = shutil.make_archive(
            # Plain string base name: accepted by every Python version
            str(path),
            "zip",
            path.parent,
            path.name,
        )
    except OSError:
        logging.critical(
            self.get_log_prefix()
            + f"Couldn't create zip file '{path.name}.zip' from dir"
            f" '{path}'"
        )

        # Best-effort removal of the incomplete archive; a cleanup problem
        # must not mask the original error
        if not zip_preexisted:
            try:
                zip_target.unlink()
            except FileNotFoundError:
                # Nothing was written before the failure
                pass
            except OSError:
                logging.warning(
                    self.get_log_prefix()
                    + f"Couldn't remove incomplete zip file '{zip_target}'"
                )

        # Halt script
        raise

    # Delete the unzipped transfer directory after creating the zip file
    try:
        shutil.rmtree(path)
    except OSError:
        logging.critical(
            self.get_log_prefix() + f"Couldn't delete '{path}'"
        )

        # Halt script
        raise

    return Path(zip_path)
467+
468+
def write_am_std_transfer(self, dest_path, **kwargs):
469+
# Skip this container if the transfer already exists
470+
if self.transfer_exists(dest_path):
471+
self.errors += 1
472+
logging.error(
473+
f'Transfer "{self.get_am_transfer_name()}" already exists.'
474+
+ " Please move or delete the existing transfer to create a"
475+
+ " new transfer."
476+
)
432477

433-
# Skip this container if the target am_transfer_dir already exists
434-
if not am_transfer_dir:
435478
return
436479

437-
self.copy_submission_docs(am_transfer_dir)
438-
self.write_location_file(am_transfer_dir)
439-
self.write_am_checksum_file(am_transfer_dir)
480+
transfer_path = self.create_am_transfer_dir(dest_path)
481+
482+
self.copy_submission_docs(transfer_path)
483+
self.write_location_file(transfer_path)
484+
self.write_am_checksum_file(transfer_path)
440485

441486
# 2022-03-10: At CVA's request disable the creation of metadata.csv
442487
# because the current VanDocs data doesn't map accurately to Dublin
443488
# Core
444489
#
445-
# self.write_am_metadata(am_transfer_dir)
490+
# self.write_am_metadata(transfer_path)
446491

447-
self.copy_preservation_objects(am_transfer_dir)
448-
self.copy_desc_md_files(am_transfer_dir)
449-
self.make_read_only(am_transfer_dir)
492+
self.copy_preservation_objects(transfer_path)
493+
self.copy_desc_md_files(transfer_path)
450494

451-
return am_transfer_dir
495+
# Zip the transfer directory if the --zip option was specified.
496+
if kwargs.get("zip", False):
497+
transfer_path = self.zip_dir(transfer_path)
498+
499+
self.make_read_only(transfer_path)
500+
501+
return transfer_path

vandockit/tests/test_converters.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import csv
1919
import xml.etree.ElementTree as ET
20+
from pathlib import Path
2021

2122
import pytest
2223

@@ -161,15 +162,14 @@ def test_create_subdirs_os_error(self, dest_dir, vd_base_converter):
161162
with pytest.raises(OSError):
162163
vd_base_converter.create_subdirs(dest_dir, "spam")
163164

164-
def test_make_read_only(self, dest_dir, vd_base_converter):
165+
def test_make_read_only_dir(self, dest_dir, vd_base_converter):
165166
test_dir = dest_dir / "a_dir"
166167
test_dir.mkdir()
167168
test_subdir = test_dir / "a_subdir"
168169
test_subdir.mkdir()
169170
test_file = test_dir / "a_file.txt"
170171
test_file.touch()
171172

172-
# Remove write permissions for directory and files
173173
vd_base_converter.make_read_only(test_dir)
174174

175175
# Can't add a new file
@@ -181,6 +181,15 @@ def test_make_read_only(self, dest_dir, vd_base_converter):
181181
with pytest.raises(PermissionError):
182182
test_file.write_text("foo")
183183

184+
def test_make_read_only_file(self, dest_dir, vd_base_converter):
    """make_read_only() given a single file path removes write access."""
    target = dest_dir / "a_file.txt"
    target.touch()

    vd_base_converter.make_read_only(target)

    # Writing to the file must now be refused
    with pytest.raises(PermissionError):
        target.write_text("foo")
192+
184193

185194
class TestPackageConverter:
186195
def test_get_submission_docs(self, vd_package_converter):
@@ -285,25 +294,27 @@ def test_get_am_transfer_name(
285294

286295
assert check_name == vd_container_converter.get_am_transfer_name()
287296

288-
def test_create_am_transfer_dir(
297+
def test_transfer_exists(
289298
self, dest_dir, vd_container_converter, test_container_data
290299
):
291-
vd_container_converter.create_am_transfer_dir(dest_dir)
300+
assert vd_container_converter.transfer_exists(dest_dir) is False
301+
292302
check_path = dest_dir / "{}_{}".format(
293303
test_container_data["transfer_number"], test_container_data["name"]
294304
)
305+
check_path.mkdir()
295306

296-
assert check_path.exists() and check_path.is_dir()
307+
assert vd_container_converter.transfer_exists(dest_dir) is True
297308

298-
def test_create_am_transfer_dir_already_exists(
309+
def test_create_am_transfer_dir(
299310
self, dest_dir, vd_container_converter, test_container_data
300311
):
312+
vd_container_converter.create_am_transfer_dir(dest_dir)
301313
check_path = dest_dir / "{}_{}".format(
302314
test_container_data["transfer_number"], test_container_data["name"]
303315
)
304-
check_path.mkdir()
305316

306-
assert vd_container_converter.create_am_transfer_dir(dest_dir) is None
317+
assert check_path.exists() and check_path.is_dir()
307318

308319
def test_copy_submission_docs(
309320
self,
@@ -471,3 +482,16 @@ def test_copy_desc_md_files(
471482
assert {
472483
doc["md_filename"] for doc in test_container_data["documents"]
473484
} == {i.name for i in sd_dir.glob("*_Metadata.xml")}
485+
486+
def test_zip_dir(
    self,
    dest_dir,
    vd_container_converter,
):
    """zip_dir() replaces the transfer directory with a valid zip file."""
    # Local import: only this test needs zipfile
    import zipfile

    path = vd_container_converter.create_am_transfer_dir(dest_dir)
    zip_path = vd_container_converter.zip_dir(path)

    assert zip_path == Path(f"{path}.zip")
    assert zip_path.is_file()
    assert zip_path.stat().st_size > 0
    assert not path.exists()

    # The size check alone can't fail, since even an empty zip archive has
    # a non-zero size on disk. Open the archive to confirm it is intact
    # and holds the transfer directory.
    with zipfile.ZipFile(zip_path) as archive:
        assert archive.testzip() is None
        assert any(
            name.startswith(f"{path.name}/") for name in archive.namelist()
        )

0 commit comments

Comments
 (0)