From 084aff93c50ef39e4876cebf0b53d61b59d315bb Mon Sep 17 00:00:00 2001 From: Vincent Barbaresi Date: Mon, 12 Oct 2020 16:25:20 +0200 Subject: [PATCH 1/2] AzureBlobTarget: use file basename for the temp download_file_location When the target blob is inside a path (for instance "path/to/target") and we use `download_when_reading=True`, the blob's parent directories are never created inside the temporary download directory, so the download fails with: ``` File "src/luigi/luigi/contrib/azureblob.py", line 194, in __enter__ self.client.download_as_file(self.container, self.blob, self.download_file_location) File "src/luigi/luigi/contrib/azureblob.py", line 101, in download_as_file return self.connection.get_blob_to_path(container, blob, location) File "src/luigi/venv/lib/python3.6/site-packages/azure/storage/blob/baseblobservice.py", line 1765, in get_blob_to_path with open(file_path, open_mode) as stream: FileNotFoundError: [Errno 2] No such file or directory: '/var/folders/7q/l5knvjqx3pg569hwsdrzjw480000gn/T/2020-10-12 14:20:01.950869689rh5q2/path/to/movie-cheesy.txt' ``` Use `os.path.basename(blob)` so that only the blob's base name is used as the local file name instead of the full blob path (the file is written to a temporary directory anyway). --- luigi/contrib/azureblob.py | 3 ++- test/contrib/azureblob_test.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/luigi/contrib/azureblob.py b/luigi/contrib/azureblob.py index 20de24224a..60a2f88569 100644 --- a/luigi/contrib/azureblob.py +++ b/luigi/contrib/azureblob.py @@ -179,7 +179,8 @@ def __init__(self, container, blob, client, download_when_reading, **kwargs): self.closed = False self.download_when_reading = download_when_reading self.azure_blob_options = kwargs - self.download_file_location = os.path.join(tempfile.mkdtemp(prefix=str(datetime.datetime.utcnow())), blob) + self.download_file_location = os.path.join(tempfile.mkdtemp(prefix=str(datetime.datetime.utcnow())), + os.path.basename(blob)) self.fid = None def read(self, n=None): diff --git a/test/contrib/azureblob_test.py 
b/test/contrib/azureblob_test.py index 2c06086de0..8c894a8e80 100644 --- a/test/contrib/azureblob_test.py +++ b/test/contrib/azureblob_test.py @@ -117,7 +117,7 @@ def test_upload_copy_move_remove_blob(self): class MovieScriptTask(luigi.Task): def output(self): - return AzureBlobTarget("luigi-test", "movie-cheesy.txt", client, download_when_reading=False) + return AzureBlobTarget("luigi-test", "path/to/movie-cheesy.txt", client, download_when_reading=False) def run(self): client.connection.create_container("luigi-test") @@ -170,3 +170,12 @@ def tearDown(self): def test_AzureBlobTarget(self): luigi.build([FinalTask()], local_scheduler=True, log_level='NOTSET') + + def test_AzureBlobTarget_download_when_reading(self): + task = MovieScriptTask() + task.run() + target = task.output() + # Test reading a target blob with a context manager and download_when_reading=True + target.download_when_reading = True + with target.open("r") as f: + assert "James Bond" in f.read() From a43404281d285689094ade63a441fa7539f1cf32 Mon Sep 17 00:00:00 2001 From: Vincent Barbaresi Date: Sat, 16 Jan 2021 18:16:02 +0100 Subject: [PATCH 2/2] use the new official Azurite image --- scripts/ci/install_start_azurite.sh | 5 +++-- scripts/ci/stop_azurite.sh | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/ci/install_start_azurite.sh b/scripts/ci/install_start_azurite.sh index 9fccbbaad5..9ff556ed34 100755 --- a/scripts/ci/install_start_azurite.sh +++ b/scripts/ci/install_start_azurite.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -docker pull arafato/azurite +AZURITE_IMAGE=mcr.microsoft.com/azure-storage/azurite +docker pull $AZURITE_IMAGE mkdir -p blob_emulator $1/stop_azurite.sh -docker run -e executable=blob -d -t -p 10000:10000 -v blob_emulator:/opt/azurite/folder arafato/azurite \ No newline at end of file +docker run -e executable=blob -d -t -p 10000:10000 -v blob_emulator:/opt/azurite/folder $AZURITE_IMAGE diff --git a/scripts/ci/stop_azurite.sh 
b/scripts/ci/stop_azurite.sh index 834f5e7bd6..6fffe7d48d 100755 --- a/scripts/ci/stop_azurite.sh +++ b/scripts/ci/stop_azurite.sh @@ -1,2 +1,2 @@ #!/usr/bin/env bash -docker stop $(docker ps -q --filter ancestor=arafato/azurite) \ No newline at end of file +docker stop $(docker ps -q --filter ancestor=mcr.microsoft.com/azure-storage/azurite)