Skip to content

Commit 80effb5

Browse files
committed
Merge remote-tracking branch 'origin/master' into HEAD
2 parents a434042 + 7b7ff90 commit 80effb5

File tree

7 files changed

+81
-70
lines changed

7 files changed

+81
-70
lines changed

README.rst

+2
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,14 @@ Some more companies are using Luigi but haven't had a chance yet to write about
182182
* `Hopper <https://www.hopper.com/>`_
183183
* `VOYAGE GROUP/Zucks <https://zucks.co.jp/en/>`_
184184
* `Textpert <https://www.textpert.ai/>`_
185+
* `Tracktics <https://www.tracktics.com/>`_
185186
* `Whizar <https://www.whizar.com/>`_
186187
* `xtream <https://www.xtreamers.io/>`__
187188
* `Skyscanner <https://www.skyscanner.net/>`_
188189
* `Jodel <https://www.jodel.com/>`_
189190
* `Mekar <https://mekar.id/en/>`_
190191
* `M3 <https://corporate.m3.com/en/>`_
192+
* `Assist Digital <https://www.assistdigital.com/>`_
191193

192194
We're more than happy to have your company added here. Just send a PR on GitHub.
193195

doc/central_scheduler.rst

+7-7
Original file line numberDiff line numberDiff line change
@@ -78,20 +78,20 @@ The task history has the following pages:
7878

7979
.. figure:: history.png
8080
:alt: Recent history screenshot
81-
* ``/history/by_id/:id``
81+
* ``/history/by_id/{id}``
8282
detailed information about a run, including:
8383
parameter values, the host on which it ran, and timing information.
8484
Example screenshot:
8585

8686
.. figure:: history_by_id.png
8787
:alt: By id screenshot
88-
* ``/history/by_name/:name``
89-
a listing of all runs of a task with the given task name.
88+
* ``/history/by_name/{name}``
89+
a listing of all runs of a task with the given task ``{name}``.
9090
Example screenshot:
9191

9292
.. figure:: history_by_name.png
9393
:alt: By name screenshot
94-
* ``/history/by_params/:name?data=params``
95-
a listing of all runs of a given task restricted to runs with param values matching the given data.
96-
The data is a json blob describing the parameters,
97-
e.g. ``{"foo": "bar"}`` looks for a task with ``foo=bar``.
94+
* ``/history/by_params/{name}?data=params``
95+
a listing of all runs of the task ``{name}``, restricted to runs whose parameter values match the given ``params``.
96+
``params`` is a JSON blob describing the parameters,
97+
e.g. ``data={"foo": "bar"}`` looks for a task with ``foo=bar``.

luigi/contrib/external_program.py

+1
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ def _track_url_by_pattern():
192192
self.build_tracking_url(match.group(1))
193193
)
194194
else:
195+
file_to_write.flush()
195196
sleep(time_to_sleep)
196197

197198
track_proc = Process(target=_track_url_by_pattern)

luigi/contrib/gcs.py

+30-46
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,21 @@
2626
from urllib.parse import urlsplit
2727
from io import BytesIO
2828

29+
from tenacity import retry
30+
from tenacity import retry_if_exception
31+
from tenacity import retry_if_exception_type
32+
from tenacity import wait_exponential
33+
from tenacity import stop_after_attempt
34+
from tenacity import after_log
2935
from luigi.contrib import gcp
3036
import luigi.target
3137
from luigi.format import FileWrapper
3238

3339
logger = logging.getLogger('luigi-interface')
3440

41+
# Retry when any of the following errors occurs
42+
RETRYABLE_ERRORS = None
43+
3544
try:
3645
import httplib2
3746

@@ -42,12 +51,8 @@
4251
logger.warning("Loading GCS module without the python packages googleapiclient & google-auth. \
4352
This will crash at runtime if GCS functionality is used.")
4453
else:
45-
# Retry transport and file IO errors.
4654
RETRYABLE_ERRORS = (httplib2.HttpLib2Error, IOError)
4755

48-
# Number of times to retry failed downloads.
49-
NUM_RETRIES = 5
50-
5156
# Number of bytes to send/receive in each request.
5257
CHUNKSIZE = 10 * 1024 * 1024
5358

@@ -64,6 +69,18 @@
6469
GCS_BATCH_URI = 'https://storage.googleapis.com/batch/storage/v1'
6570

6671

72+
# Retry configurations. For more details, see https://tenacity.readthedocs.io/en/latest/
73+
def is_error_5xx(err):
74+
return isinstance(err, errors.HttpError) and err.resp.status >= 500
75+
76+
77+
gcs_retry = retry(retry=(retry_if_exception(is_error_5xx) | retry_if_exception_type(RETRYABLE_ERRORS)),
78+
wait=wait_exponential(multiplier=1, min=1, max=10),
79+
stop=stop_after_attempt(5),
80+
reraise=True,
81+
after=after_log(logger, logging.WARNING))
82+
83+
6784
def _wait_for_consistency(checker):
6885
"""Eventual consistency: wait until GCS reports something is true.
6986
@@ -133,6 +150,7 @@ def _is_root(self, key):
133150
def _add_path_delimiter(self, key):
134151
return key if key[-1:] == '/' else key + '/'
135152

153+
@gcs_retry
136154
def _obj_exists(self, bucket, obj):
137155
try:
138156
self.client.objects().get(bucket=bucket, object=obj).execute()
@@ -157,6 +175,7 @@ def _list_iter(self, bucket, prefix):
157175

158176
response = request.execute()
159177

178+
@gcs_retry
160179
def _do_put(self, media, dest_path):
161180
bucket, obj = self._path_to_bucket_and_key(dest_path)
162181

@@ -165,28 +184,10 @@ def _do_put(self, media, dest_path):
165184
return request.execute()
166185

167186
response = None
168-
attempts = 0
169187
while response is None:
170-
error = None
171-
try:
172-
status, response = request.next_chunk()
173-
if status:
174-
logger.debug('Upload progress: %.2f%%', 100 * status.progress())
175-
except errors.HttpError as err:
176-
error = err
177-
if err.resp.status < 500:
178-
raise
179-
logger.warning('Caught error while uploading', exc_info=True)
180-
except RETRYABLE_ERRORS as err:
181-
logger.warning('Caught error while uploading', exc_info=True)
182-
error = err
183-
184-
if error:
185-
attempts += 1
186-
if attempts >= NUM_RETRIES:
187-
raise error
188-
else:
189-
attempts = 0
188+
status, response = request.next_chunk()
189+
if status:
190+
logger.debug('Upload progress: %.2f%%', 100 * status.progress())
190191

191192
_wait_for_consistency(lambda: self._obj_exists(bucket, obj))
192193
return response
@@ -380,6 +381,7 @@ def list_wildcard(self, wildcard_path):
380381
len(it) >= len(path + '/' + wildcard_parts[0]) + len(wildcard_parts[1]):
381382
yield it
382383

384+
@gcs_retry
383385
def download(self, path, chunksize=None, chunk_callback=lambda _: False):
384386
"""Downloads the object contents to local file system.
385387
@@ -400,29 +402,11 @@ def download(self, path, chunksize=None, chunk_callback=lambda _: False):
400402
request = self.client.objects().get_media(bucket=bucket, object=obj)
401403
downloader = http.MediaIoBaseDownload(fp, request, chunksize=chunksize)
402404

403-
attempts = 0
404405
done = False
405406
while not done:
406-
error = None
407-
try:
408-
_, done = downloader.next_chunk()
409-
if chunk_callback(fp):
410-
done = True
411-
except errors.HttpError as err:
412-
error = err
413-
if err.resp.status < 500:
414-
raise
415-
logger.warning('Error downloading file, retrying', exc_info=True)
416-
except RETRYABLE_ERRORS as err:
417-
logger.warning('Error downloading file, retrying', exc_info=True)
418-
error = err
419-
420-
if error:
421-
attempts += 1
422-
if attempts >= NUM_RETRIES:
423-
raise error
424-
else:
425-
attempts = 0
407+
_, done = downloader.next_chunk()
408+
if chunk_callback(fp):
409+
done = True
426410

427411
return return_fp
428412

luigi/tools/deps_tree.py

+15-15
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,18 @@
99
$ luigi-deps-tree --module foo_complex examples.Foo
1010
...
1111
└─--[Foo-{} (PENDING)]
12-
|--[Bar-{'num': '0'} (PENDING)]
13-
| |--[Bar-{'num': '4'} (PENDING)]
14-
| └─--[Bar-{'num': '5'} (PENDING)]
15-
|--[Bar-{'num': '1'} (PENDING)]
16-
└─--[Bar-{'num': '2'} (PENDING)]
17-
└─--[Bar-{'num': '6'} (PENDING)]
18-
|--[Bar-{'num': '7'} (PENDING)]
19-
| |--[Bar-{'num': '9'} (PENDING)]
20-
| └─--[Bar-{'num': '10'} (PENDING)]
21-
| └─--[Bar-{'num': '11'} (PENDING)]
22-
└─--[Bar-{'num': '8'} (PENDING)]
23-
└─--[Bar-{'num': '12'} (PENDING)]
12+
|---[Bar-{'num': '0'} (PENDING)]
13+
| |---[Bar-{'num': '4'} (PENDING)]
14+
| └─--[Bar-{'num': '5'} (PENDING)]
15+
|---[Bar-{'num': '1'} (PENDING)]
16+
└─--[Bar-{'num': '2'} (PENDING)]
17+
└─--[Bar-{'num': '6'} (PENDING)]
18+
|---[Bar-{'num': '7'} (PENDING)]
19+
| |---[Bar-{'num': '9'} (PENDING)]
20+
| └─--[Bar-{'num': '10'} (PENDING)]
21+
| └─--[Bar-{'num': '11'} (PENDING)]
22+
└─--[Bar-{'num': '8'} (PENDING)]
23+
└─--[Bar-{'num': '12'} (PENDING)]
2424
"""
2525

2626
from luigi.task import flatten
@@ -52,10 +52,10 @@ def print_tree(task, indent='', last=True):
5252
result = '\n' + indent
5353
if(last):
5454
result += '└─--'
55-
indent += ' '
55+
indent += ' '
5656
else:
57-
result += '|--'
58-
indent += '| '
57+
result += '|---'
58+
indent += '| '
5959
result += '[{0}-{1} ({2})]'.format(name, params, is_complete)
6060
children = flatten(task.requires())
6161
for index, child in enumerate(children):

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def get_static_files(path):
3737
with open('README.rst') as fobj:
3838
long_description = "\n\n" + readme_note + "\n\n" + fobj.read()
3939

40-
install_requires = ['python-dateutil>=2.7.5,<3']
40+
install_requires = ['python-dateutil>=2.7.5,<3', 'tenacity>=6.3.0,<7']
4141

4242
# Can't use python-daemon>=2.2.0 if on windows
4343
# See https://pagure.io/python-daemon/issue/18

test/contrib/gcs_test.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import os
3131
import tempfile
3232
import unittest
33+
from unittest import mock
3334

3435
from luigi.contrib import gcs
3536
from target_test import FileSystemTargetTestMixin
@@ -143,7 +144,7 @@ def test_listdir(self):
143144

144145
def test_put_file(self):
145146
with tempfile.NamedTemporaryFile() as fp:
146-
lorem = 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt\n'
147+
lorem = b'Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt\n'
147148
# Larger file than chunk size, fails with incorrect progress set up
148149
big = lorem * 41943
149150
fp.write(big)
@@ -196,3 +197,26 @@ def test_close_twice(self):
196197
assert src.closed
197198
src.close()
198199
assert src.closed
200+
201+
202+
class RetryTest(unittest.TestCase):
203+
def test_success_with_retryable_error(self):
204+
m = mock.MagicMock(side_effect=[IOError, IOError, 'test_func_output'])
205+
206+
@gcs.gcs_retry
207+
def mock_func():
208+
return m()
209+
210+
actual = mock_func()
211+
expected = 'test_func_output'
212+
self.assertEqual(expected, actual)
213+
214+
def test_fail_with_retry_limit_exceed(self):
215+
m = mock.MagicMock(side_effect=[IOError, IOError, IOError, IOError, IOError])
216+
217+
@gcs.gcs_retry
218+
def mock_func():
219+
return m()
220+
221+
with self.assertRaises(IOError):
222+
mock_func()

0 commit comments

Comments
 (0)