26
26
from urllib .parse import urlsplit
27
27
from io import BytesIO
28
28
29
+ from tenacity import retry
30
+ from tenacity import retry_if_exception
31
+ from tenacity import retry_if_exception_type
32
+ from tenacity import wait_exponential
33
+ from tenacity import stop_after_attempt
34
+ from tenacity import after_log
29
35
from luigi .contrib import gcp
30
36
import luigi .target
31
37
from luigi .format import FileWrapper
32
38
33
39
logger = logging .getLogger ('luigi-interface' )
34
40
41
+ # Exception types that trigger a retry; stays None unless the optional imports below succeed.
42
+ RETRYABLE_ERRORS = None
43
+
35
44
try :
36
45
import httplib2
37
46
42
51
logger .warning ("Loading GCS module without the python packages googleapiclient & google-auth. \
43
52
This will crash at runtime if GCS functionality is used." )
44
53
else :
45
- # Retry transport and file IO errors.
46
54
RETRYABLE_ERRORS = (httplib2 .HttpLib2Error , IOError )
47
55
48
- # Number of times to retry failed downloads.
49
- NUM_RETRIES = 5
50
-
51
56
# Number of bytes to send/receive in each request.
52
57
CHUNKSIZE = 10 * 1024 * 1024
53
58
64
69
GCS_BATCH_URI = 'https://storage.googleapis.com/batch/storage/v1'
65
70
66
71
72
+ # Retry configurations. For more details, see https://tenacity.readthedocs.io/en/latest/
73
def is_error_5xx(err):
    """Tell whether ``err`` is an HTTP error carrying a 5xx status code.

    :param err: the exception to inspect.
    :return: ``True`` only when ``err`` is an ``errors.HttpError`` whose
             ``resp.status`` is 500 or greater; ``False`` otherwise.
    """
    # Anything that is not an HttpError has no response to inspect.
    if not isinstance(err, errors.HttpError):
        return False
    return err.resp.status >= 500
75
+
76
+
77
# Shared retry decorator built with tenacity:
#   * retry when the raised exception satisfies is_error_5xx, or is an
#     instance of one of the RETRYABLE_ERRORS types;
#   * wait between attempts with wait_exponential (multiplier=1, min=1, max=10);
#   * give up after 5 attempts, re-raising the underlying exception
#     (reraise=True) instead of wrapping it;
#   * report attempts through ``logger`` at WARNING level via after_log.
gcs_retry = retry(
    retry=(retry_if_exception(is_error_5xx) | retry_if_exception_type(RETRYABLE_ERRORS)),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    stop=stop_after_attempt(5),
    reraise=True,
    after=after_log(logger, logging.WARNING),
)
82
+
83
+
67
84
def _wait_for_consistency (checker ):
68
85
"""Eventual consistency: wait until GCS reports something is true.
69
86
@@ -133,6 +150,7 @@ def _is_root(self, key):
133
150
def _add_path_delimiter (self , key ):
134
151
return key if key [- 1 :] == '/' else key + '/'
135
152
153
+ @gcs_retry
136
154
def _obj_exists (self , bucket , obj ):
137
155
try :
138
156
self .client .objects ().get (bucket = bucket , object = obj ).execute ()
@@ -157,6 +175,7 @@ def _list_iter(self, bucket, prefix):
157
175
158
176
response = request .execute ()
159
177
178
+ @gcs_retry
160
179
def _do_put (self , media , dest_path ):
161
180
bucket , obj = self ._path_to_bucket_and_key (dest_path )
162
181
@@ -165,28 +184,10 @@ def _do_put(self, media, dest_path):
165
184
return request .execute ()
166
185
167
186
response = None
168
- attempts = 0
169
187
while response is None :
170
- error = None
171
- try :
172
- status , response = request .next_chunk ()
173
- if status :
174
- logger .debug ('Upload progress: %.2f%%' , 100 * status .progress ())
175
- except errors .HttpError as err :
176
- error = err
177
- if err .resp .status < 500 :
178
- raise
179
- logger .warning ('Caught error while uploading' , exc_info = True )
180
- except RETRYABLE_ERRORS as err :
181
- logger .warning ('Caught error while uploading' , exc_info = True )
182
- error = err
183
-
184
- if error :
185
- attempts += 1
186
- if attempts >= NUM_RETRIES :
187
- raise error
188
- else :
189
- attempts = 0
188
+ status , response = request .next_chunk ()
189
+ if status :
190
+ logger .debug ('Upload progress: %.2f%%' , 100 * status .progress ())
190
191
191
192
_wait_for_consistency (lambda : self ._obj_exists (bucket , obj ))
192
193
return response
@@ -380,6 +381,7 @@ def list_wildcard(self, wildcard_path):
380
381
len (it ) >= len (path + '/' + wildcard_parts [0 ]) + len (wildcard_parts [1 ]):
381
382
yield it
382
383
384
+ @gcs_retry
383
385
def download (self , path , chunksize = None , chunk_callback = lambda _ : False ):
384
386
"""Downloads the object contents to local file system.
385
387
@@ -400,29 +402,11 @@ def download(self, path, chunksize=None, chunk_callback=lambda _: False):
400
402
request = self .client .objects ().get_media (bucket = bucket , object = obj )
401
403
downloader = http .MediaIoBaseDownload (fp , request , chunksize = chunksize )
402
404
403
- attempts = 0
404
405
done = False
405
406
while not done :
406
- error = None
407
- try :
408
- _ , done = downloader .next_chunk ()
409
- if chunk_callback (fp ):
410
- done = True
411
- except errors .HttpError as err :
412
- error = err
413
- if err .resp .status < 500 :
414
- raise
415
- logger .warning ('Error downloading file, retrying' , exc_info = True )
416
- except RETRYABLE_ERRORS as err :
417
- logger .warning ('Error downloading file, retrying' , exc_info = True )
418
- error = err
419
-
420
- if error :
421
- attempts += 1
422
- if attempts >= NUM_RETRIES :
423
- raise error
424
- else :
425
- attempts = 0
407
+ _ , done = downloader .next_chunk ()
408
+ if chunk_callback (fp ):
409
+ done = True
426
410
427
411
return return_fp
428
412
0 commit comments