@@ -145,10 +145,11 @@ def process_link(img_downloader, anchor_tag, url_match, config=PluginConfig()):
145145 if matching_filepaths : # => a thumbnail has already been generated
146146 fs_thumb_filepath = matching_filepaths [0 ]
147147 else :
148- LOGGER .info ("Thumbnail does not exist => downloading image from %s" , anchor_tag ['href' ])
148+ LOGGER .info ("Thumbnail does not exist for %s => downloading image from %s" , thumb_filename , anchor_tag ['href' ])
149149 tmp_thumb_filepath = img_downloader (url_match , config )
150150 if not tmp_thumb_filepath : # => means the downloader failed to retrieve the image in a "supported" case
151- with open (config .fs_thumbs_dir (thumb_filename + '.none' ), 'w' , encoding = 'utf8' ):
151+ hostname = urlparse (anchor_tag ['href' ]).netloc
152+ with open (config .fs_thumbs_dir (f'{ thumb_filename } .{ hostname } .none' ), 'w' , encoding = 'utf8' ):
152153 pass
153154 return
154155 img_ext = os .path .splitext (tmp_thumb_filepath )[1 ]
@@ -158,7 +159,7 @@ def process_link(img_downloader, anchor_tag, url_match, config=PluginConfig()):
158159 os .rename (tmp_thumb_filepath , fs_thumb_filepath )
159160 # Under Windows, I have sometimes seen a bit of delay for this operation to be performed,
160161 # which could trigger a FileNotFoundError on the line below, when calling getsize()
161- if not os .path .getsize (fs_thumb_filepath ): # .none file, meaning no thumbnail could be donwloaded
162+ if not os .path .getsize (fs_thumb_filepath ): # .none file, meaning no thumbnail could be downloaded
162163 return
163164 rel_thumb_filepath = fs_thumb_filepath .replace (config .output_path + '/' , '' ) if config .output_path else fs_thumb_filepath
164165 # Editing HTML on-the-fly to insert an <img> after the <a>:
@@ -331,8 +332,12 @@ def http_get(url, config=PluginConfig()):
331332 if response .status_code != 200 and config .silent_http_errors :
332333 LOGGER .error ('%s HTTP error when fetching %s' , response .status_code , url )
333334 return None
335+ if response .status_code != 200 :
336+ LOGGER .debug (response .text )
334337 if response .status_code != 200 and b'captcha' in response .content :
335338 LOGGER .warning ('CAPTCHA is likely to be required by page %s' , url )
339+ if response .status_code != 200 and b'CloudFront' in response .content :
340+ LOGGER .warning ('CloudFront is blocking request %s' , url )
336341 response .raise_for_status ()
337342 return response
338343
@@ -353,7 +358,7 @@ def register():
353358
354359if __name__ == '__main__' :
355360 html_filepath = sys .argv [1 ]
356- logging .basicConfig (format = "%(asctime)s %( name)s [%(levelname)s] %(message)s" ,
361+ logging .basicConfig (format = "%(asctime)s [%(levelname)s] %( name)s (pid:%(process)s) %(message)s" ,
357362 datefmt = "%H:%M:%S" , level = logging .DEBUG )
358363 config = PluginConfig (dict (
359364 selector = 'article ul ul, h2:nth-of-type(3) + ul, h2:nth-of-type(4) + ul' ,
0 commit comments