@@ -22,11 +22,30 @@ def main():
2222 This is done in the github CI before the website is regenerated, after every modification on the main branch.
2323 """
2424
25- latest_file , previous_file = get_latest_two_csv_files (folder )
25+ try :
26+ latest_file , previous_file = get_latest_two_csv_files (folder )
27+ except FileNotFoundError as e :
28+ print (f"No valid CSV files found: { e } " )
29+ return
30+
31+ # Load the CSV data with defensive handling
32+ try :
33+ current_week_data = pd .read_csv (os .path .join (folder , latest_file ))
34+ except pd .errors .EmptyDataError :
35+ print (f"Latest CSV '{ latest_file } ' is empty or has no columns. Aborting." )
36+ return
37+ except Exception as e :
38+ print (f"Failed to read latest CSV '{ latest_file } ': { e } " )
39+ return
2640
27- # Load the CSV data
28- current_week_data = pd .read_csv (os .path .join (folder , latest_file ))
29- previous_week_data = pd .read_csv (os .path .join (folder , previous_file ))
41+ try :
42+ previous_week_data = pd .read_csv (os .path .join (folder , previous_file ))
43+ except pd .errors .EmptyDataError :
44+ print (f"Previous CSV '{ previous_file } ' is empty or has no columns. Aborting." )
45+ return
46+ except Exception as e :
47+ print (f"Failed to read previous CSV '{ previous_file } ': { e } " )
48+ return
3049
3150 # Merge data on the 'url' column to compare downloads
3251 merged_data = pd .merge (current_week_data , previous_week_data , on = 'url' , suffixes = ('_current' , '_previous' ))
@@ -58,13 +77,44 @@ def get_latest_two_csv_files(folder):
5877 Get the two most recent CSV files in the specified folder.
5978 """
6079 # Get all CSV files in the folder
61- csv_files = [f for f in os .listdir (folder ) if f .endswith ('.csv' )]
80+ csv_candidates = [f for f in os .listdir (folder ) if f .endswith ('.csv' )]
81+
82+ # Parse dates from filenames and ignore files that don't match the expected format
83+ dated_files = []
84+ for f in csv_candidates :
85+ try :
86+ dt = extract_date_from_filename (f )
87+ dated_files .append ((f , dt ))
88+ except Exception :
89+ continue
6290
6391 # Sort files by date (newest first)
64- csv_files = sorted (csv_files , key = lambda f : extract_date_from_filename (f ), reverse = True )
92+ dated_files .sort (key = lambda t : t [1 ], reverse = True )
93+
94+ # Collect two valid CSV files (non-empty and parseable)
95+ valid_files = []
96+ for fname , _ in dated_files :
97+ fullpath = os .path .join (folder , fname )
98+ try :
99+ if os .path .getsize (fullpath ) == 0 :
100+ continue
101+ except OSError :
102+ continue
103+
104+ # Quick parse test: try to read one row to ensure there are columns
105+ try :
106+ pd .read_csv (fullpath , nrows = 1 )
107+ except Exception :
108+ continue
109+
110+ valid_files .append (fname )
111+ if len (valid_files ) >= 2 :
112+ break
113+
114+ if len (valid_files ) < 2 :
115+ raise FileNotFoundError (f"Could not find two valid CSV files in '{ folder } '. Found: { valid_files } " )
65116
66- # Return the two most recent files
67- return csv_files [0 ], csv_files [1 ]
117+ return valid_files [0 ], valid_files [1 ]
68118
69119def download_first_pdf_file_from_zenodo (folder , record_id ):
70120 """
@@ -100,7 +150,7 @@ def download_first_pdf_file_from_zenodo(folder, record_id):
100150 pdf = pypdfium2 .PdfDocument (file_content )
101151 page = pdf [0 ]
102152 pil_image = page .render (
103- scale = 2.0 ,
153+ scale = 2 ,
104154 rotation = 0
105155 ).to_pil ()
106156
@@ -132,7 +182,16 @@ def resize_image(image, height):
132182 """
133183 aspect_ratio = image .width / image .height
134184 new_width = int (aspect_ratio * height )
135- return image .resize ((new_width , height ), Image .LANCZOS )
185+ # Determine resampling method safely to avoid static type-checker attribute errors
186+ resample_method = None
187+ # Prefer the Resampling enum when available (Pillow >= 9.1)
188+ resampling_enum = getattr (Image , "Resampling" , None )
189+ if resampling_enum is not None :
190+ resample_method = getattr (resampling_enum , "LANCZOS" , None )
191+ # Fallback to legacy attribute via getattr to avoid Pylance attribute warnings
192+ if resample_method is None :
193+ resample_method = getattr (Image , "LANCZOS" , getattr (Image , "BICUBIC" , None ))
194+ return image .resize ((new_width , height ), resample_method )
136195
137196# Define the format of your PNG file
138197def get_latest_png_filename (id ):
0 commit comments