@@ -206,6 +206,13 @@ def _store_substitutions(self, name, url, span):
206
206
207
207
# Parse substitutions
208
208
if parser_type == "substitutions" :
209
+ if not any (row ):
210
+ self .logger .error (
211
+ "Something is wrong with the substitutions file; the row should have at least one non-empty value" ,
212
+ extra = {"row" : row },
213
+ )
214
+ continue
215
+
209
216
time = row [1 ][:- 1 ] if row [1 ] != "PU" else 0
210
217
subject = self ._normalize_other_names (row [5 ])
211
218
@@ -401,7 +408,7 @@ def _store_lunch_schedule(self, name, url, span):
401
408
402
409
# Daily lunch schedule format, used until October 2020
403
410
# Example: delitevKosila-0-15-okt2020-CET-objava.pdf
404
- if re .search (r"\/delitevKosila-0-[0-9]+-[a-z0-9]+-[A-Z]{3}-objava\.pdf$" , url ):
411
+ if re .search (r"\/delitevKosila-0-[0-9]+-[a-z0-9]+-[A-Z]{3}-(?i: objava) \.pdf$" , url ):
405
412
date = self ._get_daily_lunch_schedule_date (name , url )
406
413
self ._parse_daily_lunch_schedule (date , tables )
407
414
@@ -414,7 +421,7 @@ def _store_lunch_schedule(self, name, url, span):
414
421
# Daily lunch schedule format, used starting with March 2021
415
422
# Example: delitevKosila-mar9-2021-TOR-objava-PDF-0.pdf
416
423
# Example: delitevKosila-1sept2021-SRE-objava-PDF.pdf
417
- elif re .search (r"\/delitevKosila-[a-z0-9]+(?:-[0-9]+)?-[A-Z]{3}-objava.*\.pdf$" , url ):
424
+ elif re .search (r"\/delitevKosila-[a-z0-9]+(?:-[0-9]+)?-[A-Z]{3}-(?i: objava) .*\.pdf$" , url ):
418
425
date = self ._get_daily_lunch_schedule_date (name , url )
419
426
self ._parse_daily_lunch_schedule (date , tables )
420
427
@@ -534,23 +541,27 @@ def _parse_daily_lunch_schedule(self, date, tables):
534
541
continue
535
542
536
543
for index , row in enumerate (table ):
544
+ # Skip header
545
+ if row [0 ] and "ura" in row [0 ]:
546
+ continue
547
+
548
+ # Skip empty rows
549
+ if len (row ) != 5 or not row [0 ]:
550
+ continue
551
+
537
552
# Handle multiple times in the same cell
538
553
times = row [0 ].split ("\n " , 1 )
539
554
if len (times ) == 2 :
540
555
row [0 ] = times [0 ]
541
556
table [index + 1 ][0 ] = times [1 ]
542
557
558
+ # Handle incorrectly connected cells
559
+ if row [1 ] is None and len (row [0 ].split (" " , 1 )) == 2 :
560
+ row [0 ], row [1 ] = row [0 ].split (" " , 1 )
561
+
543
562
# Handle different time formats
544
563
row [0 ] = row [0 ].strip ().replace ("." , ":" )
545
564
546
- # Skip header
547
- if row [0 ] and "ura" in row [0 ]:
548
- continue
549
-
550
- # Skip empty rows
551
- if len (row ) != 5 or not row [0 ]:
552
- continue
553
-
554
565
is_time_valid = row [0 ] and row [0 ].strip () != "do"
555
566
time = datetime .datetime .strptime (row [0 ], "%H:%M" ).time () if is_time_valid else last_hour
556
567
last_hour = time
0 commit comments