Skip to content

Commit 2650164

Browse files
authored
Merge pull request #38 from filips123/fix-regex-and-updaters
2 parents 0b97b83 + 0f893cc commit 2650164

File tree

2 files changed

+22
-11
lines changed

2 files changed

+22
-11
lines changed

API/gimvicurnik/updaters/eclassroom.py

+21 -10
Original file line number | Diff line number | Diff line change
@@ -206,6 +206,13 @@ def _store_substitutions(self, name, url, span):
206206

207207
# Parse substitutions
208208
if parser_type == "substitutions":
209+
if not any(row):
210+
self.logger.error(
211+
"Something is wrong with the substitutions file; the row should have at least one non-empty value",
212+
extra={"row": row},
213+
)
214+
continue
215+
209216
time = row[1][:-1] if row[1] != "PU" else 0
210217
subject = self._normalize_other_names(row[5])
211218

@@ -401,7 +408,7 @@ def _store_lunch_schedule(self, name, url, span):
401408

402409
# Daily lunch schedule format, used until October 2020
403410
# Example: delitevKosila-0-15-okt2020-CET-objava.pdf
404-
if re.search(r"\/delitevKosila-0-[0-9]+-[a-z0-9]+-[A-Z]{3}-objava\.pdf$", url):
411+
if re.search(r"\/delitevKosila-0-[0-9]+-[a-z0-9]+-[A-Z]{3}-(?i:objava)\.pdf$", url):
405412
date = self._get_daily_lunch_schedule_date(name, url)
406413
self._parse_daily_lunch_schedule(date, tables)
407414

@@ -414,7 +421,7 @@ def _store_lunch_schedule(self, name, url, span):
414421
# Daily lunch schedule format, used starting with March 2021
415422
# Example: delitevKosila-mar9-2021-TOR-objava-PDF-0.pdf
416423
# Example: delitevKosila-1sept2021-SRE-objava-PDF.pdf
417-
elif re.search(r"\/delitevKosila-[a-z0-9]+(?:-[0-9]+)?-[A-Z]{3}-objava.*\.pdf$", url):
424+
elif re.search(r"\/delitevKosila-[a-z0-9]+(?:-[0-9]+)?-[A-Z]{3}-(?i:objava).*\.pdf$", url):
418425
date = self._get_daily_lunch_schedule_date(name, url)
419426
self._parse_daily_lunch_schedule(date, tables)
420427

@@ -534,23 +541,27 @@ def _parse_daily_lunch_schedule(self, date, tables):
534541
continue
535542

536543
for index, row in enumerate(table):
544+
# Skip header
545+
if row[0] and "ura" in row[0]:
546+
continue
547+
548+
# Skip empty rows
549+
if len(row) != 5 or not row[0]:
550+
continue
551+
537552
# Handle multiple times in the same cell
538553
times = row[0].split("\n", 1)
539554
if len(times) == 2:
540555
row[0] = times[0]
541556
table[index + 1][0] = times[1]
542557

558+
# Handle incorrectly connected cells
559+
if row[1] is None and len(row[0].split(" ", 1)) == 2:
560+
row[0], row[1] = row[0].split(" ", 1)
561+
543562
# Handle different time formats
544563
row[0] = row[0].strip().replace(".", ":")
545564

546-
# Skip header
547-
if row[0] and "ura" in row[0]:
548-
continue
549-
550-
# Skip empty rows
551-
if len(row) != 5 or not row[0]:
552-
continue
553-
554565
is_time_valid = row[0] and row[0].strip() != "do"
555566
time = datetime.datetime.strptime(row[0], "%H:%M").time() if is_time_valid else last_hour
556567
last_hour = time

API/gimvicurnik/updaters/menu.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def _get_date(url):
104104
# Another example: 05-splet-februar-3-teden-M-PDF.pdf
105105
# Another example: 04-splet-marec-2-teden-04-M-PDF-0.pdf
106106
# Another example: 01-splet-september-4-teden-02-M-popravek.pdf
107-
date = re.search(r"\d+-splet-([a-z]+)-(\d)-teden-?\d*-[MK]-?\d?(?i:-PDF)?(?:-popravek)?(?:-\d)?\.[a-z]+", url)
107+
date = re.search(r"\d+-splet-([a-z]+)-(\d)-teden-?\d*-[MK]-?\d?(?i:-PDF)?(?:-[a-z]+)?(?:-\d)?\.[a-z]+", url)
108108

109109
if date:
110110
year = datetime.datetime.now().year

0 commit comments

Comments
 (0)