Skip to content

Commit 08af6fc

Browse files
authored
Merge pull request #6 from filips123/fix-lunch-schedule-parsing
Add support for new lunch schedule format
2 parents 6cd00ed + 9661fc2 commit 08af6fc

File tree

4 files changed

+109
-30
lines changed

4 files changed

+109
-30
lines changed

API/gimvicurnik/errors/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .base import GimVicUrnikError
22
from .config import ConfigError, ConfigParseError, ConfigReadError, ConfigValidationError
3-
from .eclassroom import ClassroomApiError, InvalidRecordError, InvalidTokenError
3+
from .eclassroom import ClassroomApiError, ClassroomError, InvalidRecordError, InvalidTokenError, LunchScheduleError
44
from .menu import MenuApiError, MenuDateError
55
from .timetable import TimetableApiError

API/gimvicurnik/errors/eclassroom.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
from .base import GimVicUrnikError
22

33

4-
class ClassroomApiError(GimVicUrnikError):
4+
class ClassroomError(GimVicUrnikError):
    """Common base class of ClassroomApiError and LunchScheduleError."""
6+
7+
8+
class ClassroomApiError(ClassroomError):
    """ClassroomError subtype that InvalidTokenError and InvalidRecordError derive from."""
610

711

@@ -11,3 +15,7 @@ class InvalidTokenError(ClassroomApiError):
1115

1216
class InvalidRecordError(ClassroomApiError):
    """ClassroomApiError subtype.

    NOTE(review): presumably raised for an invalid e-classroom record;
    confirm against the raise sites.
    """
18+
19+
20+
class LunchScheduleError(ClassroomError):
    """Lunch schedule error, e.g. raised when the document URL matches no known format."""

API/gimvicurnik/updaters/eclassroom.py

+97-26
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pdf2docx import extract_tables
1010

1111
from ..database import Class, Classroom, Document, LunchSchedule, Substitution, Teacher
12-
from ..errors import ClassroomApiError, InvalidRecordError, InvalidTokenError
12+
from ..errors import ClassroomApiError, InvalidRecordError, InvalidTokenError, LunchScheduleError
1313
from ..utils.database import get_or_create
1414
from ..utils.sentry import with_span
1515
from ..utils.url import normalize_url, tokenize_url
@@ -126,8 +126,8 @@ def _store_substitutions(self, name, url, span):
126126
).date()
127127
day = date.isoweekday()
128128

129+
# Save content to temporary file
129130
filename = os.path.join(tempfile.gettempdir(), os.urandom(24).hex() + ".pdf")
130-
131131
file = open(filename, mode="w+b")
132132
file.write(content)
133133
file.close()
@@ -386,12 +386,8 @@ def _store_lunch_schedule(self, name, url, span):
386386

387387
return
388388

389-
date = datetime.datetime.strptime(
390-
re.search(r"Razpored delitve kosila, (.+)", name, re.IGNORECASE).group(1), "%d. %m. %Y"
391-
).date()
392-
389+
# Save content to temporary file
393390
filename = os.path.join(tempfile.gettempdir(), os.urandom(24).hex() + ".pdf")
394-
395391
file = open(filename, mode="w+b")
396392
file.write(content)
397393
file.close()
@@ -400,6 +396,73 @@ def _store_lunch_schedule(self, name, url, span):
400396
tables = with_span(op="extract")(extract_tables)(filename)
401397
os.remove(filename)
402398

399+
# Daily lunch schedule format, used until October 2020
400+
# Example: delitevKosila-0-15-okt2020-CET-objava.pdf
401+
if re.search(r"\/delitevKosila-0-[0-9]+-[a-z0-9]+-[A-Z]{3}-objava.pdf$", url):
402+
date = self._get_daily_lunch_schedule_date(name, url)
403+
self._parse_daily_lunch_schedule(date, tables)
404+
405+
# Weekly lunch schedule format, used starting with February 2021
406+
# Example: delitevKosila-15-19-feb2021.pdf
407+
elif re.search(r"\/delitevKosila-[1-9][0-9]+-[1-9][0-9]+-[a-z0-9]+.pdf$", url):
408+
date = self._get_weekly_lunch_schedule_date(name, url)
409+
self._parse_weekly_lunch_schedule(date, tables)
410+
411+
# Unknown lunch schedule format
412+
else:
413+
raise LunchScheduleError("Unknown lunch schedule format: " + url.rsplit("/", 1)[-1])
414+
415+
# Update or create a document
416+
if not document:
417+
document = Document()
418+
created = True
419+
else:
420+
created = False
421+
422+
document.date = date
423+
document.type = "lunch-schedule"
424+
document.url = url
425+
document.description = name.split(",")[0].capitalize()
426+
document.hash = hash
427+
428+
self.session.add(document)
429+
430+
span.set_tag("document.date", document.date)
431+
span.set_tag("document.hash", document.hash)
432+
span.set_tag("document.action", "created" if created else "updated")
433+
434+
if created:
435+
self.logger.info("Created a new lunch schedule document for %s", document.date)
436+
else:
437+
self.logger.info("Updated the lunch schedule document for %s", document.date)
438+
439+
@staticmethod
def _get_daily_lunch_schedule_date(name, url):
    """Extract the date of a daily lunch schedule from the document name.

    The name is expected to look like ``Razpored delitve kosila, 15. 10. 2020``
    (matched case-insensitively); the text after the comma is parsed with
    the ``%d. %m. %Y`` format.

    Args:
        name: Document name that carries the date.
        url: Document URL; not used by this helper.

    Returns:
        The parsed ``datetime.date``.

    Raises:
        ValueError: If the name lacks the expected prefix or the date text
            does not match the expected format.
    """
    match = re.search(r"Razpored delitve kosila, (.+)", name, re.IGNORECASE)
    if match is None:
        # Fail with a descriptive error instead of an AttributeError on None
        raise ValueError("Cannot find lunch schedule date in document name: " + name)

    # Surrounding whitespace would make strptime fail with "unconverted data remains"
    return datetime.datetime.strptime(match.group(1).strip(), "%d. %m. %Y").date()
444+
445+
@staticmethod
def _get_weekly_lunch_schedule_date(name, url):
    """Extract the first day of a weekly lunch schedule from the document URL.

    The URL is expected to end with a file name such as
    ``delitevKosila-15-19-feb2021.pdf``: first day, last day, a three-letter
    Slovenian month abbreviation and the year.

    Args:
        name: Document name; not used by this helper.
        url: Document URL whose file name encodes the covered week.

    Returns:
        ``datetime.date`` of the first day covered by the schedule.

    Raises:
        ValueError: If the URL does not match the expected file name format
            or the day is not valid for the month.
        KeyError: If the month abbreviation is not a known one.
    """
    month_to_number = {
        "jan": 1,
        "feb": 2,
        "mar": 3,
        "apr": 4,
        "maj": 5,
        "jun": 6,
        "jul": 7,
        "avg": 8,
        "sep": 9,
        "okt": 10,
        "nov": 11,
        "dec": 12,
    }

    # The last-day group accepts any digits so weeks ending on e.g. the 20th
    # or 30th are parsed too; the first day may also be a single digit.
    match = re.search(r"/delitevKosila-([1-9][0-9]*)-[0-9]+-([a-z]+)([1-9][0-9]+)\.pdf$", url)
    if match is None:
        raise ValueError("Cannot find lunch schedule date in URL: " + url)

    day, month, year = match.groups()
    return datetime.date(year=int(year), month=month_to_number[month], day=int(day))
464+
465+
def _parse_daily_lunch_schedule(self, date, tables):
403466
schedule = []
404467

405468
last_hour = None
@@ -438,26 +501,34 @@ def _store_lunch_schedule(self, name, url, span):
438501
self.session.query(LunchSchedule).filter(LunchSchedule.date == date).delete()
439502
self.session.bulk_insert_mappings(LunchSchedule, schedule)
440503

441-
# Update or create a document
442-
if not document:
443-
document = Document()
444-
created = True
445-
else:
446-
created = False
504+
def _parse_weekly_lunch_schedule(self, date, tables):
    """Parse a weekly lunch schedule and store it in the database.

    Every table except the instructions table is treated as one day of the
    schedule: the first such table belongs to ``date`` and each following
    one to the next calendar day. Existing rows for the parsed days are
    removed before the new rows are inserted.

    Args:
        date: Date that the first schedule table belongs to.
        tables: Tables extracted from the PDF; each table is a list of rows
            and each data row holds time (``%H:%M``), class name and
            location in its first three cells.
    """
    schedule = []
    parsed_days = []

    for table in tables:
        # Skip instructions
        if "V jedilnico prihajate z maskami ob uri" in table[0][0]:
            continue

        # Remember the day now, because `date` is advanced after each table
        parsed_days.append(date)

        for row in table:
            # Skip header
            if row[0] and "ura" in row[0]:
                continue

            time = datetime.datetime.strptime(row[0].strip(), "%H:%M").time()
            class_ = row[1].strip()
            location = row[2].strip()

            schedule.append(
                {
                    "class_id": get_or_create(self.session, model=Class, name=class_)[0].id,
                    "date": date,
                    "time": time,
                    "location": location,
                }
            )

        date += datetime.timedelta(days=1)

    # Store schedule in database
    # Delete the rows of the days that were parsed; filtering on `date` here
    # would target the day after the last table and leave stale rows behind.
    for day in parsed_days:
        self.session.query(LunchSchedule).filter(LunchSchedule.date == day).delete()
    self.session.bulk_insert_mappings(LunchSchedule, schedule)

API/gimvicurnik/updaters/menu.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,8 @@ def _store_snack_menu(self, url, date, span):
154154

155155
return
156156

157+
# Save content to temporary file
157158
filename = os.path.join(tempfile.gettempdir(), os.urandom(24).hex() + ".pdf")
158-
159159
file = open(filename, mode="w+b")
160160
file.write(content)
161161
file.close()
@@ -238,8 +238,8 @@ def _store_lunch_menu(self, url, date, span):
238238

239239
return
240240

241+
# Save content to temporary file
241242
filename = os.path.join(tempfile.gettempdir(), os.urandom(24).hex() + ".pdf")
242-
243243
file = open(filename, mode="w+b")
244244
file.write(content)
245245
file.close()

0 commit comments

Comments
 (0)