Skip to content

Commit ab436cd

Browse files
authored
Merge pull request #94 from filips123/xlsx-parsing
Reintroduce pdf menu parser
2 parents 5594438 + 586b862 commit ab436cd

File tree

2 files changed

+77
-2
lines changed

2 files changed

+77
-2
lines changed

API/gimvicurnik/updaters/eclassroom.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -656,7 +656,7 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
656656
"""
657657

658658
# Extract workbook from an XLSX stream
659-
wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True)
659+
wb = with_span(op="extract")(load_workbook)(stream, data_only=True)
660660

661661
lunch_schedule = []
662662

@@ -665,6 +665,9 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
665665
if ws.title != "kosilo":
666666
continue
667667

668+
while not ws["A1"].value:
669+
ws.delete_cols(1)
670+
668671
for wr in ws.iter_rows(min_row=3, max_col=5):
669672
if not wr[3].value:
670673
break
@@ -674,7 +677,6 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
674677
assert isinstance(wr[0].value, datetime)
675678
assert isinstance(wr[1].value, str)
676679
assert isinstance(wr[2].value, str)
677-
assert isinstance(wr[3].value, int)
678680
assert isinstance(wr[4].value, str)
679681

680682
# Ignore rows that do not contain a class name

API/gimvicurnik/updaters/menu.py

+73
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from .base import BaseMultiUpdater, DocumentInfo
1515
from ..database import DocumentType, LunchMenu, SnackMenu
1616
from ..errors import MenuApiError, MenuDateError, MenuFormatError
17+
from ..utils.pdf import extract_tables
1718
from ..utils.sentry import with_span
1819

1920
if typing.TYPE_CHECKING:
@@ -120,6 +121,10 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
120121
span.set_tag("document.format", document.extension)
121122

122123
match (document.type, document.extension):
124+
case (DocumentType.SNACK_MENU, "pdf"):
125+
self._parse_snack_menu_pdf(stream, effective)
126+
case (DocumentType.LUNCH_MENU, "pdf"):
127+
self._parse_lunch_menu_pdf(stream, effective)
123128
case (DocumentType.SNACK_MENU, "xlsx"):
124129
self._parse_snack_menu_xlsx(stream, effective)
125130
case (DocumentType.LUNCH_MENU, "xlsx"):
@@ -131,6 +136,41 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
131136
case _:
132137
raise KeyError("Unknown document type for menu")
133138

139+
def _parse_snack_menu_pdf(self, stream: BytesIO, effective: datetime.date) -> None:
    """Read a snack menu PDF and store one SnackMenu entry per menu row.

    Tables are pulled out of ``stream`` with ``extract_tables`` (wrapped in
    an "extract" span). Rows whose second cell is empty or contains the
    text "NV in N" are skipped. Every remaining row is assigned a date,
    starting at ``effective`` and advancing by one day per kept row, and
    its cells 1-4 are written to the normal, poultry, vegetarian and
    fruitvegetable fields. An existing SnackMenu for that date is updated;
    otherwise a new one is created. The model is added to the session.
    """

    # Pull every table out of the PDF stream
    extract = with_span(op="extract")(extract_tables)
    tables = extract(stream)

    # Number of menu rows accepted so far; doubles as the day offset
    offset = 0

    # Walk all rows of all tables in document order
    for cells in (row for table in tables for row in table):
        first = cells[1]

        # Skip blank rows and rows carrying the "NV in N" marker
        # (presumably the table header - confirm against a real document)
        if not first or "NV in N" in first:
            continue

        current = effective + datetime.timedelta(days=offset)
        offset += 1

        # Read all cells up front so a short row fails before the database is queried
        normal, poultry, vegetarian, fruitvegetable = first, cells[2], cells[3], cells[4]

        # Reuse the stored menu for this date when there is one
        existing = self.session.query(SnackMenu).filter(SnackMenu.date == current).first()
        model = existing or SnackMenu()

        model.date = current
        model.normal = normal
        model.poultry = poultry
        model.vegetarian = vegetarian
        model.fruitvegetable = fruitvegetable

        self.session.add(model)
173+
134174
def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> None:
135175
"""Parse the snack menu XLSX document."""
136176

@@ -201,6 +241,39 @@ def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N
201241

202242
wb.close()
203243

244+
def _parse_lunch_menu_pdf(self, stream: BytesIO, effective: datetime.date) -> None:
    """Read a lunch menu PDF and store one LunchMenu entry per menu row.

    Tables are pulled out of ``stream`` with ``extract_tables`` (wrapped in
    an "extract" span). Rows whose second cell is empty or contains the
    text "N KOSILO" are skipped. Every remaining row is assigned a date,
    starting at ``effective`` and advancing by one day per kept row, and
    its cells 1 and 2 are written to the normal and vegetarian fields. An
    existing LunchMenu for that date is updated; otherwise a new one is
    created. The model is added to the session.
    """

    # Pull every table out of the PDF stream
    extract = with_span(op="extract")(extract_tables)
    tables = extract(stream)

    # Number of menu rows accepted so far; doubles as the day offset
    offset = 0

    # Walk all rows of all tables in document order
    for cells in (row for table in tables for row in table):
        first = cells[1]

        # Skip blank rows and rows carrying the "N KOSILO" marker
        # (presumably the table header - confirm against a real document)
        if not first or "N KOSILO" in first:
            continue

        current = effective + datetime.timedelta(days=offset)
        offset += 1

        # Read all cells up front so a short row fails before the database is queried
        normal, vegetarian = first, cells[2]

        # Reuse the stored menu for this date when there is one
        existing = self.session.query(LunchMenu).filter(LunchMenu.date == current).first()
        model = existing or LunchMenu()

        model.date = current
        model.normal = normal
        model.vegetarian = vegetarian

        self.session.add(model)
276+
204277
def _parse_lunch_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> None:
205278
"""Parse the lunch menu XLSX document."""
206279

0 commit comments

Comments
 (0)