Skip to content

Commit 3be64c5

Browse files
authored
Merge pull request #84 from filips123/xlsx-parsing
Xlsx parsing
2 parents 0a233e2 + f2bc5b5 commit 3be64c5

File tree

2 files changed: +89 -95 lines changed

2 files changed

+89
-95
lines changed

API/gimvicurnik/updaters/eclassroom.py

+9 -14
Original file line number | Diff line number | Diff line change
@@ -267,10 +267,6 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
267267
span.set_tag("document.type", document.type.value)
268268
span.set_tag("document.format", document.extension)
269269

270-
# Only parse xlsx lunch schedules - a guard for now
271-
if document.type == DocumentType.LUNCH_SCHEDULE and document.extension != "xlsx":
272-
return
273-
274270
match (document.type, document.extension):
275271
case (DocumentType.SUBSTITUTIONS, "pdf"):
276272
self._parse_substitutions_pdf(stream, effective)
@@ -772,27 +768,26 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
772768
assert isinstance(wr[2].value, str)
773769
assert isinstance(wr[4].value, str)
774770

775-
# Schedule for specific class
776-
class_schedule: dict[str, Any] = {}
771+
schedule: dict[str, Any] = {}
777772

778773
# Time in format H:M
779-
class_schedule["time"] = wr[0].value
774+
schedule["time"] = wr[0].value
780775

781776
# Notes
782-
class_schedule["notes"] = wr[1].value.strip() if wr[1].value else None
777+
schedule["notes"] = wr[1].value.strip() if wr[1].value else None
783778

784779
# Class name (class id)
785780
if wr[2].value:
786-
class_schedule["class_id"] = get_or_create(
787-
self.session, model=Class, name=wr[2].value.strip()
788-
)[0].id
781+
schedule["class_id"] = get_or_create(self.session, model=Class, name=wr[2].value.strip())[
782+
0
783+
].id
789784

790785
# Location
791-
class_schedule["location"] = wr[4].value.strip() if wr[4].value else None
786+
schedule["location"] = wr[4].value.strip() if wr[4].value else None
792787

793788
# Effective date
794-
class_schedule["date"] = effective
795-
lunch_schedule.append(class_schedule)
789+
schedule["date"] = effective
790+
lunch_schedule.append(schedule)
796791

797792
wb.close()
798793

API/gimvicurnik/updaters/menu.py

+80 -81
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
from bs4 import BeautifulSoup, ParserRejectedMarkup
1111
from openpyxl import load_workbook
12+
from sqlalchemy import insert
1213

1314
from .base import BaseMultiUpdater, DocumentInfo
1415
from ..database import DocumentType, LunchMenu, SnackMenu
@@ -85,7 +86,9 @@ def get_document_effective(self, document: DocumentInfo) -> datetime.date:
8586

8687
# jedilnik-kosilo-YYYY-MM-DD(-popravek).pdf
8788
# jedilnik-malica-YYYY-MM-DD(-popravek).pdf
88-
date = re.search(r"jedilnik-(?:kosilo|malica)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?.pdf", document.url)
89+
date = re.search(
90+
r"jedilnik-(?:kosilo|malica)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?\.(?:pdf|xlsx)", document.url
91+
)
8992

9093
# The specified date is commonly Monday of the effective week
9194
# However, in some cases, it may also be another day of that week
@@ -174,68 +177,67 @@ def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N
174177
# Extract workbook from an XLSX stream
175178
wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True)
176179

177-
menu: dict[str, Any] = {}
180+
snack_menu: dict[str, Any] = {
181+
"normal": [],
182+
"poultry": [],
183+
"vegetarian": [],
184+
"fruitvegetable": [],
185+
}
178186
days = 0
179187

180-
# Parse tables into menus and store them
188+
# Parse menus and store them
181189
for ws in wb:
182-
for wr in ws.iter_rows(min_row=1, max_col=3):
183-
if not hasattr(wr[0].border, "bottom"):
190+
for wr in ws.iter_rows(min_row=2, max_col=5):
191+
if days == 5:
192+
break
193+
194+
# Ignore blank cells
195+
if not wr[1].value:
184196
continue
185197

186-
# Make mypy not complain about incorrect types for cell values
187-
# If the cell has an incorrect type, we should fail anyway
198+
# Check for correct cell value type (else mypy complains)
188199
if typing.TYPE_CHECKING:
189200
assert isinstance(wr[1].value, str)
190201
assert isinstance(wr[2].value, str)
191202
assert isinstance(wr[3].value, str)
192203
assert isinstance(wr[4].value, str)
193204

194-
# Store the menu after the end of table
195-
if wr[0].border.bottom.color:
196-
if menu and menu["date"]:
197-
# fmt: off
198-
model = (
199-
self.session.query(SnackMenu)
200-
.filter(SnackMenu.date == menu["date"])
201-
.first()
202-
)
203-
# fmt: on
204-
205-
if not model:
206-
model = SnackMenu()
207-
208-
model.date = menu["date"]
209-
model.normal = "\n".join(menu["normal"][1:])
210-
model.poultry = "\n".join(menu["poultry"][1:])
211-
model.vegetarian = "\n".join(menu["vegetarian"][1:])
212-
model.fruitvegetable = "\n".join(menu["fruitvegetable"][1:])
213-
214-
self.session.add(model)
215-
days += 1
216-
217-
menu = {
218-
"date": None,
219-
"normal": [],
220-
"poultry": [],
221-
"vegetarian": [],
222-
"fruitvegetable": [],
223-
}
224-
225-
if wr[0].value and isinstance(wr[0].value, datetime.datetime):
226-
menu["date"] = effective + datetime.timedelta(days=days)
205+
# Ignore information cells
206+
if "NV in N" in wr[1].value:
207+
continue
227208

228209
if wr[1].value:
229-
menu["normal"].append(wr[1].value.strip())
210+
snack_menu["normal"].append(wr[1].value.strip())
230211

231212
if wr[2].value:
232-
menu["poultry"].append(wr[2].value.strip())
213+
snack_menu["poultry"].append(wr[2].value.strip())
233214

234215
if wr[3].value:
235-
menu["vegetarian"].append(wr[3].value.strip())
216+
snack_menu["vegetarian"].append(wr[3].value.strip())
236217

237218
if wr[4].value:
238-
menu["fruitvegetable"].append(wr[4].value.strip())
219+
snack_menu["fruitvegetable"].append(wr[4].value.strip())
220+
221+
# Store the menu after the end of day
222+
if wr[0].border.bottom.color:
223+
snack_menu["date"] = effective + datetime.timedelta(days=days)
224+
self.session.query(SnackMenu).filter(SnackMenu.date == snack_menu["date"]).delete()
225+
226+
snack_menu["normal"] = "\n".join(snack_menu["normal"])
227+
snack_menu["poultry"] = "\n".join(snack_menu["poultry"])
228+
snack_menu["vegetarian"] = "\n".join(snack_menu["vegetarian"])
229+
snack_menu["fruitvegetable"] = "\n".join(snack_menu["fruitvegetable"])
230+
231+
self.session.execute(insert(SnackMenu), snack_menu)
232+
233+
# Set for next day
234+
days += 1
235+
snack_menu = {
236+
"normal": [],
237+
"poultry": [],
238+
"vegetarian": [],
239+
"fruitvegetable": [],
240+
}
239241

240242
wb.close()
241243

@@ -278,56 +280,53 @@ def _parse_lunch_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N
278280
# Extract workbook from an XLSX stream
279281
wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True)
280282

281-
menu: dict[str, Any] = {}
283+
lunch_menu: dict[str, Any] = {
284+
"normal": [],
285+
"vegetarian": [],
286+
}
282287
days = 0
283288

284-
# Parse tables into menus and store them
289+
# Parse menus and store them
285290
for ws in wb:
286-
for wr in ws.iter_rows(min_row=1, max_col=3):
287-
if not hasattr(wr[0].border, "bottom"):
291+
for wr in ws.iter_rows(min_row=2, max_col=3):
292+
if days == 5:
293+
break
294+
295+
# Ignore blank cells
296+
if not wr[1].value:
288297
continue
289298

290-
# Make mypy not complain about incorrect types for cell values
291-
# If the cell has an incorrect type, we should fail anyway
299+
# Check for correct cell value type (else mypy complains)
292300
if typing.TYPE_CHECKING:
293301
assert isinstance(wr[1].value, str)
294302
assert isinstance(wr[2].value, str)
295303

296-
# Store the menu after the end of table
297-
if wr[0].border.bottom.color:
298-
if menu and menu["date"]:
299-
# fmt: off
300-
model = (
301-
self.session.query(LunchMenu)
302-
.filter(LunchMenu.date == menu["date"])
303-
.first()
304-
)
305-
# fmt: on
306-
307-
if not model:
308-
model = LunchMenu()
309-
310-
model.date = menu["date"]
311-
model.normal = "\n".join(menu["normal"][1:])
312-
model.vegetarian = "\n".join(menu["vegetarian"][1:])
313-
314-
self.session.add(model)
315-
days += 1
316-
317-
menu = {
318-
"date": None,
319-
"normal": [],
320-
"vegetarian": [],
321-
}
322-
323-
if wr[0].value and isinstance(wr[0].value, datetime.datetime):
324-
menu["date"] = effective + datetime.timedelta(days=days)
304+
# Ignore information cells
305+
if "N KOSILO" in wr[1].value:
306+
continue
325307

326308
if wr[1].value:
327-
menu["normal"].append(wr[1].value.strip())
309+
lunch_menu["normal"].append(wr[1].value.strip())
328310

329311
if wr[2].value:
330-
menu["vegetarian"].append(wr[2].value.strip())
312+
lunch_menu["vegetarian"].append(wr[2].value.strip())
313+
314+
# Store the menu after the end of day
315+
if wr[0].border.bottom.color:
316+
lunch_menu["date"] = effective + datetime.timedelta(days=days)
317+
self.session.query(LunchMenu).filter(LunchMenu.date == lunch_menu["date"]).delete()
318+
319+
lunch_menu["normal"] = "\n".join(lunch_menu["normal"])
320+
lunch_menu["vegetarian"] = "\n".join(lunch_menu["vegetarian"])
321+
322+
self.session.execute(insert(LunchMenu), lunch_menu)
323+
324+
# Set for next day
325+
days += 1
326+
lunch_menu = {
327+
"normal": [],
328+
"vegetarian": [],
329+
}
331330

332331
wb.close()
333332

0 commit comments

Comments
 (0)