14
14
from .base import BaseMultiUpdater , DocumentInfo
15
15
from ..database import DocumentType , LunchMenu , SnackMenu
16
16
from ..errors import MenuApiError , MenuDateError , MenuFormatError
17
+ from ..utils .pdf import extract_tables
17
18
from ..utils .sentry import with_span
18
19
19
20
if typing .TYPE_CHECKING :
@@ -120,6 +121,10 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
120
121
span .set_tag ("document.format" , document .extension )
121
122
122
123
match (document .type , document .extension ):
124
+ case (DocumentType .SNACK_MENU , "pdf" ):
125
+ self ._parse_snack_menu_pdf (stream , effective )
126
+ case (DocumentType .LUNCH_MENU , "pdf" ):
127
+ self ._parse_lunch_menu_pdf (stream , effective )
123
128
case (DocumentType .SNACK_MENU , "xlsx" ):
124
129
self ._parse_snack_menu_xlsx (stream , effective )
125
130
case (DocumentType .LUNCH_MENU , "xlsx" ):
@@ -131,6 +136,41 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
131
136
case _:
132
137
raise KeyError ("Unknown document type for menu" )
133
138
139
def _parse_snack_menu_pdf(self, stream: BytesIO, effective: datetime.date) -> None:
    """Read snack menus out of a PDF stream and stage them in the session.

    Every table returned by ``extract_tables`` is walked row by row. A row
    is ignored when its second cell is falsy or contains ``"NV in N"``
    (presumably the table header -- confirm against a sample document).
    Each remaining row is assigned the next consecutive calendar date,
    starting at ``effective``; the counter runs across all tables and is
    not reset between them.

    For each accepted row the ``SnackMenu`` stored for that date is
    updated, or a fresh one is created when the lookup yields nothing, and
    the result is added to ``self.session``. No commit is issued here.

    Args:
        stream: The PDF document contents.
        effective: The date assigned to the first accepted row.
    """

    # Wrap the extractor so that the extraction runs inside an "extract" span
    traced_extract = with_span(op="extract")(extract_tables)

    # Number of rows accepted so far, i.e. the day offset of the next menu
    offset = 0

    for table in traced_extract(stream):
        for cells in table:
            label = cells[1]

            if not label or "NV in N" in label:
                continue

            day = effective + datetime.timedelta(days=offset)
            offset += 1

            # Column order: normal, poultry, vegetarian, fruit/vegetable
            fields = (
                ("date", day),
                ("normal", label),
                ("poultry", cells[2]),
                ("vegetarian", cells[3]),
                ("fruitvegetable", cells[4]),
            )

            # Reuse the stored menu for this date if there is one
            stored = self.session.query(SnackMenu).filter(SnackMenu.date == day).first()
            record = stored or SnackMenu()

            for attribute, content in fields:
                setattr(record, attribute, content)

            self.session.add(record)
173
+
134
174
def _parse_snack_menu_xlsx (self , stream : BytesIO , effective : datetime .date ) -> None :
135
175
"""Parse the snack menu XLSX document."""
136
176
@@ -201,6 +241,39 @@ def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N
201
241
202
242
wb .close ()
203
243
244
def _parse_lunch_menu_pdf(self, stream: BytesIO, effective: datetime.date) -> None:
    """Read lunch menus out of a PDF stream and stage them in the session.

    Every table returned by ``extract_tables`` is walked row by row. A row
    is ignored when its second cell is falsy or contains ``"N KOSILO"``
    (presumably the table header -- confirm against a sample document).
    Each remaining row is assigned the next consecutive calendar date,
    starting at ``effective``; the counter runs across all tables and is
    not reset between them.

    For each accepted row the ``LunchMenu`` stored for that date is
    updated, or a fresh one is created when the lookup yields nothing, and
    the result is added to ``self.session``. No commit is issued here.

    Args:
        stream: The PDF document contents.
        effective: The date assigned to the first accepted row.
    """

    # Wrap the extractor so that the extraction runs inside an "extract" span
    traced_extract = with_span(op="extract")(extract_tables)

    # Number of rows accepted so far, i.e. the day offset of the next menu
    offset = 0

    for table in traced_extract(stream):
        for cells in table:
            label = cells[1]

            if not label or "N KOSILO" in label:
                continue

            day = effective + datetime.timedelta(days=offset)
            offset += 1

            # Column order: normal, vegetarian
            fields = (
                ("date", day),
                ("normal", label),
                ("vegetarian", cells[2]),
            )

            # Reuse the stored menu for this date if there is one
            stored = self.session.query(LunchMenu).filter(LunchMenu.date == day).first()
            record = stored or LunchMenu()

            for attribute, content in fields:
                setattr(record, attribute, content)

            self.session.add(record)
276
+
204
277
def _parse_lunch_menu_xlsx (self , stream : BytesIO , effective : datetime .date ) -> None :
205
278
"""Parse the lunch menu XLSX document."""
206
279
0 commit comments