9
9
from pdf2docx import extract_tables
10
10
11
11
from ..database import Class , Classroom , Document , LunchSchedule , Substitution , Teacher
12
- from ..errors import ClassroomApiError , InvalidRecordError , InvalidTokenError
12
+ from ..errors import ClassroomApiError , InvalidRecordError , InvalidTokenError , LunchScheduleError
13
13
from ..utils .database import get_or_create
14
14
from ..utils .sentry import with_span
15
15
from ..utils .url import normalize_url , tokenize_url
@@ -126,8 +126,8 @@ def _store_substitutions(self, name, url, span):
126
126
).date ()
127
127
day = date .isoweekday ()
128
128
129
+ # Save content to temporary file
129
130
filename = os .path .join (tempfile .gettempdir (), os .urandom (24 ).hex () + ".pdf" )
130
-
131
131
file = open (filename , mode = "w+b" )
132
132
file .write (content )
133
133
file .close ()
@@ -386,12 +386,8 @@ def _store_lunch_schedule(self, name, url, span):
386
386
387
387
return
388
388
389
- date = datetime .datetime .strptime (
390
- re .search (r"Razpored delitve kosila, (.+)" , name , re .IGNORECASE ).group (1 ), "%d. %m. %Y"
391
- ).date ()
392
-
389
+ # Save content to temporary file
393
390
filename = os .path .join (tempfile .gettempdir (), os .urandom (24 ).hex () + ".pdf" )
394
-
395
391
file = open (filename , mode = "w+b" )
396
392
file .write (content )
397
393
file .close ()
@@ -400,6 +396,73 @@ def _store_lunch_schedule(self, name, url, span):
400
396
tables = with_span (op = "extract" )(extract_tables )(filename )
401
397
os .remove (filename )
402
398
399
+ # Daily lunch schedule format, used until October 2020
400
+ # Example: delitevKosila-0-15-okt2020-CET-objava.pdf
401
+ if re .search (r"\/delitevKosila-0-[0-9]+-[a-z0-9]+-[A-Z]{3}-objava.pdf$" , url ):
402
+ date = self ._get_daily_lunch_schedule_date (name , url )
403
+ self ._parse_daily_lunch_schedule (date , tables )
404
+
405
+ # Weekly lunch schedule format, used starting with February 2021
406
+ # Example: delitevKosila-15-19-feb2021.pdf
407
+ elif re .search (r"\/delitevKosila-[1-9][0-9]+-[1-9][0-9]+-[a-z0-9]+.pdf$" , url ):
408
+ date = self ._get_weekly_lunch_schedule_date (name , url )
409
+ self ._parse_weekly_lunch_schedule (date , tables )
410
+
411
+ # Unknown lunch schedule format
412
+ else :
413
+ raise LunchScheduleError ("Unknown lunch schedule format: " + url .rsplit ("/" , 1 )[- 1 ])
414
+
415
+ # Update or create a document
416
+ if not document :
417
+ document = Document ()
418
+ created = True
419
+ else :
420
+ created = False
421
+
422
+ document .date = date
423
+ document .type = "lunch-schedule"
424
+ document .url = url
425
+ document .description = name .split ("," )[0 ].capitalize ()
426
+ document .hash = hash
427
+
428
+ self .session .add (document )
429
+
430
+ span .set_tag ("document.date" , document .date )
431
+ span .set_tag ("document.hash" , document .hash )
432
+ span .set_tag ("document.action" , "created" if created else "updated" )
433
+
434
+ if created :
435
+ self .logger .info ("Created a new lunch schedule document for %s" , document .date )
436
+ else :
437
+ self .logger .info ("Updated the lunch schedule document for %s" , document .date )
438
+
439
+ @staticmethod
440
+ def _get_daily_lunch_schedule_date (name , url ):
441
+ return datetime .datetime .strptime (
442
+ re .search (r"Razpored delitve kosila, (.+)" , name , re .IGNORECASE ).group (1 ), "%d. %m. %Y"
443
+ ).date ()
444
+
445
+ @staticmethod
446
+ def _get_weekly_lunch_schedule_date (name , url ):
447
+ month_to_number = {
448
+ "jan" : 1 ,
449
+ "feb" : 2 ,
450
+ "mar" : 3 ,
451
+ "apr" : 4 ,
452
+ "maj" : 5 ,
453
+ "jun" : 6 ,
454
+ "jul" : 7 ,
455
+ "avg" : 8 ,
456
+ "sep" : 9 ,
457
+ "okt" : 10 ,
458
+ "nov" : 11 ,
459
+ "dec" : 12 ,
460
+ }
461
+
462
+ date = re .search (r"\/delitevKosila-([1-9][0-9]+)-[0-9][1-9]+-([a-z]+)([1-9][0-9]+).pdf$" , url )
463
+ return datetime .date (year = int (date .group (3 )), month = month_to_number [date .group (2 )], day = int (date .group (1 )))
464
+
465
+ def _parse_daily_lunch_schedule (self , date , tables ):
403
466
schedule = []
404
467
405
468
last_hour = None
@@ -438,26 +501,34 @@ def _store_lunch_schedule(self, name, url, span):
438
501
self .session .query (LunchSchedule ).filter (LunchSchedule .date == date ).delete ()
439
502
self .session .bulk_insert_mappings (LunchSchedule , schedule )
440
503
441
- # Update or create a document
442
- if not document :
443
- document = Document ()
444
- created = True
445
- else :
446
- created = False
504
def _parse_weekly_lunch_schedule(self, date, tables):
    """Parse a weekly lunch schedule and store it in the database.

    Tables whose first cell contains the instructions text are skipped.
    Every other table is treated as one day: the first such table belongs
    to ``date`` and each following one to the next calendar day. Within a
    table, rows whose first cell contains ``ura`` are treated as headers
    and skipped; the remaining rows are read as
    ``(time as %H:%M, class name, location)``.

    Args:
        date: ``datetime.date`` of the first day of the schedule.
        tables: Tables extracted from the PDF, each a sequence of rows,
            each row a sequence of cell strings.

    Raises:
        ValueError: If a time cell is not in ``%H:%M`` format.
    """
    schedule = []
    covered_dates = []

    for table in tables:
        # Skip instructions
        if "V jedilnico prihajate z maskami ob uri" in table[0][0]:
            continue

        # Remember the day so its old entries can be replaced below
        covered_dates.append(date)

        for row in table:
            # Skip header
            if row[0] and "ura" in row[0]:
                continue

            lunch_time = datetime.datetime.strptime(row[0].strip(), "%H:%M").time()
            class_ = row[1].strip()
            location = row[2].strip()

            schedule.append(
                {
                    "class_id": get_or_create(self.session, model=Class, name=class_)[0].id,
                    "date": date,
                    "time": lunch_time,
                    "location": location,
                }
            )

        date += datetime.timedelta(days=1)

    # Store schedule in database
    # Delete the old entries of every day that was parsed. After the loop,
    # `date` points at the day following the schedule, so filtering on it
    # would leave stale rows for the covered days and duplicate them.
    for covered_date in covered_dates:
        self.session.query(LunchSchedule).filter(LunchSchedule.date == covered_date).delete()
    self.session.bulk_insert_mappings(LunchSchedule, schedule)
0 commit comments