diff --git a/backend/dining/api_wrapper.py b/backend/dining/api_wrapper.py index f4ef4a5a..90c71490 100644 --- a/backend/dining/api_wrapper.py +++ b/backend/dining/api_wrapper.py @@ -4,6 +4,7 @@ import requests from django.conf import settings +from django.db.models import Count, Max from django.utils import timezone from django.utils.timezone import make_aware from requests.exceptions import ConnectionError, ConnectTimeout, ReadTimeout @@ -122,12 +123,18 @@ def fetch_menu(self, venue_id, date): response.json(), ) # also storing venue_id to later access in fetched_menus list - def load_menu(self, date=timezone.now().date()): + def load_menus(self, date=None): """ Loads today's menu + Invariant: there should be no duplicate Menus. `load_menus` should delete + duplicate menus for all venues for the given date. + NOTE: This method should only be used in load_next_menu.py, which is run based on a cron job every day """ + if date is None: + date = timezone.now().date() + # Venues without a menu should not be parsed skipped_venues = [747, 1163, 1731, 1732, 1733, 1464004, 1464009] @@ -175,6 +182,10 @@ def load_menu(self, date=timezone.now().date()): # Append stations to dining menu self.load_stations(daypart["stations"], dining_menu) + # delete duplicate menus + deleted_count = self.delete_duplicate_menus(date) + print(deleted_count, "duplicate objects deleted for date", date) + def load_stations(self, station_response, dining_menu): for station_data in station_response: # TODO: This is inefficient for venues such as Houston Market @@ -212,3 +223,35 @@ def load_items(self, item_response): ], unique_fields=[DiningItem._meta.pk.name], ) + + def delete_duplicate_menus(self, date): + """Delete duplicate menus for an exact `date`. + Will delete all but the most recently created menus for each dining hall + """ + # Find groups of duplicate menus + duplicate_groups = ( + DiningMenu.objects.values("venue", "date", "start_time", "end_time", "service") + .annotate(menu_count=Count("id"), keep_id=Max("id")) + .filter(menu_count__gt=1, date=date) + ) + + # Find all ids to delete + ids_to_delete = [] + + for group in duplicate_groups: + ids = ( + DiningMenu.objects.filter( + venue=group["venue"], + date=group["date"], + start_time=group["start_time"], + end_time=group["end_time"], + service=group["service"], + ) + .exclude(id=group["keep_id"]) + .values_list("id", flat=True) + ) + ids_to_delete.extend(ids) + + # Delete all duplicates + deleted_count, _ = DiningMenu.objects.filter(id__in=ids_to_delete).delete() + return deleted_count diff --git a/backend/dining/management/commands/load_next_menu.py b/backend/dining/management/commands/load_next_menu.py index af319b0c..022955c7 100644 --- a/backend/dining/management/commands/load_next_menu.py +++ b/backend/dining/management/commands/load_next_menu.py @@ -4,7 +4,6 @@ from django.utils import timezone from dining.api_wrapper import DiningAPIWrapper -from dining.utils.menu_view_cache import delete_menu_view_cache class Command(BaseCommand): @@ -14,21 +13,22 @@ class Command(BaseCommand): the next 7 days, including the original date. """ - def load_one_menu(self, delta, *args, **kwargs): + def load_one_day(self, today, delta, *args, **kwargs): """ - Loads menu for a single day + Loads all menus for a single day """ d = DiningAPIWrapper() - d.load_menu(timezone.now().date() + datetime.timedelta(days=delta)) - delete_menu_view_cache(timezone.now().date() + datetime.timedelta(days=delta)) + + d.load_menus(today + datetime.timedelta(days=delta)) self.stdout.write( - "Loaded new Dining Menu for " - + str(timezone.now().date() + datetime.timedelta(days=delta)) + "Loaded new Dining Menu for " + str(today + datetime.timedelta(days=delta)) ) def handle(self, *args, **kwargs): """ Load menu for the next 7 days """ + today = timezone.now().date() + for i in range(7): - self.load_one_menu(i) + self.load_one_day(today, i) diff --git a/backend/dining/models.py b/backend/dining/models.py index dbb5064a..c6610261 100644 --- a/backend/dining/models.py +++ b/backend/dining/models.py @@ -21,6 +21,10 @@ class DiningItem(models.Model): # Technically, postgres supports json fields but that involves local postgres # instead of sqlite AND we don't need to query on this field + # TODO: New fields to add from allergens: + # vegetarian, vegan, kosher, jain, ask us + # peanut, tree nut, sesame, fish, wheat/gluten, milk, egg, soy + def __str__(self): return f"{self.name}" diff --git a/backend/tests/dining/test_load_menus.py b/backend/tests/dining/test_load_menus.py new file mode 100644 index 00000000..ff8ebc66 --- /dev/null +++ b/backend/tests/dining/test_load_menus.py @@ -0,0 +1,102 @@ +from datetime import timedelta +from unittest.mock import patch + +from django.test import TestCase +from django.utils import timezone + +from dining.api_wrapper import DiningAPIWrapper +from dining.models import DiningMenu, Venue + + +def _make_response(venue_id, date_str, dayparts): + return { + "menus": { + "items": {}, + "days": [ + { + "date": date_str, + "cafes": {str(venue_id): {"dayparts": [dayparts]}}, + } + ], + } + } + + +class TestLoadMenus(TestCase): + def test_load_menus_idempotent(self): + """ + Calling `load_menus` twice should not create duplicate menus. + """ + # Make some new venues + venues = [ + Venue.objects.create(venue_id=2001, name="Hill", image_url="http://x"), + Venue.objects.create(venue_id=2002, name="English", image_url="http://x"), + Venue.objects.create(venue_id=2003, name="Lauder", image_url="http://x"), + ] + + date = timezone.now().date() + date_str = date.isoformat() + + # Each venue will have two meals/dayparts + dayparts = [ + {"starttime": "08:00", "endtime": "10:00", "label": "Breakfast", "stations": []}, + {"starttime": "10:00", "endtime": "14:00", "label": "Lunch", "stations": []}, + ] + + # fetch a fake response for these menus + def fake_fetch(self, venue_id, d): + dayparts_copy = [dict(dp) for dp in dayparts] + return (venue_id, _make_response(venue_id, date_str, dayparts_copy)) + + # load menus twice + with patch.object(DiningAPIWrapper, "fetch_menu", new=fake_fetch): + wrapper = DiningAPIWrapper() + wrapper.load_menus(date) + count_after_first = DiningMenu.objects.filter(date=date).count() + + wrapper.load_menus(date) + count_after_second = DiningMenu.objects.filter(date=date).count() + + # there should be the same amount pre-fetch and post-fetch + expected = len(venues) * len(dayparts) + self.assertEqual(count_after_first, expected) + self.assertEqual(count_after_second, expected) + + def test_delete_duplicate_menus(self): + """ + `delete_duplicate_menus` should remove all but the most recently + created menu for duplicate menu groups on the same date. + """ + venue = Venue.objects.create(venue_id=9001, name="Dup", image_url="http://x") + + date = timezone.now().date() + + start_time = timezone.now() + end_time = start_time + timedelta(hours=1) + + # Create three duplicate menus (same venue, date, times, service). + menus = [ + DiningMenu.objects.create( + venue=venue, + date=date, + start_time=start_time, + end_time=end_time, + service="Dinner", + ) + for _ in range(3) + ] + + # Confrim we created 3 menus + self.assertEqual(DiningMenu.objects.filter(venue=venue, date=date).count(), 3) + + wrapper = DiningAPIWrapper() + deleted_count = wrapper.delete_duplicate_menus(date) + + # Two should be deleted, one should remain + self.assertEqual(deleted_count, 2) + remaining = DiningMenu.objects.filter(venue=venue, date=date) + self.assertEqual(remaining.count(), 1) + + # The remaining menu should be the one with the highest id + kept_id = max(m.id for m in menus) + self.assertEqual(remaining.first().id, kept_id) diff --git a/backend/tests/dining/test_views.py b/backend/tests/dining/test_views.py index a058c26b..90ac35c6 100644 --- a/backend/tests/dining/test_views.py +++ b/backend/tests/dining/test_views.py @@ -153,7 +153,7 @@ def test_skip_venue(self): Venue.objects.all().delete() Venue.objects.create(venue_id=747, name="Skip", image_url="URL") wrapper = DiningAPIWrapper() - wrapper.load_menu() + wrapper.load_menus() self.assertEqual(DiningMenu.objects.count(), 0)