Skip to content

Commit 392efe6

Browse files
authored
Grace delete duplicate menus (#419)
* load_next_menu.py deletes all duplicate menus * fixed merging between caching and deleting duplicate changes * deleted unhelpful logging
1 parent fe03d34 commit 392efe6

7 files changed

Lines changed: 177 additions & 16 deletions

File tree

backend/.codex

Whitespace-only changes.
12.2 MB
Binary file not shown.

backend/dining/api_wrapper.py

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import datetime
22
import json
3+
import logging
34
from concurrent.futures import ThreadPoolExecutor, as_completed
45

56
import requests
67
from django.conf import settings
8+
from django.db.models import Count, Max
79
from django.utils import timezone
810
from django.utils.timezone import make_aware
911
from requests.exceptions import ConnectionError, ConnectTimeout, ReadTimeout
@@ -12,6 +14,8 @@
1214
from utils.errors import APIError
1315

1416

17+
logger = logging.getLogger(__name__)
18+
1519
OPEN_DATA_URL = "https://3scale-public-prod-open-data.apps.k8s.upenn.edu/api/v1/dining/"
1620
OPEN_DATA_ENDPOINTS = {"VENUES": OPEN_DATA_URL + "venues", "MENUS": OPEN_DATA_URL + "menus"}
1721

@@ -122,29 +126,42 @@ def fetch_menu(self, venue_id, date):
122126
response.json(),
123127
) # also storing venue_id to later access in fetched_menus list
124128

125-
def load_menu(self, date=timezone.now().date()):
129+
def load_menus(self, date=None):
126130
"""
127131
Loads today's menu
132+
Invariant: there should be no duplicate Menus. `load_menus` should delete
133+
duplicate menus for all venues for the given date.
134+
128135
NOTE: This method should only be used in load_next_menu.py, which is
129136
run based on a cron job every day
130137
"""
138+
if date is None:
139+
date = timezone.now().date()
140+
131141
# Venues without a menu should not be parsed
132142
skipped_venues = [747, 1163, 1731, 1732, 1733, 1464004, 1464009]
133143

134144
# TODO: Handle API responses during empty menus (holidays)
135145
venues = [v for v in Venue.objects.all() if v.venue_id not in skipped_venues]
136146
venue_map = {venue.venue_id: venue for venue in venues}
137147

138-
# Fetch all menus in parallel to speed up loading time.
148+
# Fetch menus in parallel to speed up loading time
139149
fetched_menus = []
140-
with ThreadPoolExecutor(max_workers=8) as executor: # 8 can be tuned
141-
futures = [executor.submit(self.fetch_menu, venue.venue_id, date) for venue in venues]
142-
for future in as_completed(futures):
150+
151+
with ThreadPoolExecutor(max_workers=8) as executor:
152+
future_to_venue = {
153+
executor.submit(self.fetch_menu, venue.venue_id, date): venue.venue_id
154+
for venue in venues
155+
}
156+
157+
for future in as_completed(future_to_venue):
143158
try:
144159
venue_id, response_json = future.result()
145160
fetched_menus.append((venue_id, response_json))
146-
except Exception as e:
147-
print(f"Error fetching menu: {e}")
161+
except Exception:
162+
logger.exception(
163+
f"Dining: error fetching menu for venue {future_to_venue[future]}"
164+
)
148165

149166
# Process the fetched menus and load them into the database
150167
for venue_id, response in fetched_menus:
@@ -175,6 +192,9 @@ def load_menu(self, date=timezone.now().date()):
175192
# Append stations to dining menu
176193
self.load_stations(daypart["stations"], dining_menu)
177194

195+
# delete duplicate menus
196+
self.delete_duplicate_menus(date)
197+
178198
def load_stations(self, station_response, dining_menu):
179199
for station_data in station_response:
180200
# TODO: This is inefficient for venues such as Houston Market
@@ -212,3 +232,35 @@ def load_items(self, item_response):
212232
],
213233
unique_fields=[DiningItem._meta.pk.name],
214234
)
235+
236+
def delete_duplicate_menus(self, date):
237+
"""Delete duplicate menus for an exact `date`.
238+
Will delete all but the most recently created menus for each dining hall
239+
"""
240+
# Find groups of duplicate menus
241+
duplicate_groups = (
242+
DiningMenu.objects.values("venue", "date", "start_time", "end_time", "service")
243+
.annotate(menu_count=Count("id"), keep_id=Max("id"))
244+
.filter(menu_count__gt=1, date=date)
245+
)
246+
247+
# Find all ids to delete
248+
ids_to_delete = []
249+
250+
for group in duplicate_groups:
251+
ids = (
252+
DiningMenu.objects.filter(
253+
venue=group["venue"],
254+
date=group["date"],
255+
start_time=group["start_time"],
256+
end_time=group["end_time"],
257+
service=group["service"],
258+
)
259+
.exclude(id=group["keep_id"])
260+
.values_list("id", flat=True)
261+
)
262+
ids_to_delete.extend(ids)
263+
264+
# Delete all duplicates
265+
deleted_count, _ = DiningMenu.objects.filter(id__in=ids_to_delete).delete()
266+
return deleted_count

backend/dining/management/commands/load_next_menu.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,24 @@ class Command(BaseCommand):
1414
the next 7 days, including the original date.
1515
"""
1616

17-
def load_one_menu(self, delta, *args, **kwargs):
17+
def load_one_menu(self, today, delta, *args, **kwargs):
1818
"""
1919
Loads menu for a single day
2020
"""
21+
date_to_load = today + datetime.timedelta(days=delta)
22+
2123
d = DiningAPIWrapper()
22-
d.load_menu(timezone.now().date() + datetime.timedelta(days=delta))
23-
delete_menu_view_cache(timezone.now().date() + datetime.timedelta(days=delta))
24-
self.stdout.write(
25-
"Loaded new Dining Menu for "
26-
+ str(timezone.now().date() + datetime.timedelta(days=delta))
27-
)
24+
d.load_menus(date_to_load)
25+
delete_menu_view_cache(date_to_load)
26+
27+
# Error logging
28+
self.stdout.write(f"Loaded new Dining Menu for {date_to_load}")
2829

2930
def handle(self, *args, **kwargs):
3031
"""
3132
Load menu for the next 7 days
3233
"""
34+
today = timezone.now().date()
35+
3336
for i in range(7):
34-
self.load_one_menu(i)
37+
self.load_one_menu(today, i)

backend/dining/models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ class DiningItem(models.Model):
2121
# Technically, postgres supports json fields but that involves local postgres
2222
# instead of sqlite AND we don't need to query on this field
2323

24+
# TODO: New fields to add from allergens:
25+
# vegetarian, vegan, kosher, jain, ask us
26+
# peanut, tree nut, sesame, fish, wheat/gluten, milk, egg, soy
27+
2428
def __str__(self):
2529
return f"{self.name}"
2630

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from datetime import timedelta
2+
from unittest.mock import patch
3+
4+
from django.test import TestCase
5+
from django.utils import timezone
6+
7+
from dining.api_wrapper import DiningAPIWrapper
8+
from dining.models import DiningMenu, Venue
9+
10+
11+
def _make_response(venue_id, date_str, dayparts):
12+
return {
13+
"menus": {
14+
"items": {},
15+
"days": [
16+
{
17+
"date": date_str,
18+
"cafes": {str(venue_id): {"dayparts": [dayparts]}},
19+
}
20+
],
21+
}
22+
}
23+
24+
25+
class TestLoadMenus(TestCase):
26+
def test_load_menus_idempotent(self):
27+
"""
28+
Calling `load_menus` twice should not create duplicate menus.
29+
"""
30+
# Make some new venues
31+
venues = [
32+
Venue.objects.create(venue_id=2001, name="Hill", image_url="http://x"),
33+
Venue.objects.create(venue_id=2002, name="English", image_url="http://x"),
34+
Venue.objects.create(venue_id=2003, name="Lauder", image_url="http://x"),
35+
]
36+
37+
date = timezone.now().date()
38+
date_str = date.isoformat()
39+
40+
# Each venue will have two meals/dayparts
41+
dayparts = [
42+
{"starttime": "08:00", "endtime": "10:00", "label": "Breakfast", "stations": []},
43+
{"starttime": "10:00", "endtime": "14:00", "label": "Lunch", "stations": []},
44+
]
45+
46+
# fetch a fake response for these menus
47+
def fake_fetch(self, venue_id, d):
48+
dayparts_copy = [dict(dp) for dp in dayparts]
49+
return (venue_id, _make_response(venue_id, date_str, dayparts_copy))
50+
51+
# load menus twice
52+
with patch.object(DiningAPIWrapper, "fetch_menu", new=fake_fetch):
53+
wrapper = DiningAPIWrapper()
54+
wrapper.load_menus(date)
55+
count_after_first = DiningMenu.objects.filter(date=date).count()
56+
57+
wrapper.load_menus(date)
58+
count_after_second = DiningMenu.objects.filter(date=date).count()
59+
60+
# there should be the same amount pre-fetch and post-fetch
61+
expected = len(venues) * len(dayparts)
62+
self.assertEqual(count_after_first, expected)
63+
self.assertEqual(count_after_second, expected)
64+
65+
def test_delete_duplicate_menus(self):
66+
"""
67+
`delete_duplicate_menus` should remove all but the most recently
68+
created menu for duplicate menu groups on the same date.
69+
"""
70+
venue = Venue.objects.create(venue_id=9001, name="Dup", image_url="http://x")
71+
72+
date = timezone.now().date()
73+
74+
start_time = timezone.now()
75+
end_time = start_time + timedelta(hours=1)
76+
77+
# Create three duplicate menus (same venue, date, times, service).
78+
menus = [
79+
DiningMenu.objects.create(
80+
venue=venue,
81+
date=date,
82+
start_time=start_time,
83+
end_time=end_time,
84+
service="Dinner",
85+
)
86+
for _ in range(3)
87+
]
88+
89+
# Confrim we created 3 menus
90+
self.assertEqual(DiningMenu.objects.filter(venue=venue, date=date).count(), 3)
91+
92+
wrapper = DiningAPIWrapper()
93+
deleted_count = wrapper.delete_duplicate_menus(date)
94+
95+
# Two should be deleted, one should remain
96+
self.assertEqual(deleted_count, 2)
97+
remaining = DiningMenu.objects.filter(venue=venue, date=date)
98+
self.assertEqual(remaining.count(), 1)
99+
100+
# The remaining menu should be the one with the highest id
101+
kept_id = max(m.id for m in menus)
102+
self.assertEqual(remaining.first().id, kept_id)

backend/tests/dining/test_views.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def test_skip_venue(self):
153153
Venue.objects.all().delete()
154154
Venue.objects.create(venue_id=747, name="Skip", image_url="URL")
155155
wrapper = DiningAPIWrapper()
156-
wrapper.load_menu()
156+
wrapper.load_menus()
157157
self.assertEqual(DiningMenu.objects.count(), 0)
158158

159159

0 commit comments

Comments
 (0)