|
1 | 1 | import datetime |
2 | 2 | import json |
| 3 | +import logging |
3 | 4 | from concurrent.futures import ThreadPoolExecutor, as_completed |
4 | 5 |
|
5 | 6 | import requests |
6 | 7 | from django.conf import settings |
| 8 | +from django.db.models import Count, Max |
7 | 9 | from django.utils import timezone |
8 | 10 | from django.utils.timezone import make_aware |
9 | 11 | from requests.exceptions import ConnectionError, ConnectTimeout, ReadTimeout |
|
12 | 14 | from utils.errors import APIError |
13 | 15 |
|
14 | 16 |
|
| 17 | +logger = logging.getLogger(__name__) |
| 18 | + |
15 | 19 | OPEN_DATA_URL = "https://3scale-public-prod-open-data.apps.k8s.upenn.edu/api/v1/dining/" |
16 | 20 | OPEN_DATA_ENDPOINTS = {"VENUES": OPEN_DATA_URL + "venues", "MENUS": OPEN_DATA_URL + "menus"} |
17 | 21 |
|
@@ -122,29 +126,42 @@ def fetch_menu(self, venue_id, date): |
122 | 126 | response.json(), |
123 | 127 | ) # also storing venue_id to later access in fetched_menus list |
124 | 128 |
|
125 | | - def load_menu(self, date=timezone.now().date()): |
| 129 | + def load_menus(self, date=None): |
126 | 130 | """ |
127 | 131 | Loads today's menu |
| 132 | + Invariant: there should be no duplicate Menus. `load_menus` should delete |
| 133 | + duplicate menus for all venues for the given date. |
| 134 | +
|
128 | 135 | NOTE: This method should only be used in load_next_menu.py, which is |
129 | 136 | run based on a cron job every day |
130 | 137 | """ |
| 138 | + if date is None: |
| 139 | + date = timezone.now().date() |
| 140 | + |
131 | 141 | # Venues without a menu should not be parsed |
132 | 142 | skipped_venues = [747, 1163, 1731, 1732, 1733, 1464004, 1464009] |
133 | 143 |
|
134 | 144 | # TODO: Handle API responses during empty menus (holidays) |
135 | 145 | venues = [v for v in Venue.objects.all() if v.venue_id not in skipped_venues] |
136 | 146 | venue_map = {venue.venue_id: venue for venue in venues} |
137 | 147 |
|
138 | | - # Fetch all menus in parallel to speed up loading time. |
| 148 | + # Fetch menus in parallel to speed up loading time |
139 | 149 | fetched_menus = [] |
140 | | - with ThreadPoolExecutor(max_workers=8) as executor: # 8 can be tuned |
141 | | - futures = [executor.submit(self.fetch_menu, venue.venue_id, date) for venue in venues] |
142 | | - for future in as_completed(futures): |
| 150 | + |
| 151 | + with ThreadPoolExecutor(max_workers=8) as executor: |
| 152 | + future_to_venue = { |
| 153 | + executor.submit(self.fetch_menu, venue.venue_id, date): venue.venue_id |
| 154 | + for venue in venues |
| 155 | + } |
| 156 | + |
| 157 | + for future in as_completed(future_to_venue): |
143 | 158 | try: |
144 | 159 | venue_id, response_json = future.result() |
145 | 160 | fetched_menus.append((venue_id, response_json)) |
146 | | - except Exception as e: |
147 | | - print(f"Error fetching menu: {e}") |
| 161 | + except Exception: |
| 162 | + logger.exception( |
| 163 | + f"Dining: error fetching menu for venue {future_to_venue[future]}" |
| 164 | + ) |
148 | 165 |
|
149 | 166 | # Process the fetched menus and load them into the database |
150 | 167 | for venue_id, response in fetched_menus: |
@@ -175,6 +192,9 @@ def load_menu(self, date=timezone.now().date()): |
175 | 192 | # Append stations to dining menu |
176 | 193 | self.load_stations(daypart["stations"], dining_menu) |
177 | 194 |
|
| 195 | + # delete duplicate menus |
| 196 | + self.delete_duplicate_menus(date) |
| 197 | + |
178 | 198 | def load_stations(self, station_response, dining_menu): |
179 | 199 | for station_data in station_response: |
180 | 200 | # TODO: This is inefficient for venues such as Houston Market |
@@ -212,3 +232,35 @@ def load_items(self, item_response): |
212 | 232 | ], |
213 | 233 | unique_fields=[DiningItem._meta.pk.name], |
214 | 234 | ) |
| 235 | + |
def delete_duplicate_menus(self, date):
    """Delete duplicate menus for an exact `date`.

    Two menus are duplicates when they share the same venue, date,
    start_time, end_time and service. Within each duplicated group only the
    menu with the highest id is kept (the id is used as a stand-in for
    "most recently created"); all other menus in that group are deleted.

    :param date: the exact date whose menus should be de-duplicated
    :return: the total row count reported by ``QuerySet.delete()``, or 0
        when no duplicates exist for `date`
    """
    # Local import: Q is only needed here to build the combined filter
    from django.db.models import Q

    group_fields = ("venue", "date", "start_time", "end_time", "service")

    # Find groups of duplicate menus. Restricting to `date` first keeps the
    # aggregation small; order_by() clears any default model ordering so it
    # cannot influence the GROUP BY.
    duplicate_groups = (
        DiningMenu.objects.filter(date=date)
        .order_by()
        .values(*group_fields)
        .annotate(menu_count=Count("id"), keep_id=Max("id"))
        .filter(menu_count__gt=1)
    )

    # Build a single filter matching every duplicated group, so the
    # duplicates can be removed with one DELETE instead of one lookup
    # query per group.
    in_duplicate_group = Q()
    keep_ids = []
    for group in duplicate_groups:
        keep_ids.append(group["keep_id"])
        in_duplicate_group |= Q(**{field: group[field] for field in group_fields})

    # An empty Q() matches every row, so this guard must stay in front of
    # the delete: without it, "no duplicates" would wipe all menus.
    if not keep_ids:
        return 0

    # Delete all duplicates, sparing the newest menu of each group
    deleted_count, _ = (
        DiningMenu.objects.filter(in_duplicate_group)
        .exclude(id__in=keep_ids)
        .delete()
    )
    return deleted_count
0 commit comments