-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi_wrapper.py
More file actions
257 lines (227 loc) · 10 KB
/
api_wrapper.py
File metadata and controls
257 lines (227 loc) · 10 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import datetime
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from django.conf import settings
from django.db.models import Count, Max
from django.utils import timezone
from django.utils.timezone import make_aware
from requests.exceptions import ConnectionError, ConnectTimeout, ReadTimeout
from dining.models import DiningItem, DiningMenu, DiningStation, Venue
from utils.errors import APIError
# Base URL of the Open Data dining API; every endpoint below is built on it.
OPEN_DATA_URL = "https://3scale-public-prod-open-data.apps.k8s.upenn.edu/api/v1/dining/"

# Named endpoints, keyed by the resource they serve.
OPEN_DATA_ENDPOINTS = {
    "VENUES": f"{OPEN_DATA_URL}venues",
    "MENUS": f"{OPEN_DATA_URL}menus",
}
class DiningAPIWrapper:
    """Client for the Open Data dining API plus loaders for the dining models.

    Holds a client-credentials access token that is refreshed lazily by
    `update_token`, exposes `get_venues` for venue hours, and provides
    `load_menus` (with its helpers) to persist menus, stations and items.
    """

    def __init__(self):
        """Start without a token; the first request fetches one."""
        self.token = None
        # Set to "now", so the first `update_token` call always refreshes.
        self.expiration = timezone.localtime()
        self.openid_endpoint = (
            "https://sso.apps.k8s.upenn.edu/auth/realms/master/protocol/openid-connect/token"
        )

    def update_token(self):
        """Fetch a new access token unless the current one is still valid.

        Raises:
            APIError: if the token endpoint cannot be reached, or if it
                answers with an ``error`` payload.
        """
        if self.expiration > timezone.localtime():
            return
        body = {
            "client_id": settings.DINING_ID,
            "client_secret": settings.DINING_SECRET,
            "grant_type": "client_credentials",
        }
        # Report network failures the same way `request` does, so callers
        # only ever have to handle APIError.
        try:
            response = requests.post(self.openid_endpoint, data=body).json()
        except (ConnectTimeout, ReadTimeout, ConnectionError) as e:
            raise APIError("Dining: Connection timeout") from e
        if "error" in response:
            raise APIError(f"Dining: {response['error']}, {response.get('error_description')}")
        self.expiration = timezone.localtime() + datetime.timedelta(seconds=response["expires_in"])
        self.token = response["access_token"]

    def request(self, *args, **kwargs):
        """Make a signed request to the dining API.

        All arguments are forwarded to `requests.request`; a bearer
        ``Authorization`` header is added on top of any headers supplied.

        Returns:
            The `requests` response object.

        Raises:
            APIError: on connection failure or timeout.
        """
        self.update_token()
        # Build a new dict so the caller's headers mapping is never mutated.
        kwargs["headers"] = {
            **(kwargs.get("headers") or {}),
            "Authorization": f"Bearer {self.token}",
        }
        try:
            return requests.request(*args, **kwargs)
        except (ConnectTimeout, ReadTimeout, ConnectionError) as e:
            raise APIError("Dining: Connection timeout") from e

    def get_venues(self):
        """Fetch every cafe with its cleaned-up daily hours.

        Returns:
            A list of dicts, one per cafe in the API response, trimmed by
            `_clean_venue`. Daypart start/end times are naive `datetime`
            objects.

        Raises:
            APIError: if the API does not answer with HTTP 200.
        """
        response = self.request("GET", OPEN_DATA_ENDPOINTS["VENUES"])
        if response.status_code != 200:
            raise APIError("Dining: error connecting to API " + response.text)
        # "203" is the campus key the cafes are nested under in the response.
        venues = response.json()["result_data"]["campuses"]["203"]["cafes"]
        return [
            self._clean_venue(key, value, Venue.objects.filter(venue_id=key).first())
            for key, value in venues.items()
        ]

    @staticmethod
    def _clean_venue(key, value, venue):
        """Trim one raw cafe payload in place and return it.

        Args:
            key: the cafe id as it appears in the API response (a string).
            value: the raw cafe dict; it is modified in place.
            venue: the matching `Venue` row, or None when there is none.

        Returns:
            The same `value` dict with name/image/id set, unused keys
            dropped, and closed dayparts removed.
        """
        # Prefer the stored name; without a Venue row keep whatever the API
        # sent instead of failing on `None.name`.
        value["name"] = venue.name if venue else value.get("name")
        value["image"] = venue.image_url if venue else None
        value["id"] = int(key)
        dropped_keys = (
            "cor_icons",
            "city",
            "state",
            "zip",
            "latitude",
            "longitude",
            "description",
            "message",
            "eod",
            "timezone",
            "menu_type",
            "menu_html",
            "location_detail",
            "weekly_schedule",
        )
        for item in dropped_keys:
            # Tolerate keys the API did not send.
            value.pop(item, None)
        for day in value["days"]:
            day.pop("message", None)
            open_dayparts = []
            for daypart in day["dayparts"]:
                for item in ("id", "hide"):
                    daypart.pop(item, None)
                # An empty start time marks an unavailable meal time: skip it.
                if not daypart["starttime"]:
                    continue
                for bound in ("starttime", "endtime"):
                    daypart[bound] = datetime.datetime.strptime(
                        day["date"] + "T" + daypart[bound], "%Y-%m-%dT%H:%M"
                    )
                open_dayparts.append(daypart)
            day["dayparts"] = open_dayparts
        return value

    def fetch_menu(self, venue_id, date):
        """Call the API to fetch the menu for a given venue and date.

        Returns:
            A ``(venue_id, response_json)`` tuple; the id is carried along so
            results collected from worker threads can be matched to venues.

        Raises:
            APIError: if the API does not answer with HTTP 200.
        """
        # A fresh wrapper per call avoids sharing mutable token state across
        # threads; each call therefore obtains its own token.
        worker = DiningAPIWrapper()
        menu_base = OPEN_DATA_ENDPOINTS["MENUS"]
        response = worker.request("GET", f"{menu_base}?cafe={venue_id}&date={date}")
        if response.status_code != 200:
            raise APIError("Dining: error connecting to API " + response.text)
        return venue_id, response.json()

    def load_menus(self, date=None):
        """Load the menus for `date` (today when omitted) into the database.

        Invariant: there should be no duplicate Menus. `load_menus` deletes
        duplicate menus for all venues for the given date.

        NOTE: This method should only be used in load_next_menu.py, which is
        run based on a cron job every day.
        """
        if date is None:
            date = timezone.now().date()

        # Venues without a menu should not be parsed
        skipped_venues = {747, 1163, 1731, 1732, 1733, 1464004, 1464009}
        venues = [v for v in Venue.objects.all() if v.venue_id not in skipped_venues]
        venue_map = {venue.venue_id: venue for venue in venues}

        # Fetch all menus in parallel to speed up loading time.
        fetched_menus = []
        with ThreadPoolExecutor(max_workers=8) as executor:  # 8 can be tuned
            futures = [executor.submit(self.fetch_menu, venue.venue_id, date) for venue in venues]
            for future in as_completed(futures):
                # A failed fetch only skips that venue; the others still load.
                try:
                    fetched_menus.append(future.result())
                except Exception as e:
                    print(f"Error fetching menu: {e}")

        # Process the fetched menus and load them into the database
        for venue_id, response in fetched_menus:
            venue = venue_map[venue_id]

            # Load new items into database
            # TODO: There is something called a "goitem" for venues like English House.
            # We are currently not loading them in
            self.load_items(response["menus"]["items"])

            # Defensive handling of empty menus (e.g. holidays).
            # TODO: confirm the exact response shape the API uses on such days.
            days = response["menus"]["days"]
            if not days:
                print(f"No menu returned for venue {venue_id} on {date}")
                continue
            menu = days[0]
            cafe_dayparts = menu["cafes"][str(venue.venue_id)]["dayparts"]
            dayparts = cafe_dayparts[0] if cafe_dayparts else []

            for daypart in dayparts:
                # Parse the dates in data
                for bound in ("starttime", "endtime"):
                    daypart[bound] = make_aware(
                        datetime.datetime.strptime(
                            menu["date"] + "T" + daypart[bound], "%Y-%m-%dT%H:%M"
                        )
                    )
                dining_menu = DiningMenu.objects.create(
                    venue=venue,
                    date=menu["date"],
                    start_time=daypart["starttime"],
                    end_time=daypart["endtime"],
                    service=daypart["label"],
                )
                # Append stations to dining menu
                self.load_stations(daypart["stations"], dining_menu)

        # delete duplicate menus
        deleted_count = self.delete_duplicate_menus(date)
        print(deleted_count, "duplicate objects deleted for date", date)

    def load_stations(self, station_response, dining_menu):
        """Create one `DiningStation` per entry and attach its items.

        Args:
            station_response: iterable of station dicts with ``label`` and
                ``items`` (item ids) keys.
            dining_menu: the `DiningMenu` the stations belong to.
        """
        for station_data in station_response:
            # TODO: This is inefficient for venues such as Houston Market
            station = DiningStation.objects.create(name=station_data["label"], menu=dining_menu)
            item_ids = [int(item) for item in station_data["items"]]
            # Bulk add the items into the station. `create` already saved the
            # row and `add` writes the relation itself, so no extra `save()`.
            station.items.add(*DiningItem.objects.filter(item_id__in=item_ids))

    @staticmethod
    def _item_fields(key, value):
        """Translate one raw API item into `DiningItem` field values.

        Args:
            key: the item id as it appears in the API response.
            value: the raw item dict.

        Returns:
            A dict of keyword arguments for `DiningItem`.
        """
        cor_icon = value["cor_icon"]
        # Treat an empty/falsy value like an empty mapping, mirroring the
        # `cor_icon` guard (assumes the API may send [] for "nothing").
        nutrition_details = value["nutrition_details"] or {}
        return {
            "item_id": key,
            "name": value["label"],
            "description": value["description"],
            "ingredients": value["ingredients"],
            "allergens": ", ".join(cor_icon.values()) if cor_icon else "",
            "nutrition_info": json.dumps(
                {x["label"]: f"{x['value']}{x['unit']}" for x in nutrition_details.values()}
            ),
        }

    def load_items(self, item_response):
        """Insert or update a `DiningItem` for every item in the response.

        Args:
            item_response: mapping of item id to raw item dict; an empty
                value is a no-op.
        """
        if not item_response:
            return
        item_list = [
            DiningItem(**self._item_fields(key, value)) for key, value in item_response.items()
        ]
        # Items may already exist: on a primary-key conflict, overwrite every
        # non-key field with the fresh values instead of failing.
        DiningItem.objects.bulk_create(
            item_list,
            update_conflicts=True,
            update_fields=[
                field.name for field in DiningItem._meta.fields if not field.primary_key
            ],
            unique_fields=[DiningItem._meta.pk.name],
        )

    def delete_duplicate_menus(self, date):
        """Delete duplicate menus for an exact `date`.

        Menus are duplicates when they share venue, date, start time, end
        time and service; the one with the highest id in each group is kept.

        Returns:
            The total count reported by the queryset `delete()` call.
        """
        menus_for_date = DiningMenu.objects.filter(date=date)
        # One grouped query yields the id to keep per group, replacing a
        # separate lookup for every duplicate group.
        groups = menus_for_date.values(
            "venue", "date", "start_time", "end_time", "service"
        ).annotate(keep_id=Max("id"))
        keep_ids = [group["keep_id"] for group in groups]
        deleted_count, _ = menus_for_date.exclude(id__in=keep_ids).delete()
        return deleted_count