forked from alltheplaces/alltheplaces
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathyext_search.py
More file actions
106 lines (85 loc) · 4.27 KB
/
yext_search.py
File metadata and controls
106 lines (85 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from typing import Any, Iterable
from scrapy import Request, Spider
from scrapy.http import JsonRequest, Response
from locations.dict_parser import DictParser
from locations.hours import OpeningHours
from locations.items import Feature
from locations.pipelines.address_clean_up import merge_address_lines
from locations.storefinders.yext_answers import YextAnswersSpider
class YextSearchSpider(Spider):
    """
    Spider for locators backed by the Yext "search" endpoint
    (``<host>/search?r=...&per=...&offset=...``).

    To use: set ``host`` to the brand's Yext locator domain and, if needed,
    override :meth:`parse_item` to post-process each :class:`Feature`.
    """

    dataset_attributes = {"source": "api", "api": "yext"}
    custom_settings = {"ROBOTSTXT_OBEY": False}
    # Base locator domain; subclasses override this.
    host: str = "https://locator.chick-fil-a.com.yext-cdn.com"
    page_size: int = 50

    def make_request(self, offset: int) -> JsonRequest:
        """Build one page request; r=250000 is the search radius used by the API."""
        return JsonRequest("{}/search?r=250000&per={}&offset={}".format(self.host, self.page_size, offset))

    def start_requests(self) -> Iterable[Request]:
        yield self.make_request(0)

    def parse(self, response: Response, **kwargs: Any) -> Any:
        for location in response.json()["response"]["entities"]:
            profile = location["profile"]
            # DictParser builds the base item; no need to pre-create a Feature.
            item = DictParser.parse(profile)
            item["ref"] = profile["meta"]["uid"]
            item["branch"] = profile.get("geomodifier")
            # line2/line3 are frequently absent from Yext payloads; use .get()
            # so a missing key doesn't abort the page. merge_address_lines
            # tolerates None entries.
            item["street_address"] = merge_address_lines(
                [
                    profile["address"].get("line1"),
                    profile["address"].get("line2"),
                    profile["address"].get("line3"),
                ]
            )
            # Strip tracking query parameters from the website URL, if any.
            website = profile.get("websiteUrl")
            if website is not None and "?" in website:
                website = website.split("?", 1)[0]
            item["website"] = website
            if menu_url := profile.get("menuUrl"):
                item["extras"]["website:menu"] = menu_url
            if order_url := profile.get("orderUrl"):
                item["extras"]["website:orders"] = order_url.split("?")[0]
            # Collect all available phone numbers, skipping entries whose
            # "number" field is missing (a None would crash str.join).
            phones = [
                number
                for phone_type in ("localPhone", "mainPhone", "mobilePhone", "alternatePhone")
                if (phone := profile.get(phone_type)) and (number := phone.get("number"))
            ]
            if phones:
                item["phone"] = "; ".join(phones)
            if emails := profile.get("emails"):
                item["email"] = "; ".join(emails)
            # facebookVanityUrl may be a bare vanity name or a full URL.
            if facebook_vanity := profile.get("facebookVanityUrl"):
                if not facebook_vanity.startswith("http"):
                    item["facebook"] = "https://www.facebook.com/" + facebook_vanity
                else:
                    item["facebook"] = facebook_vanity
            elif facebook_profile := profile.get("facebookPageUrl"):
                item["facebook"] = facebook_profile
            if google_place_id := profile.get("googlePlaceId"):
                item["extras"]["ref:google"] = google_place_id
            YextAnswersSpider.parse_payment_methods(self, profile, item)
            item["opening_hours"] = self.parse_opening_hours(profile.get("hours"))
            if oh := self.parse_opening_hours(profile.get("deliveryHours")):
                item["extras"]["opening_hours:delivery"] = oh.as_opening_hours()
            # parse_item may be overridden to return None; guard with `or []`.
            yield from self.parse_item(location, item) or []
        yield from self.request_next_page(response, **kwargs)

    def request_next_page(self, response: Response, **kwargs: Any) -> Any:
        """Yield the next page request while more results remain."""
        pager = response.json()["queryParams"]
        offset = int(pager["offset"][0])
        page_size = int(pager["per"][0])
        if offset + page_size < response.json()["response"]["count"]:
            yield self.make_request(offset + page_size)

    def parse_opening_hours(self, hours: dict, **kwargs: Any) -> OpeningHours | None:
        """Convert a Yext "hours" dict to OpeningHours, or None if absent."""
        if not hours:
            return None
        normal_hours = hours.get("normalHours")
        if not normal_hours:
            return None
        oh = OpeningHours()
        for day in normal_hours:
            if day.get("isClosed"):
                oh.set_closed(day["day"].title())
            # Intervals are integers like 930 / 2130; zero-pad to HHMM.
            for interval in day.get("intervals", []):
                oh.add_range(day["day"].title(), str(interval["start"]).zfill(4), str(interval["end"]).zfill(4), "%H%M")
        return oh

    def parse_item(self, location: dict, item: Feature) -> Iterable[Feature]:
        """Override in subclasses to post-process or filter each feature."""
        yield item