forked from alltheplaces/alltheplaces
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocation_bank.py
More file actions
84 lines (66 loc) · 3.62 KB
/
location_bank.py
File metadata and controls
84 lines (66 loc) · 3.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import re
from typing import Iterable
from scrapy import Spider
from scrapy.http import JsonRequest, Response
from locations.dict_parser import DictParser
from locations.hours import OpeningHours
from locations.items import Feature
from locations.pipelines.address_clean_up import clean_address
# To use, set `client_id` to the brand's client ID, i.e. the value seen in
# requests of the form StoreLocatorAPI?clientId={client_id}
# You may then need to override the post_process_item method (or
# pre_process_data for the raw location data) to adjust extracted field
# values. In particular, services in feature["slAttributes"] may be of
# interest.
class LocationBankSpider(Spider):
    """Base spider for store locators served by the Location Bank API.

    Subclasses set ``client_id`` and ``item_attributes`` (whose ``brand``
    entry is stripped from each location name to derive the branch), and may
    override ``pre_process_data`` and ``post_process_item`` to adjust
    individual locations.
    """

    allowed_domains = ["api.locationbank.net"]
    # Client ID of the brand; interpolated into the StoreLocatorAPI request URL.
    client_id = None
    # When true, every item gets an ``image`` URL for the locationImage endpoint.
    include_images = False

    # Matches the "{key}" placeholder inside the detail view URL template.
    _DETAIL_VIEW_PLACEHOLDER = re.compile(r"\{(.+)\}")

    def start_requests(self) -> Iterable[JsonRequest]:
        """Request the full location list for ``client_id``."""
        yield JsonRequest(url=f"https://api.locationbank.net/storelocator/StoreLocatorAPI?clientId={self.client_id}")

    def parse(self, response: Response) -> Iterable[Feature]:
        """Yield the items produced for every location in the API response.

        Each location dict is normalised in place (phone, state, website,
        street address), handed to ``DictParser``, enriched with branch, full
        address, opening hours and optionally an image URL, and finally passed
        through ``post_process_item``.
        """
        data = response.json()

        # Work out which location field fills the placeholder of the detail
        # view URL template. A template without any placeholder cannot be
        # turned into a per-location URL, so no website is set in that case.
        detail_view_url = data["detailViewUrl"]
        detail_view_key = None
        if detail_view_url is not None:
            # It looks like it is possible to have a different key, but it does not appear to be used
            placeholder = self._DETAIL_VIEW_PLACEHOLDER.search(detail_view_url)
            if placeholder is not None:
                detail_view_key = placeholder.group(1)
                if detail_view_key == "locationid":
                    detail_view_key = "id"

        for location in data["locations"]:
            self.pre_process_data(location)

            location["phone"] = self._merge_phones(location)
            location["state"] = location.pop("administrativeArea")
            if detail_view_key is not None:
                # A callable replacement inserts the value verbatim; a plain
                # string would have its backslashes and group references
                # interpreted by re.sub.
                location["website"] = self._DETAIL_VIEW_PLACEHOLDER.sub(
                    lambda _match: location[detail_view_key], detail_view_url
                )
            location["street_address"] = clean_address([location.pop("addressLine1"), location.get("addressLine2")])

            item = DictParser.parse(location)
            item["branch"] = item.pop("name").replace(self.item_attributes["brand"], "").strip()
            item["addr_full"] = clean_address(
                [
                    location.get("street_address"),
                    location.get("subLocality"),
                    location.get("locality"),
                    location.get("state"),
                    location.get("postalCode"),
                ]
            )
            item["opening_hours"] = self._parse_opening_hours(location)

            if self.include_images:
                image_root = "https://api.locationbank.net/storelocator/StoreLocatorAPI/locationImage"
                item["image"] = (
                    f"{image_root}?clientId={self.client_id}&LocationID={location['id']}&MediaCat={data['imagesCategory']}&Rule={data['imagesCategorySelectOnRule']}"
                )

            # There are also individual store pages that may have more detail, but nothing of interest has been seen yet,
            # so that is being left unimplemented for now:
            # f"https://api.locationbank.net/storelocator/StoreLocatorAPI/locationDetails?LocationID={location['id']}&ClientID={self.client_id}"

            yield from self.post_process_item(item, response, location)

    @staticmethod
    def _merge_phones(location: dict):
        """Pop the phone fields from ``location`` and return them combined.

        ``primaryPhone`` is always removed; ``additionalPhone1`` is removed
        only when it holds a value. Both numbers are joined with "; ". When
        the primary number is empty or null the additional number is returned
        on its own instead of raising on the concatenation.
        """
        phone = location.pop("primaryPhone", None)
        if location.get("additionalPhone1"):
            additional = location.pop("additionalPhone1")
            phone = f"{phone}; {additional}" if phone else additional
        return phone

    @staticmethod
    def _parse_opening_hours(location: dict) -> OpeningHours:
        """Build an ``OpeningHours`` from the location's ``regularHours`` entries."""
        opening_hours = OpeningHours()
        # Tolerate a missing or null "regularHours" rather than raising.
        for day in location.get("regularHours") or []:
            if day["isOpen"]:
                opening_hours.add_range(day["openDay"], day["openTime"], day["closeTime"])
            else:
                opening_hours.set_closed(day["openDay"])
        return opening_hours

    def pre_process_data(self, feature: dict) -> None:
        """Override with any pre-processing on the data"""

    def post_process_item(self, item: Feature, response: Response, feature: dict) -> Iterable[Feature]:
        """Override with any post process on the item"""
        yield item