forked from alltheplaces/alltheplaces
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstoreify.py
More file actions
40 lines (27 loc) · 2.02 KB
/
storeify.py
File metadata and controls
40 lines (27 loc) · 2.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import chompjs
from scrapy import Request
from locations.dict_parser import DictParser
from locations.json_blob_spider import JSONBlobSpider
class StoreifySpider(JSONBlobSpider):
    """
    Storeify is a Shopify related storefinder.

    Detectable via `https://sl.storeify.app/js/stores/{api_key}/storeifyapps-storelocator-geojson.js`

    To use, specify `api_key` and `domain`. `api_key` is interpolated into the
    request URL; `domain` is prepended to each store's website value.
    """

    api_key = None
    domain = None

    # TODO: Autodetection

    def start_requests(self):
        """Yield the single request for the store list JavaScript file of `api_key`."""
        yield Request(url=f"https://sl.storeify.app/js/stores/{self.api_key}/storeifyapps-storelocator-geojson.js")

    # API returns a geojson feature collection
    def extract_json(self, response):
        """Parse the JavaScript object in the response body and return its `features` entry."""
        return chompjs.parse_js_object(response.text)["features"]

    def parse_feature_array(self, response, feature_array):
        """
        Build one item per feature from its `properties` and yield whatever
        `post_process_item` produces for it.

        The feature's `thumbnail` property (when present) becomes the item
        image, and the parsed website is prefixed with `domain`.
        """
        for feature in feature_array:
            self.pre_process_data(feature)
            properties = feature["properties"]
            item = DictParser.parse(properties)

            # Not every store is guaranteed to carry a thumbnail; only override
            # the parsed image when one is actually supplied.
            thumbnail = properties.get("thumbnail")
            if thumbnail:
                item["image"] = thumbnail

            # Only prefix the domain when there is a website string to prefix, a
            # domain to prefix it with, and the value is not already an absolute
            # URL (prefixing that would produce a malformed link).
            website = item.get("website")
            if (
                isinstance(website, str)
                and website
                and self.domain
                and not website.startswith(("http://", "https://"))
            ):
                item["website"] = self.domain + website

            # TODO: Parse hours
            # The "schedule" property is an HTML fragment with template placeholders, e.g.:
            # "schedule": "<div class=\"title-store-info\">{{ store_operation }}</div><div class=\"content-store-info\"><table class=\"work-time table\"><tr class=\"row-mon\"><th class=\"dayname\">{{ mon }}</th><td>09:00 {{ am }} - 06:00 {{ pm }}</td></tr> ... <tr class=\"row-sat\"><th class=\"dayname\">{{ sat }}</th><td>09:00 {{ am }} - 03:00 {{ pm }}</td></tr><tr class=\"row-sun\"><th class=\"dayname\">{{ sun }}</th><td>{{ closed }}</td></tr></table></div>",
            # (one <tr class="row-{day}"> per weekday, mon through sun)

            yield from self.post_process_item(item, response, feature) or []