forked from alltheplaces/alltheplaces
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaheadworks.py
More file actions
57 lines (44 loc) · 2.2 KB
/
aheadworks.py
File metadata and controls
57 lines (44 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from json import loads
from typing import Iterable
from chompjs import parse_js_object
from scrapy import Spider
from scrapy.http import Response
from locations.dict_parser import DictParser
from locations.hours import DAYS_FULL, OpeningHours
from locations.items import Feature
class AheadworksSpider(Spider):
    """
    Base spider for store finder pages built with the Aheadworks Store
    Locator extension for Magento.

    Documentation available at:
    1. https://aheadworks.com/store-locator-extension-for-magento-1
    2. https://aheadworks.com/store-locator-extension-for-magento-2

    To use this spider, set 'start_urls' so that its first entry is the store
    finder page that contains embedded JavaScript with a complete store list.
    That first entry is also used as the prefix for each item's website URL.

    The 'pre_process_data' method can be overridden to adjust the raw store
    data before it is parsed, and the 'post_process_item' method can be
    overridden if changes to extracted data are necessary, for example, to
    clean up location names.
    """

    def parse(self, response: Response) -> Iterable[Feature]:
        """
        Extract every store from the JavaScript embedded in the store finder
        page and yield the resulting items.

        :param response: response for the store finder page.
        :return: the items produced by 'post_process_item' for each store
            which is not flagged as coming soon.
        """
        features_js = response.xpath(
            '//script[contains(text(), "Aheadworks_StoreLocator/js/view/location-list") and contains(text(), "locationRawItems")]/text()'
        ).get()
        if not features_js:
            # Without this guard, None would be handed to parse_js_object and
            # the failure would not point at the real cause (page changed or
            # the wrong URL was supplied).
            self.logger.error("Aheadworks store locator script not found at %s", response.url)
            return

        features = parse_js_object(features_js)["#aw-storelocator-navigation"]["Magento_Ui/js/core/app"]["components"][
            "locationList"
        ]["locationRawItems"]

        for tab in features:
            # Only the first entry of "tabs" is read for each raw item.
            feature = tab["tabs"][0]
            self.pre_process_data(feature)

            if feature.get("coming_soon") == "1":
                continue

            item = DictParser.parse(feature)
            item["website"] = self.start_urls[0] + feature["slug"]
            item["street_address"] = item.pop("street")
            item["opening_hours"] = self._parse_opening_hours(feature)

            # An overridden post_process_item may return None to drop an item.
            yield from self.post_process_item(item, response, feature) or []

    def _parse_opening_hours(self, feature: dict) -> OpeningHours:
        """
        Build opening hours from the JSON string in feature["hoursofoperation"].

        A missing, empty or malformed value results in empty opening hours
        rather than an exception, so that one bad store record does not stop
        the remaining stores from being extracted.

        :param feature: raw store data for a single location.
        :return: opening hours populated with every recognised day.
        """
        opening_hours = OpeningHours()

        raw_hours = feature.get("hoursofoperation")
        if not raw_hours:
            return opening_hours

        try:
            decoded = loads(raw_hours)
        except (TypeError, ValueError):
            # json.JSONDecodeError is a subclass of ValueError.
            self.logger.warning("Ignoring unparseable opening hours for %s", feature.get("slug"))
            return opening_hours

        hours_dict = decoded.get("hoursofoperation") if isinstance(decoded, dict) else None
        if not isinstance(hours_dict, dict):
            return opening_hours

        for day, hours in hours_dict.items():
            # Keys which are not in DAYS_FULL are skipped.
            # NOTE(review): hours[0] and hours[1] are presumably the opening
            # and closing times - confirm against a live response.
            if day in DAYS_FULL:
                opening_hours.add_range(day, hours[0], hours[1])

        return opening_hours

    def pre_process_data(self, feature: dict) -> None:
        """Override with any pre-processing on the item."""

    def post_process_item(self, item: Feature, response: Response, feature: dict) -> Iterable[Feature]:
        """Override with any post-processing on the item."""
        yield item