forked from alltheplaces/alltheplaces
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathelfsight.py
More file actions
55 lines (46 loc) · 2.11 KB
/
elfsight.py
File metadata and controls
55 lines (46 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from typing import Iterable
from urllib.parse import unquote
import chompjs
from scrapy.http import JsonRequest, Request, Response
from locations.json_blob_spider import JSONBlobSpider
class ElfsightSpider(JSONBlobSpider):
    """
    Storefinder spider for Elfsight Google Maps widgets.

    An Elfsight spider will be one of:
    - https://shy.elfsight.com/p/boot/?callback=a&shop=(shop)&w=(api_key)
    - https://core.service.elfsight.com/p/boot/?w=(api_key)
    - Or embedded in data-elfsight-google-maps-options

    Configuration (class attributes set by subclasses):
    - ``host``: "core.service.elfsight.com" or "shy.elfsight.com" to query
      the matching boot endpoint; any other value (including ``None``)
      makes the spider fetch ``start_urls`` and read the widget options
      embedded in the page.
    - ``shop``: shop identifier, only used for the shy.elfsight.com endpoint.
    - ``api_key``: widget identifier, sent as the ``w`` query parameter and
      used to look up the widget in the boot response.
    """

    # Detection rules (currently disabled):
    # detection_rules = [
    #     DetectionRequestRule(
    #         url=r"^https?:\/\/shy\.elfsight\.com/p/boot/\?callback=.*&shop=(?P<shop>.*)&w=(?P<api_key>[\w-]+)$",
    #     ),
    #     DetectionRequestRule(
    #         url=r"^https?:\/\/core\.service\.elfsight\.com/p/boot/\?w=(?P<api_key>[\w-]+)$",
    #     ),
    # ]

    host: str | None = None
    shop: str | None = None
    api_key: str = ""

    def start_requests(self) -> Iterable[JsonRequest | Request]:
        """Yield the initial request(s) for the configured widget source.

        A known boot ``host`` produces a single ``JsonRequest`` to that
        endpoint; otherwise one plain ``Request`` is issued per entry in
        ``start_urls``.
        """
        if self.host == "core.service.elfsight.com":
            yield JsonRequest(f"https://{self.host}/p/boot/?w={self.api_key}")
        elif self.host == "shy.elfsight.com":
            yield JsonRequest(f"https://{self.host}/p/boot/?callback=a&shop={self.shop}&w={self.api_key}")
        else:
            for url in self.start_urls:
                yield Request(url)

    def extract_json(self, response: Response) -> list:
        """Return the list of marker dicts found in ``response``.

        For the boot endpoints the markers are read from the widget settings
        keyed by ``api_key``. Otherwise they are read from the URL-encoded
        ``data-elfsight-google-maps-options`` attribute in the page; if that
        attribute is absent a warning is logged and an empty list returned.
        """
        if self.host in ("core.service.elfsight.com", "shy.elfsight.com"):
            # parse_js_object (rather than response.json()) is used for both
            # endpoints; the shy endpoint is requested with ``callback=a`` so
            # its body is presumably JSONP-wrapped rather than bare JSON.
            data = chompjs.parse_js_object(response.text)
            return data["data"]["widgets"][self.api_key]["data"]["settings"]["markers"]

        raw_options = response.xpath("//@data-elfsight-google-maps-options").get()
        if raw_options is None:
            # Without this guard unquote(None) fails with an opaque TypeError.
            self.logger.warning("No data-elfsight-google-maps-options attribute found at %s", response.url)
            return []
        return chompjs.parse_js_object(unquote(raw_options))["markers"]

    def pre_process_data(self, location: dict) -> None:
        """Normalise an Elfsight marker dict in place.

        When the marker carries ``infoTitle``, the ``info*`` fields that are
        present are renamed to ``name``/``addr``/``phone``/``email``. The
        ``coordinates`` string ("lat, lon") is split into ``lat`` and ``lon``.
        Markers missing optional fields or coordinates are left as they are
        for those fields rather than raising.
        """
        if "infoTitle" in location:
            renames = (
                ("infoTitle", "name"),
                ("infoAddress", "addr"),
                ("infoPhone", "phone"),
                ("infoEmail", "email"),
            )
            for source_key, target_key in renames:
                # Not every marker fills in every info field.
                if source_key in location:
                    location[target_key] = location.pop(source_key)

        coordinates = location.get("coordinates")
        if isinstance(coordinates, str) and "," in coordinates:
            # Split on the first comma and strip, so both "lat, lon" and
            # "lat,lon" are accepted.
            lat, _, lon = coordinates.partition(",")
            location["lat"], location["lon"] = lat.strip(), lon.strip()