-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapeAmazon.py
More file actions
50 lines (36 loc) · 1.67 KB
/
scrapeAmazon.py
File metadata and controls
50 lines (36 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from bs4 import BeautifulSoup
import requests
import re
class scrapeAmazon:
    """Download an Amazon product page and read its title and price.

    The page is requested once, when the object is created. ``getProductName``
    and ``getPrice`` then parse the stored response.

    Attributes:
        pattern: Regular expression used to recognise a product URL; group 1
            is the part of the URL up to and including the ``/dp/<id>``
            segment.
        shortenURL: Replacement template applied with ``pattern``.
        mainURL: The shortened URL, always carrying an http(s) scheme.
        url: Same value as ``mainURL``; the address that was requested.
        headers: HTTP headers sent with the request.
        page: The ``requests`` response for ``url``.
    """

    # Raw string so that ``\.`` is a regex escape rather than an (invalid)
    # string escape.  Compared with a pattern that insists on a slug before
    # ``/dp/``, this one also accepts ``/dp/<id>`` directly after the host,
    # multi-label hosts such as ``www.amazon.co.uk``, and a tail that starts
    # with ``/``, ``?`` or ``#`` (or no tail at all).
    _PRODUCT_URL_PATTERN = (
        r"((https?://)?([a-zA-Z0-9-]+\.)+[a-zA-Z]+"
        r"(/[^/?#]+)?/dp/[a-zA-Z0-9]+)([/?#].*)?"
    )
    _SHORT_URL_TEMPLATE = r"\1"

    def __init__(self, link, timeout=10):
        """Normalise ``link`` and fetch the page it points to.

        Args:
            link: Product URL, with or without scheme and tracking suffix.
            timeout: Seconds to wait for the server before ``requests``
                gives up.  Without a timeout the call can block forever.
        """
        self.pattern = self._PRODUCT_URL_PATTERN
        self.shortenURL = self._SHORT_URL_TEMPLATE
        self.mainURL = self._normalize_url(link)
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'}
        self.url = self.mainURL
        self.page = requests.get(self.url, headers=self.headers, timeout=timeout)

    @classmethod
    def _normalize_url(cls, link):
        """Return ``link`` cut back to its ``/dp/<id>`` part, with a scheme."""
        url = re.sub(
            cls._PRODUCT_URL_PATTERN,
            cls._SHORT_URL_TEMPLATE,
            link.strip(),
            count=1,
        )
        # Anchor the check at the start: an "https://" that only appears
        # later in the string (e.g. inside a query) is not a scheme.
        if not re.match(r"https?://", url, re.IGNORECASE):
            url = "https://" + url
        return url

    @staticmethod
    def _parse_price(text):
        """Convert a displayed price such as ``"₹ 13,999.00"`` to a float.

        The first number in ``text`` is used.  A final ``.`` or ``,``
        followed by one or two digits is read as the decimal separator;
        every other separator (including whitespace) is treated as digit
        grouping.  A price without a fraction is therefore returned as a
        whole amount instead of being divided by 100.

        Raises:
            ValueError: If ``text`` contains no ASCII digit.
        """
        found = re.search(r"[0-9](?:[0-9.,\s]*[0-9])?", text)
        if found is None:
            raise ValueError(f"no numeric price found in {text!r}")
        token = re.sub(r"\s", "", found.group())

        cut = max(token.rfind("."), token.rfind(","))
        fraction = token[cut + 1:] if cut != -1 else ""
        if cut != -1 and len(fraction) <= 2:
            whole = token[:cut]
        else:
            # Three or more trailing digits form a thousands group.
            whole, fraction = token, ""

        digits = "".join(ch for ch in whole if ch.isdigit())
        # Work in hundredths and divide once, so two-decimal prices give
        # exactly the same float as "all digits / 100".
        return int(digits + fraction.ljust(2, "0")) / 100

    def _element_text(self, *element_ids):
        """Return the text of the first element whose id is in ``element_ids``.

        Raises:
            AttributeError: If the page contains none of the ids.  This is
                the exception type a failed lookup has always produced
                here, so existing handlers keep working; the message now
                names the missing ids and the URL.
        """
        soup = BeautifulSoup(self.page.content, "lxml")
        for element_id in element_ids:
            node = soup.find(id=element_id)
            if node is not None:
                return node.getText()
        wanted = ", ".join(element_ids)
        raise AttributeError(
            f"no element with id {wanted} found in page {self.url}"
        )

    def getPrice(self):
        """Print and return the product price as a float.

        Looks for the element with id ``priceblock_ourprice`` first and
        falls back to ``priceblock_dealprice``.

        Raises:
            AttributeError: If neither element is present in the page.
            ValueError: If the element text contains no number.
        """
        data = self._element_text("priceblock_ourprice", "priceblock_dealprice")
        price = self._parse_price(data)
        print(price)
        return price

    def getProductName(self):
        """Print and return the text of the ``productTitle`` element.

        Surrounding whitespace is stripped.

        Raises:
            AttributeError: If the page has no ``productTitle`` element.
        """
        name = self._element_text("productTitle").strip()
        print(name)
        return name
# - Object creation example - #
# Guarded so that importing this module does not trigger a network request;
# the example only runs when the file is executed as a script.
if __name__ == "__main__":
    scrape = scrapeAmazon("https://www.amazon.in/dp/B07XK8PLQT/ref=s9_acsd_al_bw_c2_x_3_i?pf_rd_m=A1K21FY43GMZF8&pf_rd_s=merchandised-search-2&pf_rd_r=HJX6BXAXE57AK7QY885W&pf_rd_t=101&pf_rd_p=d96f6e5b-f0ca-4db6-8252-4b7026d037bf&pf_rd_i=21563464031")
    scrape.getProductName()
    scrape.getPrice()
# - Object creation example - #