-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjustNaturalsTopLayer.py
71 lines (58 loc) · 2.59 KB
/
justNaturalsTopLayer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 1 18:46:55 2021
@author: crystalhansen
"""
from lxml import html
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
def _report_links_in_fifth_cell(table, seen):
    """Report the slash-free links found in the fifth cell of ``table``.

    For every ``<a>`` carrying an ``href`` inside the fifth ``tr td`` match of
    ``table``, hrefs that contain no "/" are checked against ``seen``: a
    message is printed saying whether the href was already known, and unknown
    hrefs are added to ``seen``.

    Args:
        table: A parsed ``<table>`` element (BeautifulSoup tag).
        seen: Set of hrefs reported so far; mutated in place.
    """
    cells = table.select('tr td')
    if len(cells) < 5:
        # No fifth cell in this table, so there is nothing to inspect.
        return
    # 'a[href]' skips anchors without an href, which would otherwise raise
    # KeyError on the lookup below.
    for anchor in cells[4].select('a[href]'):
        link = anchor['href']
        if "/" in link:
            # Only single-segment hrefs are of interest
            # (presumably the top-level category pages - TODO confirm).
            continue
        # Known links are skipped so that a follow-up request would be issued
        # only once per link.
        if link in seen:
            print(f"Yes, key: '{link}' exists in dictionary")
        else:
            print(f"No, key: '{link}' does not exists in dictionary")
            seen.add(link)
            # TODO: the follow-up request is not implemented yet; the intended
            # URL is 'http://www.justnaturalskincare.com' + "/" + link.


# Hrefs already reported. A set gives constant-time membership tests, and the
# name does not shadow the builtin map().
seen_links = set()

url = 'http://www.justnaturalskincare.com/hair-gray/all-gray-hair.html'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
jNSoup = BeautifulSoup(response.text, 'lxml')
print(jNSoup.title.string)

# NOT WORKING FILE (original author's note).
# NOTE(review): `div`, `cell_level2`, `cell_level3` and `_cell_level4` are never
# used by the innermost work, which always searches `outer_cell`. The outer
# loops therefore only gate and repeat the same pass over `outer_cell`'s
# tables. Possibly the innermost search was meant to start from the deepest
# cell - confirm before simplifying.
for div in jNSoup.find_all('div', id="e8"):
    for outer_cell in jNSoup.select("table tr td "):
        for cell_level2 in outer_cell.select('table tr td'):
            for cell_level3 in cell_level2.select('table tr td'):
                for _cell_level4 in cell_level3.select('table tr td'):
                    for table in outer_cell.select('table'):
                        _report_links_in_fifth_cell(table, seen_links)
# for tables in div.find_all('table'):
#
# for tablebody in tables.find_all('tbody'):
# for tr in tablebody.find_all('tr'):
# for td in tr.find_all('td'):
# print(td)
# for anode in div.find_all('a'):
# print(anode['href'])
#category = anode['href'].split("/")