-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrelicSpreadsheet.py
75 lines (55 loc) · 2.13 KB
/
relicSpreadsheet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
import requests
from bs4 import BeautifulSoup
from lxml import html
from openpyxl import load_workbook
def fetch_and_parse(url, xpath_expression):
# Fetch the HTML content
response = requests.get(url)
html_content = response.content
# Parse the HTML content
tree = html.fromstring(html_content)
# Find elements using XPath
elements = tree.xpath(xpath_expression)
return [element.text_content() for element in elements]
def webScrape(excelList, baseurl, ws):
excount = 2
for cell in excelList:
if type(cell[0]) == str:
url = baseurl + cell[0] + "_" + cell[1]
response = requests.get(url)
html_content = response.content
tree = html.fromstring(html_content)
print(excount)
for i in range(6):
xpath = ('//*[@id="72656C6963table"]/tbody/tr[' + str(2+i) + ']/td[1]/a[2]')
texts = fetch_and_parse(url, xpath)
for text in texts:
text = text
if i == 0:
ws['D' + str(excount)] = text
elif i == 1:
ws['F' + str(excount)] = text
elif i == 2:
ws['H' + str(excount)] = text
elif i == 3:
ws['J' + str(excount)] = text
elif i == 4:
ws['L' + str(excount)] = text
elif i == 5:
ws['M' + str(excount)] = text
excount+=1
def main():
wb = load_workbook('warframeRelics.xlsx')
ws = wb.active
exFile = 'warframeRelics.xlsx'
baseurl = 'https://warframe.fandom.com/wiki/'
#Pandas collects data as a DataFrame
df_columns = pd.read_excel(exFile, usecols='A:B')
#.tolist() converts DataFrame to list of lists
excelList = df_columns.values.tolist()
webScrape(excelList, baseurl, ws)
wb.save('warframeRelics.xlsx')
print("saved")
if __name__=="__main__":
main()