-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGoogleFinanceScreenerCrawler.py
143 lines (106 loc) · 4.41 KB
/
GoogleFinanceScreenerCrawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/python2.5.2
#-*- coding: utf8 -*-
import time
import codecs
from SeleniumUtil import SeleniumUtil
from FinanceAutomationResult import FinanceAutomationResult
from CrawlSettings import CrawlSettings
# Suffix -> multiplier for parsing abbreviated screener values.
# Chinese suffixes: 万亿 = trillion, 亿 = 10^8, 万 = 10^4.
# NOTE: matching code must try the longest suffix first, since a value
# ending in "万亿" also ends in "亿" (a plain dict has no reliable order).
PriceMap = {
    r"万亿" : 1000000000000,
    r"亿" : 100000000,
    r"万" : 10000,
    r"B" : 1000000000,
    r"M" : 1000000,
    r"K" : 1000  # was 10000 -- bug: K means thousand (cf. M/B above)
}
# Top-level key under which this crawler files its results in the shared
# results dict (results[site][criteria] = {ticker: value}).
site = "Google"
# When True, skip all Selenium calls and emit a single placeholder row.
debug=False
class GoogleFinanceScreenerCrawler:
# get the company list when only add one criteria
def GetResults(self, criteria, exchange, results):
if site not in results:
results[site] = {}
each_page_number = 20
endNo = 200
roolUrl = "/finance/stockscreener"
#roolUrl = "http://finance.google.com/finance/stockscreener"
paramUrl = ("#c0=%s®ion=us&exchange=%s" % #"?hl=zh-CN&gl=cn#c0=%s®ion=cn&exchange=%s" % #
(criteria, exchange))
Url=roolUrl+paramUrl
print "opening: ", roolUrl+paramUrl
if not debug:
sln = SeleniumUtil().GetSelenium("Google", "http://0.frontend-yiling_stockscreener-sfetest.sfe.scrooge.hs.borg.google.com/")#"http://0.frontend-canaryccn.sfe.scrooge.ug.borg.google.com:26103/")
sln.open(Url)
time.sleep(10)
page_index = 1
googleResults = {}
#print "opening: 2"
for i in range(0, endNo):
index = i % each_page_number
tickerPath = ("//table[@class='results innermargin']/tbody/tr[%d]/td[2]/a"
% (index + 2))
valuePath = ("//table[@class='results innermargin']/tbody/tr[%d]/td[3]"
% (index + 2))
if not debug and sln.is_element_present(tickerPath):
ticker=sln.get_text(tickerPath).strip().encode("utf-8")
#print ticker
#StockList=codecs.open(("Stock%s.txt",'a','utf-8') %criteria)
#print >>("Stock%s.txt" %criteria),ticker
value = sln.get_text(valuePath).strip().encode("utf-8")
for key, scale in PriceMap.items():
if value.endswith(key):
value = float(value.replace(key, r"")) * float(scale)
print " parse value: %s" % value
break;
googleResults[sln.get_text(tickerPath).strip()] = value
else:
googleResults["601398"] = "NA"
break
if index == each_page_number - 1:
print "Got %s page %s companies." % (page_index, each_page_number)
page_index=page_index+1
if not debug:
try:
sln.click("//span[contains(text(),'Next')]")
time.sleep(2)
except:
break
if not debug:
sln.stop()
#print googleResults
results[site][criteria] = googleResults
return googleResults
def GetAllResults(self, criteriaList, results):
for criteria in criteriaList:
#GoogleFinanceScreenerCrawler().GetResults(criteria, "us", results)
GoogleFinanceScreenerCrawler().GetResults(criteria, "NASDAQ", results)
#GoogleFinanceScreenerCrawler().GetResults(criteria, "AMEX", results)
#GoogleFinanceScreenerCrawler().GetResults(criteria, "NYSE", results)
#GoogleFinanceScreenerCrawler().GetResults(criteria, "ALL_CN_A", results)
#GoogleFinanceScreenerCrawler().GetResults(criteria, "SHA_A", results)
#GoogleFinanceScreenerCrawler().GetResults(criteria, "SHA_B", results)
#GoogleFinanceScreenerCrawler().GetResults(criteria, "SHE_A", results)
#GoogleFinanceScreenerCrawler().GetResults(criteria, "SHE_B", results)
#criteriaList = ["MarketCap"]
#criteria=[]
#for criteria in criteriaList:
# print criteria
#GoogleFinanceScreenerCrawler().GetAllResults(criteriaList, results)
#print results
#print result
#allTickersList=[]
#for site,value in results.items():
# for ticker,values in value.items():
# allTickersList.append(ticker)
#print allTickersList
#print result
# Script entry: load the "test" crawl profile and its ticker list.
# The actual crawl and result-saving calls are currently commented out,
# so running this module only performs the profile/ticker loading below.
crawlProfile = CrawlSettings().GetCrawlProfile("test")
# Criteria ids to screen on (consumed by GetAllResults when enabled).
criteriaList=crawlProfile["Criteria"]
tickerFile = crawlProfile["TickerListFile"]
allTickersList= CrawlSettings().LoadTickerList(tickerFile)
# Output file paths for FinanceAutomationResult (when enabled below).
resultFiles = crawlProfile["ResultFiles"]
#GoogleFinanceScreenerCrawler().GetAllResults(criteriaList, results)
#allTickersList=["600456","000690"]
#FinanceAutomationResult().SaveAsCriteriaResults(result, ["Google"], criteriaList,allTickersList,resultFiles)
#FinanceAutomationResult().SaveAsSiteResults(result, ["Google"], criteriaList, allTickersList, resultFiles)