-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathintradata.py
More file actions
68 lines (57 loc) · 2.16 KB
/
intradata.py
File metadata and controls
68 lines (57 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import time
import datetime
import pandas
import requests
import csv
import io
import pytz
# URL scheme that is prepended to BASE_URL when the request URL is built.
PROTOCOL = "http://"
# Host and path of the price endpoint that get_google_data queries.
BASE_URL = "www.google.com/finance/getprices"
def _parse_google_quotes(text, interval):
    """
    Parse the CSV text returned by the getprices endpoint into a list of
    (timestamp, close, high, low, open, volume) tuples.

    :param text (str): response body, one CSV record per line
    :param interval (int): seconds represented by one unit of a row's offset
    :returns list of tuples; the timestamp is a UTC-aware datetime
    """
    quotes = []
    day_start = None  # UTC datetime of the most recent 'a'-prefixed row
    for row in csv.reader(text.splitlines()):
        # Blank lines and empty first fields carry no data.
        if not row or not row[0]:
            continue
        first = row[0]
        if first[0] == 'a':
            # 'a' is prepended to the timestamp that starts each day.
            # The value is treated as a POSIX timestamp, so it is converted
            # with an explicit UTC tz; this keeps the result independent of
            # the host machine's local timezone.
            # NOTE(review): TIMEZONE_OFFSET header rows are therefore not
            # applied to the timestamps - confirm against the feed format.
            day_start = datetime.datetime.fromtimestamp(float(first[1:]), pytz.utc)
            offset = 0
        elif first[0] in '0123456789' and day_start is not None:
            # Plain integer: number of intervals since the last 'a' row.
            offset = int(first)
        else:
            # Header rows (including TIMEZONE_OFFSET=...) and offset rows
            # that appear before any day marker are discarded.
            continue
        timestamp = day_start + datetime.timedelta(seconds=offset * interval)
        # Field order follows the tuple/column order used for the DataFrame.
        closing_price = float(row[1])
        high_price = float(row[2])
        low_price = float(row[3])
        open_price = float(row[4])
        volume = float(row[5])
        quotes.append((timestamp, closing_price, high_price, low_price, open_price, volume))
    return quotes


def get_google_data(symbol, interval=60, lookback=1, end_time=None):
    """
    Get intraday data for the symbol from google finance and
    return a pandas DataFrame

    :param symbol (str): sent as the ``q`` query parameter
    :param interval (int): sent as ``i``; also the number of seconds that one
        unit of a row offset represents when timestamps are rebuilt
    :param lookback (int): sent as ``p`` with a ``d`` suffix
    :param end_time (unix timestamp): sent as ``ts`` in milliseconds;
        defaults to the time of the call when omitted or None
    :returns pandas.DataFrame indexed by 'datetime' (UTC-aware) with the
        columns 'close', 'high', 'low', 'open', 'volume'; empty when the
        response contains no data rows
    """
    if end_time is None:
        # Resolve "now" on every call; a time.time() default in the
        # signature would be evaluated only once, when the module loads.
        end_time = time.time()

    resource_url = PROTOCOL + BASE_URL
    payload = {
        'q': symbol,
        'i': str(interval),
        'p': str(lookback) + 'd',
        'ts': str(int(end_time * 1000)),
        'f': 'd,o,h,l,c,v',
    }
    r = requests.get(resource_url, params=payload)

    # csv.reader needs text lines, so parse the decoded body rather than
    # the raw bytes.
    quotes = _parse_google_quotes(r.text, interval)

    df = pandas.DataFrame(quotes, columns=['datetime', 'close', 'high', 'low', 'open', 'volume'])
    df = df.set_index('datetime')
    return df