intradata/intradata.py at master · maxvitek/intradata · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import time
import datetime
import pandas
import requests
import csv
import io
import pytz

# URL scheme prefix; get_google_data concatenates it with BASE_URL to form
# the address it requests.
PROTOCOL = 'http://'
# Host and path (no scheme) of the price endpoint queried by get_google_data.
BASE_URL = 'www.google.com/finance/getprices'


def _parse_google_quotes(text, interval):
    """
    Turn the CSV body of a getprices response into a list of quote tuples
    :param text (str) decoded response body
    :param interval (int) number of seconds between consecutive rows
    :returns list of (timestamp, close, high, low, open, volume) tuples,
             where timestamp is a timezone-aware UTC datetime
    """
    quotes = []
    day_start = None  # aware UTC datetime taken from the most recent 'a' row

    for row in csv.reader(io.StringIO(text)):
        # csv.reader yields [] for blank lines; an empty first field has no
        # marker character to inspect either.
        if not row or not row[0]:
            continue

        marker = row[0]
        if marker[0] == 'a':
            # 'a' is prepended to the timestamp that starts each day.
            # NOTE(review): the value is treated as Unix epoch seconds, which
            # are UTC-based, so it is converted straight to an aware UTC
            # datetime. Previously it went through the machine's local zone
            # and was then shifted by TIMEZONE_OFFSET, which only produced
            # UTC when the local zone happened to match that offset.
            day_start = datetime.datetime.fromtimestamp(float(marker[1:]), tz=pytz.utc)
            bar_index = 0
        elif marker[0] in '1234567890' and day_start is not None:
            # Plain integers count intervals elapsed since the last 'a' row.
            bar_index = int(marker)
        else:
            # Header/metadata rows (including TIMEZONE_OFFSET=...) and data
            # rows that appear before any 'a' row carry no usable quote.
            continue

        timestamp = day_start + datetime.timedelta(seconds=bar_index * interval)
        closing_price = float(row[1])
        high_price = float(row[2])
        low_price = float(row[3])
        open_price = float(row[4])
        volume = float(row[5])

        quotes.append((timestamp, closing_price, high_price, low_price, open_price, volume))

    return quotes


def get_google_data(symbol, interval=60, lookback=1, end_time=None):
    """
    Get intraday data for the symbol from google finance and
    return a pandas DataFrame
    :param symbol (str)
    :param interval (int) seconds between rows, sent as the 'i' parameter
    :param lookback (int) number of days, sent as the 'p' parameter
    :param end_time (unix timestamp) defaults to the time of the call
    :returns pandas.DataFrame indexed by 'datetime' with the columns
             'close', 'high', 'low', 'open' and 'volume'
    """
    # Resolve the default per call; a default of time.time() in the signature
    # would be evaluated only once, when the module is imported.
    if end_time is None:
        end_time = time.time()

    resource_url = PROTOCOL + BASE_URL
    payload = {
        'q': symbol,
        'i': str(interval),
        'p': str(lookback) + 'd',
        'ts': str(int(end_time * 1000)),  # sent in milliseconds
        'f': 'd,o,h,l,c,v'
    }

    r = requests.get(resource_url, params=payload)

    # The csv module works on text, so parse the decoded body rather than
    # the raw bytes.
    quotes = _parse_google_quotes(r.text, interval)

    df = pandas.DataFrame(quotes, columns=['datetime', 'close', 'high', 'low', 'open', 'volume'])
    df = df.set_index('datetime')

    return df