-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreturnsCalc.py
More file actions
107 lines (76 loc) · 4.55 KB
/
returnsCalc.py
File metadata and controls
107 lines (76 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import argparse
from collections import Counter

import numpy as np
import pandas as pd
from sklearn import svm, neighbors
from sklearn.ensemble import VotingClassifier, RandomForestClassifier

try:
    # scikit-learn >= 0.18 (the old module was removed entirely in 0.20)
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
"""
The goal of this script is to predict whether we should buy, sell, or hold a particular stock, based on the changes in value of every other stock in the S&P 500.
DISCLAIMER: With this simple model we are going to get at best 40-50 percent accuracy in terms of buy/sell signals
"""
# Command-line interface: a single required positional argument naming the
# stock to model. Parsing happens at module level, so `ticker` is available
# to the code further down the file.
parser = argparse.ArgumentParser()
parser.add_argument(
    "ticker",
    type=str,
    help=" please insert the ticker of any stock in the S&P500",
)
args = parser.parse_args()
ticker = args.ticker
def return_calc(ticker, return_period=7, csv_path="master.csv"):
    """Load the price table and append forward-return columns for ``ticker``.

    For every horizon ``n`` in ``1..return_period`` a column named
    ``"<ticker>_<n>"`` is added holding the fractional price change of
    ``ticker`` between a row and the row ``n`` positions later:
    ``(price[t + n] - price[t]) / price[t]``.

    Parameters
    ----------
    ticker : column name of the stock whose forward returns are computed.
    return_period : number of forward horizons to generate (default 7).
    csv_path : path or file-like object for the price CSV; the first column
        is used as the index and parsed as dates (default "master.csv").

    Returns
    -------
    (tickers, df) where ``tickers`` is the array of column names as loaded
    from the CSV (before any return columns were added) and ``df`` is the
    frame including the new return columns.

    Raises
    ------
    KeyError if ``ticker`` is not a column of the CSV.
    """
    # DataFrame.from_csv was removed in pandas 1.0; read_csv with these
    # arguments reproduces its defaults.
    df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    tickers = df.columns.values

    # Missing prices become 0. A 0 base price makes the division below yield
    # inf (or NaN for 0/0), so callers must be prepared to handle inf.
    df.fillna(0, inplace=True)

    prices = df[ticker]
    for num in range(1, return_period + 1):
        df[str(ticker) + "_" + str(num)] = (prices.shift(-num) - prices) / prices

    # The last `num` rows have no future price (NaN after shift); zero them.
    df.fillna(0, inplace=True)
    return tickers, df
def buy_sell_hold(*args):
    """Classify one date as buy (1), sell (-1) or hold (0).

    ``args`` are the forward fractional price changes for a single date, one
    value per horizon, in horizon order. The first value whose magnitude
    exceeds the 2% threshold decides the label; if none does, the result is
    hold. NaN values never exceed the threshold and are skipped.
    """
    requirement = 0.02  # minimum fractional change we start caring about
    for change in args:
        if change > requirement:
            return 1
        if change < -requirement:
            return -1
    return 0
def extract_featuresets(ticker):
    """Build the feature matrix and buy/sell/hold labels for ``ticker``.

    Returns
    -------
    X : 2-D array with one row per date; each row holds the row-over-row
        percentage change of every original price column.
    y : array of labels for ``ticker`` (1 buy, -1 sell, 0 hold), produced by
        ``buy_sell_hold`` from the forward-return columns.
    df : the full price frame plus the forward-return columns and the
        ``"<ticker>_target"`` label column.
    """
    tickers, df = return_calc(ticker)

    # Pick up every "<ticker>_<n>" forward-return column that return_calc
    # created instead of hard-coding seven names, so the two functions stay
    # in step if the return period changes. Column order (1, 2, ...) is kept
    # because buy_sell_hold lets the earliest horizon win.
    prefix = str(ticker) + "_"
    horizon_cols = [
        col for col in df.columns
        if str(col).startswith(prefix) and str(col)[len(prefix):].isdigit()
    ]

    target_col = str(ticker) + "_target"
    df[target_col] = list(map(buy_sell_hold, *[df[col] for col in horizon_cols]))

    # Rows whose returns are infinite (division by a zero price) are dropped.
    df.fillna(0, inplace=True)
    df = df.replace([np.inf, -np.inf], np.nan)
    df.dropna(inplace=True)

    # Features: percentage change of each original price column; undefined
    # or infinite changes are treated as "no change".
    df_vals = df[list(tickers)].pct_change()
    df_vals = df_vals.replace([np.inf, -np.inf], 0)
    df_vals.fillna(0, inplace=True)

    X = df_vals.values
    y = df[target_col].values
    return X, y, df
def ml(ticker):
    """Train a voting ensemble for ``ticker`` and report hold-out accuracy.

    The feature sets from ``extract_featuresets`` are split 75/25 into train
    and test data. A ``VotingClassifier`` combining a linear SVC, k-nearest
    neighbours and a random forest is fitted on the training part, and the
    three models "vote" on each prediction.

    Prints the distribution of predicted labels on the test set and the
    accuracy, and returns the accuracy as a float in [0, 1].
    """
    X, y, _ = extract_featuresets(ticker)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25)

    # Each sample is the vector of that day's percentage changes across all
    # stocks, which is why a linear SVC is a reasonable ensemble member.
    clf_ensemble = VotingClassifier([
        ("lsvc", svm.LinearSVC()),
        ("knn", neighbors.KNeighborsClassifier()),
        ("rfor", RandomForestClassifier()),
    ])

    # NOTE: after fitting, the classifier can be pickled and later reloaded
    # for prediction, so the model does not have to be retrained every time.
    clf_ensemble.fit(X_train, y_train)

    confidence = clf_ensemble.score(X_test, y_test)  # accuracy on held-out data
    predictions = clf_ensemble.predict(X_test)

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("predicted spread {}".format(Counter(predictions)))
    print("Where 1 : Buy, -1 : Sell, 0 : Hold")
    print("This model is " + str(confidence * 100) + "%" + " accurate")
    return confidence
if __name__ == "__main__":
    # Train and evaluate only when executed as a script, not on import.
    # `ticker` is the S&P 500 symbol taken from the command line.
    ml(ticker)