environment.py
import math
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np
from gym.envs.registration import register


class TradeEnv():
"""
This class is the trading environment (render) of our project.
The trading agent calls the class by giving an action at the time t.
Then the render gives back the new portfolio at the next step (time t+1).
#parameters:
- windonw_length: this is the number of time slots looked in the past to build the input tensor
- portfolio_value: this is the initial value of the portfolio
- trading_cost: this is the cost (in % of the traded stocks) the agent will pay to execute the action
- interest_rate: this is the rate of interest (in % of the money the agent has) the agent will:
-get at each step if he has a positive amount of money
-pay if he has a negative amount of money
-train_size: % of data taken for the training of the agent - please note the training data are taken with respect
of the time span (train -> | time T | -> test)
"""
    def __init__(self, path='./np_data/input.npy', window_length=50,
                 portfolio_value=10000, trading_cost=0.25/100,
                 interest_rate=0.02/250, train_size=0.7):
        # path to the numpy data
        self.path = path
        # load the whole data
        self.data = np.load(self.path)

        # parameters
        self.portfolio_value = portfolio_value
        self.window_length = window_length
        self.trading_cost = trading_cost
        self.interest_rate = interest_rate

        # number of stocks and features
        self.nb_stocks = self.data.shape[1]
        self.nb_features = self.data.shape[0]
        self.end_train = int((self.data.shape[2] - self.window_length) * train_size)
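        # e.g. (illustrative figures, not from the data): with 1000 time steps
        # and window_length = 50, end_train = int((1000 - 50) * 0.7) = 665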

        # init state and index
        self.index = None
        self.state = None
        self.done = False

        # init seed
        self.seed()
    def return_pf(self):
        """
        Return the value of the portfolio.
        """
        return self.portfolio_value
    def readTensor(self, X, t):
        # note: this is NOT the normalized price tensor of equation 18;
        # batch-normalize the window if you want that one
        return X[:, :, t - self.window_length:t]
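    # A possible normalization (an assumption, not the repo's equation-18
    # tensor): make each window scale-free by dividing it by its last prices,
    # e.g. X_norm = X[:, :, t-self.window_length:t] / X[:, :, t-1:t]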
    def readUpdate(self, t):
        # return the price relative of each asset for day t; the first
        # component is cash, which grows at the interest rate
        return np.array([1 + self.interest_rate] + self.data[-1, :, t].tolist())
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def reset(self, w_init, p_init, t=0):
        """
        This function restarts the environment with a given initial weight vector
        and a given value of the portfolio.
        """
        self.index = self.window_length + t
        # read the input tensor at the starting index so the state matches it
        self.state = (self.readTensor(self.data, self.index), w_init, p_init)
        self.done = False
        return self.state, self.done
    def step(self, action):
        """
        This function is the main part of the render.

        At each step t, the trading agent gives the action it wants to take as input,
        i.e. the new weight vector of the portfolio. The function computes the new
        value of the portfolio at step (t+1) and also returns the reward associated
        with the action. The reward is defined as the evolution of the portfolio
        value in %.
        """
        index = self.index
        # get X_t from the data
        data = self.readTensor(self.data, index)
        done = self.done

        # beginning of the day
        state = self.state
        w_previous = state[1]
        pf_previous = state[2]

        # the update vector is the vector of the opening prices of the day
        # divided by the opening prices of the previous day
        update_vector = self.readUpdate(index)

        # allocation choice
        w_alloc = action
        pf_alloc = pf_previous

        # compute the transaction cost
        cost = pf_alloc * np.linalg.norm((w_alloc - w_previous), ord=1) * self.trading_cost
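        # e.g. (illustrative figures, not from the data): shifting 10% of a
        # 10000 portfolio from cash to one stock gives ||w_alloc - w_previous||_1
        # = 0.2, so cost = 10000 * 0.2 * 0.0025 = 5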
        # convert the weight vector into a value vector
        v_alloc = pf_alloc * w_alloc
        # pay transaction costs (charged against the cash position)
        pf_trans = pf_alloc - cost
        v_trans = v_alloc - np.array([cost] + [0] * self.nb_stocks)
        # market prices evolution: we go to the end of the day

        # compute the new value vector
        v_evol = v_trans * update_vector
        # compute the new portfolio value
        pf_evol = np.sum(v_evol)
        # compute the new weight vector
        w_evol = v_evol / pf_evol

        # compute the instantaneous reward
        reward = (pf_evol - pf_previous) / pf_previous

        # update the index
        index = index + 1
        # compute the new state
        state = (self.readTensor(self.data, index), w_evol, pf_evol)

        if index >= self.end_train:
            done = True

        self.state = state
        self.index = index
        self.done = done
        return state, reward, done
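
# A minimal usage sketch (an assumption, not part of the original file): it
# presumes ./np_data/input.npy exists with shape (nb_features, nb_stocks,
# nb_time_steps) and that weight vectors have length nb_stocks + 1 (cash first).
if __name__ == "__main__":
    env = TradeEnv(path='./np_data/input.npy', window_length=50)
    # start fully in cash
    w_init = np.array([1.0] + [0.0] * env.nb_stocks)
    state, done = env.reset(w_init, p_init=10000)
    while not done:
        # here: keep an equal-weight allocation; a trained agent would
        # output these weights instead
        action = np.ones(env.nb_stocks + 1) / (env.nb_stocks + 1)
        state, reward, done = env.step(action)
    # the running portfolio value is the third element of the state tuple
    print('final portfolio value:', state[2])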