config.json (6 changes: 3 additions & 3 deletions)
@@ -1,9 +1,9 @@
 {
     "data": {
-        "filename": "sp500.csv",
+        "filename": "data.csv",
         "columns": [
             "Close",
-            "Volume"
+            "Volume From"
         ],
         "sequence_length": 50,
         "train_test_split": 0.85,
@@ -21,7 +21,7 @@
         {
             "type": "lstm",
             "neurons": 100,
-            "input_timesteps": 49,
+            "input_timesteps": 50,
             "input_dim": 2,
             "return_seq": true
         },
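Why `input_timesteps` moves from 49 to 50: with this change `_next_window` slices `seq_len + 1` rows and peels the last row off as the target, so each `x` keeps a full `sequence_length` of 50 timesteps (previously the test path kept only 49). A minimal sketch of the resulting shapes, with random data standing in for the two configured columns:

```python
import numpy as np

seq_len = 50                    # "sequence_length" from config.json
data = np.random.rand(1000, 2)  # stand-in for the "Close" / "Volume From" columns

# Mirror of the new _next_window slicing: seq_len + 1 rows per window,
# with the last row peeled off as the target.
window = data[0:seq_len + 1]
x, y = window[:-1], window[-1, [0]]
print(x.shape)  # (50, 2) -> input_timesteps=50, input_dim=2
print(y.shape)  # (1,)    -> next value of the first column
```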
core/data_processor.py (32 changes: 18 additions & 14 deletions)
@@ -8,6 +8,7 @@ class DataLoader():
     def __init__(self, filename, split, cols):
         dataframe = pd.read_csv(filename)
         i_split = int(len(dataframe) * split)
+        print(dataframe.head(), type(dataframe), cols)
         self.data_train = dataframe.get(cols).values[:i_split]
         self.data_test = dataframe.get(cols).values[i_split:]
         self.len_train = len(self.data_train)
@@ -20,16 +21,13 @@ def get_test_data(self, seq_len, normalise):
         Warning: batch method, not generative, make sure you have enough memory to
         load data, otherwise reduce size of the training split.
         '''
-        data_windows = []
+        data_x = []
+        data_y = []
         for i in range(self.len_test - seq_len):
-            data_windows.append(self.data_test[i:i+seq_len])
-
-        data_windows = np.array(data_windows).astype(float)
-        data_windows = self.normalise_windows(data_windows, single_window=False) if normalise else data_windows
-
-        x = data_windows[:, :-1]
-        y = data_windows[:, -1, [0]]
-        return x,y
+            x, y = self._next_window(i, seq_len, normalise, train=False)
+            data_x.append(x)
+            data_y.append(y)
+        return np.array(data_x), np.array(data_y)

     def get_train_data(self, seq_len, normalise):
         '''
@@ -40,7 +38,7 @@ def get_train_data(self, seq_len, normalise):
         data_x = []
         data_y = []
         for i in range(self.len_train - seq_len):
-            x, y = self._next_window(i, seq_len, normalise)
+            x, y = self._next_window(i, seq_len, normalise, train=True)
             data_x.append(x)
             data_y.append(y)
         return np.array(data_x), np.array(data_y)
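Since `get_train_data` and `get_test_data` now both route through `_next_window`, the new `train` flag is the only difference between the two paths. A hypothetical usage sketch (path and column names mirror config.json; `N` depends on the data length):

```python
from core.data_processor import DataLoader

# Both calls now share the same windowing code; only the source array differs.
data = DataLoader('data/data.csv', 0.85, ['Close', 'Volume From'])
x_train, y_train = data.get_train_data(seq_len=50, normalise=True)  # train=True path
x_test, y_test = data.get_test_data(seq_len=50, normalise=True)     # train=False path
print(x_train.shape, y_train.shape)  # e.g. (N, 50, 2) and (N, 1)
```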
@@ -62,23 +60,29 @@ def generate_train_batch(self, seq_len, batch_size, normalise):
                 i += 1
             yield np.array(x_batch), np.array(y_batch)

-    def _next_window(self, i, seq_len, normalise):
+    def _next_window(self, i, seq_len, normalise, train=True):
         '''Generates the next data window from the given index location i'''
-        window = self.data_train[i:i+seq_len]
+
+        if train:
+            window = self.data_train[i:i+seq_len+1]
+        else:
+            window = self.data_test[i:i+seq_len+1]
         window = self.normalise_windows(window, single_window=True)[0] if normalise else window
         x = window[:-1]
         y = window[-1, [0]]
         return x, y

     def normalise_windows(self, window_data, single_window=False):
         '''Normalise window with a base value of zero'''
+        eps = 0.00001
         normalised_data = []
         window_data = [window_data] if single_window else window_data
         for window in window_data:
             normalised_window = []
             for col_i in range(window.shape[1]):
-                normalised_col = [((float(p) / float(window[0, col_i])) - 1) for p in window[:, col_i]]
+
+                normalised_col = [((float(p) / (float(window[0, col_i]) + eps) ) - 1) for p in window[:, col_i]]
                 normalised_window.append(normalised_col)
             normalised_window = np.array(normalised_window).T # reshape and transpose array back into original multidimensional format
             normalised_data.append(normalised_window)
-        return np.array(normalised_data)
+        return np.array(normalised_data)
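The normalisation formula is unchanged, `n_i = p_i / (p_0 + eps) - 1`; the new `eps` term only guards the division when a window's first value is zero (e.g. a zero-volume bar). A self-contained sketch with made-up numbers:

```python
import numpy as np

eps = 0.00001
window = np.array([[100.0, 0.0],
                   [105.0, 10.0],
                   [ 95.0,  5.0]])

# Column-wise rescaling relative to the first row, as in normalise_windows;
# eps prevents a ZeroDivisionError when a window starts at 0.
normalised = (window / (window[0] + eps)) - 1
print(normalised[:, 0])  # approx [0., 0.05, -0.05]
```

Worth noting that `eps` only prevents the crash: a column whose window starts at zero still normalises to values around 1e6 (column 1 above), so zero bases remain a data-quality concern.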
core/model.py (7 changes: 4 additions & 3 deletions)
@@ -40,14 +40,15 @@ def build_model(self, configs):
         self.model.compile(loss=configs['model']['loss'], optimizer=configs['model']['optimizer'])

         print('[Model] Model Compiled')
+        print(self.model.summary())
         timer.stop()

     def train(self, x, y, epochs, batch_size, save_dir):
         timer = Timer()
         timer.start()
         print('[Model] Training Started')
         print('[Model] %s epochs, %s batch size' % (epochs, batch_size))
-
+
         save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
         callbacks = [
             EarlyStopping(monitor='val_loss', patience=2),
@@ -70,7 +71,7 @@ def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch, save_dir):
         timer.start()
         print('[Model] Training Started')
         print('[Model] %s epochs, %s batch size, %s batches per epoch' % (epochs, batch_size, steps_per_epoch))
-
+
         save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
         callbacks = [
             ModelCheckpoint(filepath=save_fname, monitor='loss', save_best_only=True)
@@ -82,7 +83,7 @@ def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch, save_dir):
             callbacks=callbacks,
             workers=1
         )
-
+
         print('[Model] Training Completed. Model saved as %s' % save_fname)
         timer.stop()

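Two small observations on this file: Keras's `Model.summary()` prints its table itself and returns `None`, so the added `print(self.model.summary())` will also print a trailing `None` (a bare `self.model.summary()` avoids that). And the checkpoint filename, unchanged here but visible in both hunks, encodes a timestamp plus the epoch count; a quick sketch of what it produces, with hypothetical values:

```python
import datetime as dt
import os

epochs = 2                 # hypothetical
save_dir = 'saved_models'  # assumed value of configs['model']['save_dir']

# Same pattern as save_fname above: ddmmyyyy-HHMMSS plus epoch count.
save_fname = os.path.join(
    save_dir,
    '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
print(save_fname)  # e.g. saved_models/24052019-153012-e2.h5
```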
run.py (17 changes: 9 additions & 8 deletions)
@@ -25,7 +25,8 @@ def plot_results_multiple(predicted_data, true_data, prediction_len):
     fig = plt.figure(facecolor='white')
     ax = fig.add_subplot(111)
     ax.plot(true_data, label='True Data')
-    # Pad the list of predictions to shift it in the graph to it's correct start
+
+    # Pad the list of predictions to shift it in the graph to it's correct start
     for i, data in enumerate(predicted_data):
         padding = [None for p in range(i * prediction_len)]
         plt.plot(padding + data, label='Prediction')
@@ -35,13 +36,12 @@ def plot_results_multiple(predicted_data, true_data, prediction_len):

 def main():
     configs = json.load(open('config.json', 'r'))
-    if not os.path.exists(configs['model']['save_dir']): os.makedirs(configs['model']['save_dir'])
+    if not os.path.exists(configs['model']['save_dir']):
+        os.makedirs(configs['model']['save_dir'])

-    data = DataLoader(
-        os.path.join('data', configs['data']['filename']),
-        configs['data']['train_test_split'],
-        configs['data']['columns']
-    )
+    datapath = os.path.join('data', configs['data']['filename'])
+    print('Loading data from ', datapath)
+    data = DataLoader(datapath, configs['data']['train_test_split'], configs['data']['columns'])

     model = Model()
     model.build_model(configs)
@@ -60,6 +60,7 @@ def main():
         save_dir = configs['model']['save_dir']
     )
     '''
+
     # out-of memory generative training
     steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
     model.train_generator(
@@ -88,4 +89,4 @@ def main():


 if __name__ == '__main__':
-    main()
+    main()
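A worked example of the `steps_per_epoch` arithmetic in the generative path, using the config values above with a hypothetical 5000-row CSV and batch size of 32:

```python
import math

len_train = int(5000 * 0.85)  # hypothetical 5000-row CSV after the 0.85 split
sequence_length = 50          # from config.json
batch_size = 32               # hypothetical configs['training']['batch_size']

# Same arithmetic as in main(): one epoch visits every window start index once.
steps_per_epoch = math.ceil((len_train - sequence_length) / batch_size)
print(steps_per_epoch)  # ceil(4200 / 32) = 132
```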