config.json (6 changes: 3 additions & 3 deletions)
@@ -1,9 +1,9 @@
 {
     "data": {
-        "filename": "sp500.csv",
+        "filename": "data.csv",
         "columns": [
             "Close",
-            "Volume"
+            "Volume From"
         ],
         "sequence_length": 50,
         "train_test_split": 0.85,
@@ -21,7 +21,7 @@
         {
             "type": "lstm",
             "neurons": 100,
-            "input_timesteps": 49,
+            "input_timesteps": 50,
             "input_dim": 2,
             "return_seq": true
         },
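Why `input_timesteps` moves from 49 to 50: with this change `_next_window` slices `seq_len + 1` rows and peels the last row off as the target, so each `x` keeps a full `sequence_length` of 50 timesteps (previously the test path kept only 49). A minimal sketch of the resulting shapes, with random data standing in for the two configured columns:

```python
import numpy as np

seq_len = 50                    # "sequence_length" from config.json
data = np.random.rand(1000, 2)  # stand-in for the "Close" / "Volume From" columns

# Mirror of the new _next_window slicing: seq_len + 1 rows per window,
# with the last row peeled off as the target.
window = data[0:seq_len + 1]
x, y = window[:-1], window[-1, [0]]
print(x.shape)  # (50, 2) -> input_timesteps=50, input_dim=2
print(y.shape)  # (1,)    -> next value of the first column
```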
core/data_processor.py (32 changes: 18 additions & 14 deletions)
@@ -8,6 +8,7 @@ class DataLoader():
     def __init__(self, filename, split, cols):
         dataframe = pd.read_csv(filename)
         i_split = int(len(dataframe) * split)
+        print(dataframe.head(), type(dataframe), cols)
         self.data_train = dataframe.get(cols).values[:i_split]
         self.data_test = dataframe.get(cols).values[i_split:]
         self.len_train = len(self.data_train)
@@ -20,16 +21,13 @@ def get_test_data(self, seq_len, normalise):
         Warning: batch method, not generative, make sure you have enough memory to
         load data, otherwise reduce size of the training split.
         '''
-        data_windows = []
+        data_x = []
+        data_y = []
         for i in range(self.len_test - seq_len):
-            data_windows.append(self.data_test[i:i+seq_len])
-
-        data_windows = np.array(data_windows).astype(float)
-        data_windows = self.normalise_windows(data_windows, single_window=False) if normalise else data_windows
-
-        x = data_windows[:, :-1]
-        y = data_windows[:, -1, [0]]
-        return x,y
+            x, y = self._next_window(i, seq_len, normalise, train=False)
+            data_x.append(x)
+            data_y.append(y)
+        return np.array(data_x), np.array(data_y)

     def get_train_data(self, seq_len, normalise):
         '''
@@ -40,7 +38,7 @@ def get_train_data(self, seq_len, normalise):
         data_x = []
         data_y = []
         for i in range(self.len_train - seq_len):
-            x, y = self._next_window(i, seq_len, normalise)
+            x, y = self._next_window(i, seq_len, normalise, train=True)
             data_x.append(x)
             data_y.append(y)
         return np.array(data_x), np.array(data_y)
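Since `get_train_data` and `get_test_data` now both route through `_next_window`, the new `train` flag is the only difference between the two paths. A hypothetical usage sketch (path and column names mirror config.json; `N` depends on the data length):

```python
from core.data_processor import DataLoader

# Both calls now share the same windowing code; only the source array differs.
data = DataLoader('data/data.csv', 0.85, ['Close', 'Volume From'])
x_train, y_train = data.get_train_data(seq_len=50, normalise=True)  # train=True path
x_test, y_test = data.get_test_data(seq_len=50, normalise=True)     # train=False path
print(x_train.shape, y_train.shape)  # e.g. (N, 50, 2) and (N, 1)
```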
@@ -62,23 +60,29 @@ def generate_train_batch(self, seq_len, batch_size, normalise):
                 i += 1
             yield np.array(x_batch), np.array(y_batch)

-    def _next_window(self, i, seq_len, normalise):
+    def _next_window(self, i, seq_len, normalise, train=True):
         '''Generates the next data window from the given index location i'''
-        window = self.data_train[i:i+seq_len]
+
+        if train:
+            window = self.data_train[i:i+seq_len+1]
+        else:
+            window = self.data_test[i:i+seq_len+1]
         window = self.normalise_windows(window, single_window=True)[0] if normalise else window
         x = window[:-1]
         y = window[-1, [0]]
         return x, y

     def normalise_windows(self, window_data, single_window=False):
         '''Normalise window with a base value of zero'''
+        eps = 0.00001
         normalised_data = []
         window_data = [window_data] if single_window else window_data
         for window in window_data:
             normalised_window = []
             for col_i in range(window.shape[1]):
-                normalised_col = [((float(p) / float(window[0, col_i])) - 1) for p in window[:, col_i]]
+
+                normalised_col = [((float(p) / (float(window[0, col_i]) + eps) ) - 1) for p in window[:, col_i]]
                 normalised_window.append(normalised_col)
             normalised_window = np.array(normalised_window).T # reshape and transpose array back into original multidimensional format
             normalised_data.append(normalised_window)
-        return np.array(normalised_data)
+        return np.array(normalised_data)
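The normalisation formula is unchanged, `n_i = p_i / (p_0 + eps) - 1`; the new `eps` term only guards the division when a window's first value is zero (e.g. a zero-volume bar). A self-contained sketch with made-up numbers:

```python
import numpy as np

eps = 0.00001
window = np.array([[100.0, 0.0],
                   [105.0, 10.0],
                   [ 95.0,  5.0]])

# Column-wise rescaling relative to the first row, as in normalise_windows;
# eps prevents a ZeroDivisionError when a window starts at 0.
normalised = (window / (window[0] + eps)) - 1
print(normalised[:, 0])  # approx [0., 0.05, -0.05]
```

Worth noting that `eps` only prevents the crash: a column whose window starts at zero still normalises to values around 1e6 (column 1 above), so zero bases remain a data-quality concern.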
core/model.py (7 changes: 4 additions & 3 deletions)
@@ -40,14 +40,15 @@ def build_model(self, configs):
         self.model.compile(loss=configs['model']['loss'], optimizer=configs['model']['optimizer'])

         print('[Model] Model Compiled')
+        print(self.model.summary())
         timer.stop()

     def train(self, x, y, epochs, batch_size, save_dir):
         timer = Timer()
         timer.start()
         print('[Model] Training Started')
         print('[Model] %s epochs, %s batch size' % (epochs, batch_size))
-
+
         save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
         callbacks = [
             EarlyStopping(monitor='val_loss', patience=2),
@@ -70,7 +71,7 @@ def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch, save_dir):
         timer.start()
         print('[Model] Training Started')
         print('[Model] %s epochs, %s batch size, %s batches per epoch' % (epochs, batch_size, steps_per_epoch))
-
+
         save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
         callbacks = [
             ModelCheckpoint(filepath=save_fname, monitor='loss', save_best_only=True)
@@ -82,7 +83,7 @@ def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch, save_dir):
             callbacks=callbacks,
             workers=1
         )
-
+
         print('[Model] Training Completed. Model saved as %s' % save_fname)
         timer.stop()

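Two small observations on this file: Keras's `Model.summary()` prints its table itself and returns `None`, so the added `print(self.model.summary())` will also print a trailing `None` (a bare `self.model.summary()` avoids that). And the checkpoint filename, unchanged here but visible in both hunks, encodes a timestamp plus the epoch count; a quick sketch of what it produces, with hypothetical values:

```python
import datetime as dt
import os

epochs = 2                 # hypothetical
save_dir = 'saved_models'  # assumed value of configs['model']['save_dir']

# Same pattern as save_fname above: ddmmyyyy-HHMMSS plus epoch count.
save_fname = os.path.join(
    save_dir,
    '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
print(save_fname)  # e.g. saved_models/24052019-153012-e2.h5
```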
run.py (17 changes: 9 additions & 8 deletions)
@@ -25,7 +25,8 @@ def plot_results_multiple(predicted_data, true_data, prediction_len):
     fig = plt.figure(facecolor='white')
     ax = fig.add_subplot(111)
     ax.plot(true_data, label='True Data')
-    # Pad the list of predictions to shift it in the graph to it's correct start
+
+    # Pad the list of predictions to shift it in the graph to it's correct start
     for i, data in enumerate(predicted_data):
         padding = [None for p in range(i * prediction_len)]
         plt.plot(padding + data, label='Prediction')
@@ -35,13 +36,12 @@ def plot_results_multiple(predicted_data, true_data, prediction_len):

 def main():
     configs = json.load(open('config.json', 'r'))
-    if not os.path.exists(configs['model']['save_dir']): os.makedirs(configs['model']['save_dir'])
+    if not os.path.exists(configs['model']['save_dir']):
+        os.makedirs(configs['model']['save_dir'])

-    data = DataLoader(
-        os.path.join('data', configs['data']['filename']),
-        configs['data']['train_test_split'],
-        configs['data']['columns']
-    )
+    datapath = os.path.join('data', configs['data']['filename'])
+    print('Loading data from ', datapath)
+    data = DataLoader(datapath, configs['data']['train_test_split'], configs['data']['columns'])

     model = Model()
     model.build_model(configs)
@@ -60,6 +60,7 @@ def main():
         save_dir = configs['model']['save_dir']
     )
     '''
+
     # out-of memory generative training
     steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
     model.train_generator(
@@ -88,4 +89,4 @@ def main():


 if __name__ == '__main__':
-    main()
+    main()
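A worked example of the `steps_per_epoch` arithmetic in the generative path, using the config values above with a hypothetical 5000-row CSV and batch size of 32:

```python
import math

len_train = int(5000 * 0.85)  # hypothetical 5000-row CSV after the 0.85 split
sequence_length = 50          # from config.json
batch_size = 32               # hypothetical configs['training']['batch_size']

# Same arithmetic as in main(): one epoch visits every window start index once.
steps_per_epoch = math.ceil((len_train - sequence_length) / batch_size)
print(steps_per_epoch)  # ceil(4200 / 32) = 132
```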