Skip to content

Commit 56dbac8

Browse files
Merge pull request #52 from ArnovanHilten/dev
Dev: implemented network from .npz
2 parents 4e83423 + 04b42b6 commit 56dbac8

7 files changed

+54
-27
lines changed

GenNet_utils/Create_network.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,11 @@ def example_network():
3838
return model, masks
3939

4040
def layer_block(model, mask, i):
    """Append one masked layer block to ``model`` and return the new output.

    The block applies, in order: a ``LocallyDirected1D`` layer built from
    ``mask`` with a single filter, a tanh activation, and a batch
    normalisation with ``center=False`` and ``scale=False``.

    Args:
        model: Output of the preceding layer; each new layer is called on it.
        mask: Connectivity mask. It must expose ``.shape``; ``mask.shape[0]``
            is used as the input length of the layer.
            (presumably a scipy sparse matrix -- confirm against callers)
        i: Index of the block, used only to build the layer name
            ``"LocallyDirected_<i>"``.

    Returns:
        The output of the batch-normalisation layer. Only this single value
        is returned; no list of masks accompanies it.
    """
    model = LocallyDirected1D(
        mask=mask,
        filters=1,
        input_shape=(mask.shape[0], 1),
        name="LocallyDirected_" + str(i),
    )(model)
    model = K.layers.Activation("tanh")(model)
    model = K.layers.BatchNormalization(center=False, scale=False)(model)
    return model
4746

4847
def create_network_from_csv(datapath, l1_value=0.01, regression=False):
4948
masks = []
@@ -60,12 +59,13 @@ def create_network_from_csv(datapath, l1_value=0.01, regression=False):
6059
model = K.layers.Reshape(input_shape=(inputsize,), target_shape=(inputsize, 1))(input_layer)
6160

6261
for i in range(len(columns) - 1):
63-
matrix_ones = np.ones(len(network_csv[[columns[i], columns[i + 1]]]), np.bool)
64-
matrix_coord = (network_csv[columns[i]].values, network_csv[columns[i + 1]].values)
62+
network_csv2 = network_csv.drop_duplicates(columns[i])
63+
matrix_ones = np.ones(len(network_csv2[[columns[i], columns[i + 1]]]), np.bool)
64+
matrix_coord = (network_csv2[columns[i]].values, network_csv2[columns[i + 1]].values)
6565
if i == 0:
66-
matrixshape = (inputsize, network_csv[columns[i + 1]].max() + 1)
66+
matrixshape = (inputsize, network_csv2[columns[i + 1]].max() + 1)
6767
else:
68-
matrixshape = (network_csv[columns[i]].max() + 1, network_csv[columns[i + 1]].max() + 1)
68+
matrixshape = (network_csv2[columns[i]].max() + 1, network_csv2[columns[i + 1]].max() + 1)
6969
mask = scipy.sparse.coo_matrix(((matrix_ones), matrix_coord), shape = matrixshape)
7070
masks.append(mask)
7171
model = layer_block(model, mask, i)
@@ -86,7 +86,6 @@ def create_network_from_csv(datapath, l1_value=0.01, regression=False):
8686
return model, masks
8787

8888
def create_network_from_npz(datapath, l1_value=0.01, regression=False):
89-
print('ToDO: test')
9089
masks = []
9190
mask_shapes_x = []
9291
mask_shapes_y = []
@@ -104,7 +103,9 @@ def create_network_from_npz(datapath, l1_value=0.01, regression=False):
104103
for i in range(len(masks)): # sort all the masks in the correct order
105104
argsort_x = np.argsort(mask_shapes_x)[::-1]
106105
argsort_y = np.argsort(mask_shapes_y)[::-1]
107-
106+
107+
mask_shapes_x = np.array(mask_shapes_x)
108+
mask_shapes_y = np.array(mask_shapes_y)
108109
assert all(argsort_x == argsort_y) # check that both dimensions have the same order
109110

110111
masks = [masks[i] for i in argsort_y] # sort masks
@@ -123,7 +124,7 @@ def create_network_from_npz(datapath, l1_value=0.01, regression=False):
123124
input_layer = K.Input((inputsize,), name='input_layer')
124125
model = K.layers.Reshape(input_shape=(inputsize,), target_shape=(inputsize, 1))(input_layer)
125126

126-
for i in range(len(masks) - 1):
127+
for i in range(len(masks)):
127128
mask = masks[i]
128129
model = layer_block(model, mask, i)
129130

@@ -148,10 +149,9 @@ def create_network_from_npz(datapath, l1_value=0.01, regression=False):
148149

149150

150151
def lasso(inputsize, l1_value):
    """Build a single-unit dense model with an L1 kernel penalty.

    The model consists of an input of shape ``(inputsize,)``, a batch
    normalisation (``center=False``, ``scale=False``) named ``"inter_out"``,
    a ``Dense`` layer with one unit whose kernel is regularised with
    ``K.regularizers.l1(l1_value)``, and a sigmoid activation.

    Args:
        inputsize: Number of input features.
        l1_value: Strength passed to the L1 kernel regulariser.

    Returns:
        The assembled ``K.Model``. Only the model is returned; no list of
        masks accompanies it.
    """
    inputs = K.Input((inputsize,), name='inputs')
    x1 = K.layers.BatchNormalization(center=False, scale=False, name="inter_out")(inputs)
    x1 = K.layers.Dense(units=1, kernel_regularizer=K.regularizers.l1(l1_value))(x1)
    x1 = K.layers.Activation("sigmoid")(x1)
    return K.Model(inputs=inputs, outputs=x1)

GenNet_utils/Dataloader.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
import sys
3-
3+
import glob
44
sys.path.insert(1, os.path.dirname(os.getcwd()) + "/GenNet_utils/")
55
import matplotlib
66
import numpy as np
@@ -23,7 +23,7 @@ def check_data(datapath, mode):
2323
print("genotype.h5 is missing")
2424
if os.path.exists(datapath + 'topology.csv'):
2525
network_structure = True
26-
elif os.path.exists(datapath + '*.npz'):
26+
elif len(glob.glob(datapath + '*.npz'))>0:
2727
network_structure = True
2828
else:
2929
print("topology.csv and *.npz are missing")

GenNet_utils/LocallyDirectedConnected.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def build(self, input_shape):
146146
raise ValueError('Axis 2 of input should be fully-defined. '
147147
'Found shape:', input_shape)
148148
self.output_length = self.mask.shape[1]
149-
print("output length is " + str(self.output_length))
149+
# print("output length is " + str(self.output_length))
150150
if self.data_format == 'channels_first':
151151
self.kernel_shape = (input_dim, input_length,
152152
self.filters, self.output_length)
@@ -267,7 +267,7 @@ def get_locallyDirected1D_mask(mask, kernel, data_format,
267267

268268
ndims = int(mask.ndim / 2)
269269
indices = np.mat([mask.row, mask.col]).transpose()
270-
print(mask.shape)
270+
# print(mask.shape)
271271
mask = tf.SparseTensor(indices, kernel, [mask.shape[0], mask.shape[1]])
272272

273273
if data_format == 'channels_first':
@@ -429,7 +429,7 @@ def make_2d(tensor, split_dim):
429429
Tensor of shape
430430
`(d0 * ... * d(split_dim-1), d(split_dim) * ... * d(N-1))`.
431431
"""
432-
print(tensor.shape)
432+
# print(tensor.shape)
433433
shape = K.array_ops.shape(tensor)
434434
in_dims = shape[:split_dim]
435435
out_dims = shape[split_dim:]

GenNet_utils/LocallyDirectedConnected_tf2.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,9 @@ def local_conv_matmul_sparse(inputs, mask, kernel, kernel_idx, output_length, fi
252252
output_shape = (mask.shape[1], mask.shape[0])
253253
inputs_flat = K.reshape(inputs, (K.shape(inputs)[0], -1))
254254

255-
print("kernel_idx", len(kernel_idx))
256-
print("inputs", K.shape(inputs_flat))
257-
print("kernel", K.shape(kernel))
255+
# print("kernel_idx", len(kernel_idx))
256+
# print("inputs", K.shape(inputs_flat))
257+
# print("kernel", K.shape(kernel))
258258

259259
output_flat = K.sparse_ops.sparse_tensor_dense_mat_mul(
260260
kernel_idx, kernel, (mask.shape[1], mask.shape[0]), inputs_flat, adjoint_b=True)

GenNet_utils/Train_network.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,10 @@ def train_classification(args):
130130
f.write('\n Confusion matrix ')
131131
f.write(str(confusionmatrix_test))
132132

133-
importance_csv = create_importance_csv(datapath, model, masks)
134-
importance_csv.to_csv(resultpath + "connection_weights.csv")
133+
if os.path.exists(datapath + "/topology.csv"):
134+
importance_csv = create_importance_csv(datapath, model, masks)
135+
importance_csv.to_csv(resultpath + "connection_weights.csv")
136+
135137

136138

137139
def train_regression(args):
@@ -231,5 +233,7 @@ def train_regression(args):
231233
f.write('\n Explained variance = ' + str(explained_variance_val))
232234
# f.write('\n Maximum error = ' + str(maximum_error_test))
233235
f.write('\n R2 = ' + str(r2_test))
234-
importance_csv = create_importance_csv(datapath, model, masks)
235-
importance_csv.to_csv(resultpath + "connection_weights.csv")
236+
237+
if os.path.exists(datapath + "/topology.csv"):
238+
importance_csv = create_importance_csv(datapath, model, masks)
239+
importance_csv.to_csv(resultpath + "connection_weights.csv")

README.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ pip3 install -r requirements_GenNet.txt
5757
5858
```
5959
*GenNet is ready to use!*
60-
Navigate to the GenNet foler and use the following command to run the example:
60+
61+
Navigate to the GenNet folder and use the following command to run the example:
6162
```
6263
python GenNet.py train ./examples/example_classification/ 1
6364
```
@@ -128,9 +129,9 @@ python GenNet.py topology --help
128129

129130
#### GenNet output
130131

131-
After training your network it saved together with its results. Results include a text file with the performance, a .CSV file with all the connections and their weights, a .h5 with the best weights on the validtion set and a plot of the training and validation loss.
132+
After training, your network is saved together with its results. Results include a text file with the performance, a .CSV file with all the connections and their weights, a .h5 with the best weights on the validation set and a plot of the training and validation loss.
132133

133-
The .CSV file with the weights can be used to create your own plot but `python GenNet.py plot` also has standard plots availabe:
134+
The .CSV file with the weights can be used to create your own plot but `python GenNet.py plot` also has standard plots available:
134135

135136

136137
##### Manhattan plot
@@ -142,7 +143,7 @@ The .CSV file with the weights can be used to create your own plot but `python G
142143

143144
### Jupyter notebook
144145

145-
The orignal jupyter notebooks can be found in the jupyter notebook folder. Navigate to the jupyter notebook folder and start with `jupyter notebook`
146+
The original jupyter notebooks can be found in the jupyter notebook folder. Navigate to the jupyter notebook folder and start with `jupyter notebook`. The notebooks are not updated but can be a useful source to understand the main code and/or to create .npz masks (to define connections between layers).
146147

147148
### More
148149

Submit_slurm_GenNet_train.sh

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
#SBATCH --mem=10G
#SBATCH --ntasks=6
#SBATCH -p express
#SBATCH --gres=gpu:1
#SBATCH -t 01:00:00
#SBATCH -o /trinity/home/avanhilten/repositories/GenNet/GenNet_utils/SLURM_logs/slurm_logs/out_%j.log
#SBATCH -e /trinity/home/avanhilten/repositories/GenNet/GenNet_utils/SLURM_logs/error_%j.log

# Slurm batch script: runs the GenNet classification example on one GPU.
# NOTE(review): stdout goes to .../SLURM_logs/slurm_logs/ while stderr goes to
# .../SLURM_logs/ -- confirm whether the extra "slurm_logs" level is intended.
# NOTE(review): the paths below are specific to one user's cluster account;
# adapt them before submitting from another account.

# Load the modules

module purge
module load Python/3.7.4-GCCcore-8.3.0
module load libs/cuda/10.1.243
module load libs/cudnn/7.6.5.32-CUDA-10.1.243
module load TensorFlow/2.2.0-fosscuda-2019b-Python-3.7.4

# Activate the project's virtual environment.
source "$HOME/env_GenNet_dev/bin/activate"

# Stop if the repository is missing, so python never starts from the wrong
# working directory (the example path below is relative to the repository).
cd /trinity/home/avanhilten/repositories/GenNet/ || exit 1

# The trailing 111 is passed through to GenNet.py as its third argument
# (presumably an experiment ID -- confirm against `python GenNet.py train --help`).
python GenNet.py train ./examples/example_classification/ 111

0 commit comments

Comments
 (0)