# HW2_MNIST_NN4.py
# %% [markdown]
# ### 1. Import Libraries
# %%
import os
import sys
import warnings
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# %matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
# %% [markdown]
# ### 2. Import from mlcvlab
# %%
from mlcvlab_GPU.models.nn4 import NN4
from mlcvlab_GPU.nn.losses import l2
from mlcvlab_GPU.optim.sgd import SGD
from mlcvlab_GPU.optim.async_sgd import async_sgd
from mlcvlab_GPU.optim.sync_sgd import sync_sgd
# from numba import jit, njit, vectorize, cuda, uint32, f8, uint8
# %% [markdown]
# ### 3. Set Seed
# %%
np.random.seed(42)
# %% [markdown]
# ### 4. Helper functions
# %%
def load_dataset():
    '''Loads the whole dataset with true labels included.'''
    x, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
    return x, y

def prepare_data(x, y):
    '''Converts the 10-class digit labels into binary labels: 1 if the digit is even, 0 if odd.'''
    y = y.astype(int)
    y = y.reshape(len(y), 1)
    y = (y + 1) % 2
    return x, y
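# %%
# Minimal sanity check for prepare_data (illustrative; uses dummy labels rather
# than the real MNIST data): even digits should map to 1 and odd digits to 0.
_demo_x = np.zeros((4, 784))
_, _demo_y = prepare_data(_demo_x, np.array(['0', '1', '2', '3']))
assert _demo_y.flatten().tolist() == [1, 0, 1, 0]
# %%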
def split_train_test(x, y):
    '''Partitions the dataset into 60,000 training samples and 10,000 test samples.
    After the bias row is appended, X_train is 785 x 60000 and X_test is 785 x 10000;
    y_train is 1 x 60000 and y_test is 1 x 10000.'''
    X_train, X_test = x[:60000].T, x[60000:].T
    y_train, y_test = y[:60000].T, y[60000:].T
    # Debug-sized split (uncomment for quick smoke tests):
    # X_train, X_test = x[:6].T, x[69900:].T
    # y_train, y_test = y[:6].T, y[69900:].T
    # Append -1 to the end of every x as a bias term.
    bias_train = np.ones((1, np.shape(X_train)[1])) * -1
    bias_test = np.ones((1, np.shape(X_test)[1])) * -1
    X_train = np.append(X_train, bias_train, axis=0)
    X_test = np.append(X_test, bias_test, axis=0)
    return X_train, X_test, y_train, y_test
def minibatch(x_train, y_train, K):
    '''Splits the training data into batches of size K.
    Returns arrays of shape (num_batches, features, K) and (num_batches, 1, K).'''
    x_train = x_train.T
    y_train = y_train.T
    # Assumes the number of samples is divisible by K so all batches are equal-sized.
    x_train_batches = np.array([x_train[i:i+K].T for i in range(0, len(x_train), K)])
    y_train_batches = np.array([y_train[i:i+K].T for i in range(0, len(y_train), K)])
    return x_train_batches, y_train_batches
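# %%
# Minimal check for minibatch (illustrative, dummy data): 12 samples of 3
# features with K=4 should give 3 batches, x shaped (3, 4) and y shaped (1, 4).
_xb, _yb = minibatch(np.random.rand(3, 12), np.random.randint(0, 2, (1, 12)), K=4)
assert _xb.shape == (3, 3, 4) and _yb.shape == (3, 1, 4)
# %%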
def initialize_model(M_0, M_1, M_2, M_3, use_batch_norm=True, dropout_p=0.5):
    '''Initializes the 4-layer network. The input dimension (785, including the
    bias row) is taken from the global X_train.'''
    input_dim = np.shape(X_train)[0]                                # 785
    # He initialization for the hidden (ReLU) layers: W ~ N(0, 2/fan_in).
    W0 = np.random.randn(input_dim, M_0) * np.sqrt(2 / input_dim)   # 785 x 120
    W1 = np.random.randn(M_0, M_1) * np.sqrt(2 / M_0)               # 120 x 100
    W2 = np.random.randn(M_1, M_2) * np.sqrt(2 / M_1)               # 100 x 80
    # Xavier initialization for the output layer: W ~ N(0, 1/fan_in).
    W3 = np.random.randn(M_2, M_3) * np.sqrt(1 / M_2)               # 80 x 1
    print(f"\nSize of W0 : {W0.shape}, Size of W1 : {W1.shape}, Size of W2 : {W2.shape}, Size of W3 : {W3.shape}\n\n")
    four_layer_nn = NN4(use_batchnorm=use_batch_norm, dropout_param=dropout_p)
    four_layer_nn.layers[0].W = W0
    four_layer_nn.layers[1].W = W1
    four_layer_nn.layers[2].W = W2
    four_layer_nn.layers[3].W = W3
    four_layer_nn.dropout_param = dropout_p
    return four_layer_nn
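# %%
# Why He initialization: with ReLU activations, drawing W ~ N(0, 2/fan_in) keeps
# the variance of the pre-activations roughly constant across layers. A quick
# empirical check (illustrative):
_W = np.random.randn(785, 120) * np.sqrt(2 / 785)
print(f"W0 std (empirical): {_W.std():.4f}, target sqrt(2/785): {np.sqrt(2 / 785):.4f}")
# %%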
def train_model(model, x_train_batches, y_train_batches, num_epochs=100, learning_rate=0.1):
    '''Trains two versions of the same model, one with asynchronous SGD and one
    with synchronous SGD, so their outcomes and runtimes can be compared.'''
    model_async = async_sgd(model, x_train_batches, y_train_batches, R=num_epochs, lr=learning_rate)
    model_sync = sync_sgd(model, x_train_batches, y_train_batches, R=num_epochs, lr=learning_rate)
    return model_async, model_sync
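# %%
# A minimal runtime-comparison sketch for async vs. sync SGD (illustrative;
# assumes both optimizers take the same arguments as used above). Wall-clock
# timing only, so results vary across runs.
import time
def timed(fn, *args, **kwargs):
    '''Returns (result, elapsed seconds) for a single call to fn.'''
    t0 = time.perf_counter()
    out = fn(*args, **kwargs)
    return out, time.perf_counter() - t0
# Example usage (after the batches below are built):
# model_async, t_async = timed(async_sgd, model, x_train_batches, y_train_batches, R=1, lr=0.1)
# model_sync, t_sync = timed(sync_sgd, model, x_train_batches, y_train_batches, R=1, lr=0.1)
# print(f"async: {t_async:.1f}s, sync: {t_sync:.1f}s")
# %%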
def test_model(model, X_test, y_test):
    '''Tests the accuracy of the neural network on the test set.'''
    # Number of test instances.
    T = np.shape(y_test)[1]
    # Predictions of the model on the test inputs ('test' mode, i.e. no dropout).
    y_hat = np.zeros(np.shape(y_test))
    for index, image in enumerate(X_test.T):
        y_hat[0][index] = model.nn4(image, 'test')
    # A mismatch contributes 1, a correct prediction contributes 0.
    A = (np.absolute(y_test - y_hat) > 0).astype(float)
    # Accuracy is the fraction of correct predictions (the sum counts the errors).
    accuracy = 1 - np.sum(A) / T
    return accuracy
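# %%
# Quick check of the accuracy computation (illustrative, dummy predictions):
# 3 correct out of 4 should give 0.75.
_y_true = np.array([[1, 0, 1, 1]])
_y_pred = np.array([[1, 0, 0, 1]])
assert 1 - np.sum(np.absolute(_y_true - _y_pred) > 0) / _y_true.shape[1] == 0.75
# %%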
# %% [markdown]
# ### 5. Run the program
# %%
# Load data.
x, y = load_dataset()
# Prepare data (binary even/odd labels).
x, y = prepare_data(x, y)
# Split data set.
X_train, X_test, y_train, y_test = split_train_test(x, y)
# Initialize model. Hidden layer widths: 120, 100, 80; the output layer must
# have size 1 since this is a binary classification problem.
M_0 = 120
M_1 = 100
M_2 = 80
M_3 = 1
dropout_p_val = 0.5
model = initialize_model(M_0, M_1, M_2, M_3, use_batch_norm=True, dropout_p=dropout_p_val)
# Batch size K = 6000 gives 60000 / 6000 = 10 batches.
K = 6000
x_train_batches, y_train_batches = minibatch(X_train, y_train, K)
# Set values for training the model.
num_epochs = 1
learning_rate = 0.1
model_async, model_sync = train_model(model, x_train_batches, y_train_batches, num_epochs=num_epochs, learning_rate=learning_rate)
print("\n\nCompleted training, now testing...")
# Test both models.
accuracy_async = test_model(model_async, X_test, y_test) * 100
print(f"\n\nCompleted testing model using asynchronous SGD - Accuracy : {accuracy_async}%\n")
accuracy_sync = test_model(model_sync, X_test, y_test) * 100
print(f"Completed testing model using synchronous SGD - Accuracy : {accuracy_sync}%")
# %%
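# %%
# Optional visualization (illustrative; assumes model_sync.nn4 accepts a single
# 785-vector, as in test_model): show a few test digits with their predicted
# even (1) / odd (0) labels.
fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for i, ax in enumerate(axes):
    ax.imshow(X_test[:784, i].reshape(28, 28), cmap='gray')
    ax.set_title(f"pred: {int(model_sync.nn4(X_test[:, i], 'test'))}")
    ax.axis('off')
plt.show()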