-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsoftmax.py
More file actions
129 lines (106 loc) · 4.16 KB
/
softmax.py
File metadata and controls
129 lines (106 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf # tensor flow
from keras.utils.np_utils import to_categorical # one hot encoding
# --- hyper-parameters ---------------------------------------------------
TRAINING_ITERATIONS = 100000  # number of mini-batch steps run by the training loop
BATCH_SIZE = 128              # examples requested from next_batch() per step
LEARNING_RATE = 0.001         # step size handed to the gradient-descent optimizer

# --- training data ------------------------------------------------------
# The first CSV column is used as the label; all remaining columns are the
# input features.
dataset = pd.read_csv("./data/train.csv")
feature_columns = dataset.iloc[:, 1:]
label_column = dataset.iloc[:, 0]

# One-hot encode the labels, then store the 0/1 matrix as unsigned bytes.
y_train = to_categorical(label_column.values.astype('float32')).astype(np.uint8)

# Normalize the features by scaling every value with 1/255.
x_train = feature_columns.values.astype('float32') * (1.0 / 255.0)
# Mini-batch iterator state. These module-level values are read and updated
# by next_batch() on every call.
epochs_completed = 0
index_in_epoch = 0
num_examples = y_train.shape[0]


def next_batch(batch_size):
    """Return the next ``batch_size`` training examples as ``(x, y)`` slices.

    Batches are taken sequentially from ``x_train`` / ``y_train``. When the
    requested batch would run past the end of the data, the epoch counter is
    incremented, both arrays are re-ordered with one shared random
    permutation, and the batch is taken from the start of the shuffled data.
    The examples left over at the end of the finished epoch are skipped, and
    the data is not shuffled before the first epoch.

    Updates the module-level ``x_train``, ``y_train``, ``index_in_epoch`` and
    ``epochs_completed``.
    """
    global x_train, y_train, index_in_epoch, epochs_completed

    first = index_in_epoch
    last = first + batch_size

    if last > num_examples:
        # The current epoch cannot supply a full batch: count it as finished.
        epochs_completed += 1

        # Re-order features and labels with the same random permutation so
        # that every row keeps its own label.
        order = np.arange(num_examples)
        np.random.shuffle(order)
        x_train, y_train = x_train[order], y_train[order]

        # The next epoch starts from the top of the shuffled data.
        first, last = 0, batch_size

    index_in_epoch = last
    return x_train[first:last], y_train[first:last]
# build one fully connected layer
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    """Add a dense layer ``inputs @ W + b`` to the graph and return its output.

    Args:
        inputs: tensor multiplied by the weight matrix via ``tf.matmul``.
        in_size: number of rows of the weight matrix.
        out_size: number of columns of the weight matrix and of the bias row.
        layer_name: prefix for the histogram summary tags
            (``<layer_name>/weights``, ``/biases``, ``/outputs``).
        activation_function: optional callable applied to ``inputs @ W + b``;
            when ``None`` the linear value is returned unchanged.

    Returns:
        The layer's output tensor.

    Note:
        The enclosing name scope is the literal ``'layer'``; ``layer_name``
        is only used for the summary tags.
    """
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            # Weights are drawn from tf.random_normal with its default parameters.
            weight_matrix = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
            tf.summary.histogram(layer_name + '/weights', weight_matrix)

        with tf.name_scope('biases'):
            # Every bias starts at 0.1.
            bias_row = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
            tf.summary.histogram(layer_name + '/biases', bias_row)

        with tf.name_scope('Wx_plus_b'):
            pre_activation = tf.add(tf.matmul(inputs, weight_matrix), bias_row)

        outputs = (
            pre_activation
            if activation_function is None
            else activation_function(pre_activation)
        )
        tf.summary.histogram(layer_name + '/outputs', outputs)

    return outputs
# define the placeholders: 784 input features and 10 one-hot label columns
# per example; the batch dimension is left open (None)
with tf.name_scope("inputs"):
    xs = tf.placeholder(tf.float32, [None, 784], name="x_input")
    ys = tf.placeholder(tf.float32, [None, 10], name="y_input")

# single softmax layer mapping the 784 inputs directly to 10 class probabilities
y_predict = add_layer(xs, 784, 10, "l1", activation_function=tf.nn.softmax)

# the error between prediction and real data (cross-entropy)
with tf.name_scope('loss'):
    # Clip the probabilities away from 0 before taking the log: a softmax
    # output that underflows to exactly 0 would give log(0) = -inf, and
    # 0 * -inf = NaN would then propagate into every weight.
    clipped_predict = tf.clip_by_value(y_predict, 1e-10, 1.0)
    # NOTE(review): reduce_sum has no axis, so it already collapses the whole
    # batch to a scalar and reduce_mean is a no-op; the loss is a batch *sum*.
    # Kept as-is because LEARNING_RATE is tuned against this scaling.
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(clipped_predict)))

# train the network with plain gradient descent
with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)
# Create the session and the summary writer (graph + histograms go to ./logs/).
sess = tf.Session()
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter("./logs/", sess.graph)

# initialization of all graph variables
init = tf.global_variables_initializer()
sess.run(init)

# Accuracy = fraction of examples whose highest-scoring class matches the label.
display_step = 1
correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

for i in range(TRAINING_ITERATIONS):
    # next_batch() takes only the batch size; passing the step index as a
    # second argument raised TypeError on the very first iteration.
    batch_xs, batch_ys = next_batch(BATCH_SIZE)
    feed = {xs: batch_xs, ys: batch_ys}
    sess.run(train_step, feed_dict=feed)

    # Record the merged summaries every 50 steps.
    if i % 50 == 0:
        result = sess.run(merged, feed_dict=feed)
        writer.add_summary(result, i)

    # Report accuracy on the current batch at steps 0-10, then 20, 30, ...,
    # 100, 200, ... (the interval grows tenfold), and on the final step.
    if i % display_step == 0 or (i + 1) == TRAINING_ITERATIONS:
        train_accuracy = sess.run(accuracy, feed_dict=feed)
        print('training_accuracy => %.4f for step %d' % (train_accuracy, i))
        # increase display_step
        if i % (display_step * 10) == 0 and i:
            display_step *= 10

# Flush and release the event file so no buffered summaries are lost.
writer.close()
# Load the test set from CSV and apply the same 1/255 scaling used for training
# (converts from [0:255] to [0.0:1.0]).
x_test = pd.read_csv("./data/test.csv").values.astype('float32')
x_test = x_test * (1.0 / 255.0)

# The predicted class is the index of the largest softmax output.
predict = tf.argmax(y_predict, 1)
y_test = sess.run(predict, feed_dict={xs: x_test})

# Save the results: one row per test example, a 1-based ImageId next to its
# predicted Label.
image_ids = np.arange(1, len(x_test) + 1)
submission = np.column_stack((image_ids, y_test))
np.savetxt(
    'submission.csv',
    submission,
    delimiter=',',
    header='ImageId,Label',
    comments='',
    fmt='%d',
)

sess.close()