Description
I tried to implement an RNN model to classify the MNIST dataset, but I only get an accuracy of around 40-50%, even after running it for more than 20 epochs. In PyTorch I get up to 90% accuracy after just 4-5 epochs.
Here is my code:
using Flux
using Flux: onehotbatch, onecold, params, gradient
using MLDatasets: MNIST
using Base.Iterators: partition
using TensorCast
using Statistics: mean
using Random: shuffle
#---------------------------------- DATA -------------------------------------
DATA_TRAIN = MNIST.traindata(Float32)
DATA_TEST = MNIST.testdata(Float32)
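# note (added comment): traindata/testdata return a (features, labels) tuple —
# features are a 28 × 28 × N Float32 array, labels are integers 0-9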
#-------------------------------- PREPROCESS DATA ------------------------------
@cast x_train[j][i, k] := DATA_TRAIN[1][i, j, k] # reshape to a length-28 vector of 28 × 60000 matrices (one matrix per time step)
@cast x_test[j][i, k] := DATA_TEST[1][i, j, k]   # reshape to a length-28 vector of 28 × 10000 matrices (one matrix per time step)
# one-hot encode the training labels; the test labels stay as integers for the accuracy check
y_train = onehotbatch(DATA_TRAIN[2], 0:9)
y_test = DATA_TEST[2]
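# optional sanity check (not in the original script): confirm the sequence layout the RNN
# will consume — a length-28 vector of time steps, each a 28 × n_samples matrix
@assert length(x_train) == 28 && size(x_train[1]) == (28, 60000)
@assert length(x_test) == 28 && size(x_test[1]) == (28, 10000)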
#------------------------------ CONSTANTS ---------------------------------------
INPUT_DIM = size(x_train[1], 1) # 28 features (pixels) per time step
OUTPUT_DIM = 10 # number of classes
LR = 0.001 # learning rate
EPOCHS = 100
BATCH_SIZE = 1000
TOTAL_SAMPLES = size(x_train[1], 2)
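# with 60,000 training samples and BATCH_SIZE = 1000, each epoch runs 60 parameter updates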
#--------------------------------- BUILD MODEL -----------------------------------
struct RnnModel
    rnn
    fc
end
Flux.@functor RnnModel
# pass input through the model
function (m::RnnModel)(input_data)
    # warm up the rnn on all but the last time step
    [m.rnn(x) for x ∈ input_data[1:end - 1]]
    # pass the rnn output at the last time step to the fc layer
    m.fc(m.rnn(input_data[end]))
end
# build MODEL
model = RnnModel(
    Chain(RNN(INPUT_DIM, 128), relu, RNN(128, 64), relu, RNN(64, 32), relu),
    Chain(Dense(32, OUTPUT_DIM), softmax)
)
#----------------------------- HELPER FUNCTIONS --------------------------------------
loss_fn(x, y) = Flux.Losses.logitcrossentropy(model(x), y)
function accuracy(x, y)
    Flux.reset!(model)
    mean(onecold(model(x), 0:9) .== y)
end
θ = params(model) # model parameters to be updated during training
opt = Flux.ADAM(LR) # optimizer function
#---------------------------- RUN TRAINING ----------------------------------------------
for epoch ∈ 1:EPOCHS
    for idx ∈ partition(1:TOTAL_SAMPLES, BATCH_SIZE)
        Flux.reset!(model)
        # slice the current mini-batch out of every time step and out of the labels
        features = [x[:, idx] for x ∈ x_train]
        labels = y_train[:, idx]
        gs = gradient(θ) do
            loss = loss_fn(features, labels)
            loss
        end
        # update model parameters
        Flux.Optimise.update!(opt, θ, gs)
    end
    # evaluate model on the test set
    @info epoch
    @show accuracy(x_test, y_test)
end
What am I doing wrong?