
Commit a117a43

add MLP classification
1 parent 0e1acae commit a117a43

5 files changed: +123 −20 lines changed


chapter05/mlpClass.m

+63
@@ -0,0 +1,63 @@
function [model, L] = mlpClass(X, y, k, lambda)
% Train a multilayer perceptron neural network for classification with backpropagation.
% Logistic activation is used in the hidden layers; softmax in the output layer.
% Input:
%   X: d x n data matrix
%   y: 1 x n label vector
%   k: T x 1 vector to specify the number of hidden nodes in each layer
%   lambda: regularization parameter
% Output:
%   model: model structure
%   L: (regularized cross-entropy) loss
% Written by Mo Chen ([email protected]).
if nargin < 4
    lambda = 1e-2;
end
eta = 1e-3;        % learning rate
tol = 1e-4;        % relative tolerance for convergence
maxiter = 50000;
L = inf(1,maxiter);

Y = sparse(y,1:numel(y),1);        % one-hot (1-of-k) encoding of the labels
k = [size(X,1);k(:);size(Y,1)];    % layer widths: input, hidden..., output
T = numel(k)-1;
W = cell(T,1);
b = cell(T,1);
for t = 1:T
    W{t} = randn(k(t),k(t+1));
    b{t} = randn(k(t+1),1);
end
R = cell(T,1);      % per-layer deltas
Z = cell(T+1,1);    % per-layer activations
Z{1} = X;
for iter = 2:maxiter
    % forward
    for t = 1:T-1
        Z{t+1} = sigmoid(W{t}'*Z{t}+b{t});   % 5.10, 5.113
    end
    Z{T+1} = softmax(W{T}'*Z{T}+b{T});

    % loss
    E = Z{T+1};
    Wn = cellfun(@(x) dot(x(:),x(:)),W);     % |W|^2
    L(iter) = -dot(Y(:),log(E(:)))+0.5*lambda*sum(Wn);
    if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end

    % backward
    R{T} = Z{T+1}-Y;                         % softmax + cross-entropy delta
    for t = T-1:-1:1
        df = Z{t+1}.*(1-Z{t+1});             % h'(a)
        R{t} = df.*(W{t+1}*R{t+1});          % 5.66
    end

    % gradient descent
    for t = 1:T
        dW = Z{t}*R{t}'+lambda*W{t};         % 5.67
        db = sum(R{t},2);
        W{t} = W{t}-eta*dW;                  % 5.43
        b{t} = b{t}-eta*db;
    end
end
L = L(2:iter);
model.W = W;
model.b = b;
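
A note on the backward pass above: R{T} = Z{T+1}-Y is the usual shortcut for a softmax output layer trained with cross-entropy, where the softmax Jacobian and the loss gradient collapse into a single subtraction. For reference (our notation, not part of the commit), with pre-activations a and one-hot targets y:

\[
\hat{y}_j = \frac{e^{a_j}}{\sum_i e^{a_i}}, \qquad
E = -\sum_j y_j \log \hat{y}_j, \qquad
\frac{\partial E}{\partial a_j}
  = \sum_i \Bigl(-\frac{y_i}{\hat{y}_i}\Bigr)\,\hat{y}_i(\delta_{ij}-\hat{y}_j)
  = \hat{y}_j \sum_i y_i - y_j
  = \hat{y}_j - y_j,
\]

since the one-hot targets satisfy \(\sum_i y_i = 1\).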

chapter05/mlpClassPred.m

+19
@@ -0,0 +1,19 @@
function [y, P] = mlpClassPred(model, X)
% Multilayer perceptron classification prediction.
% Logistic activation is used in the hidden layers; softmax in the output layer.
% Input:
%   model: model structure
%   X: d x n data matrix
% Output:
%   y: 1 x n label vector
%   P: k x n probability matrix
% Written by Mo Chen ([email protected]).
W = model.W;
b = model.b;
T = length(W);
Z = X;
for t = 1:T-1
    Z = sigmoid(W{t}'*Z+b{t});   % forward pass through the hidden layers
end
P = softmax(W{T}'*Z+b{T});       % class posterior probabilities
[~,y] = max(P,[],1);             % most probable class per column
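
Both new files call sigmoid and softmax helpers that are not part of this commit; in the toolbox they live outside chapter05. For running these functions in isolation, a minimal sketch of what the two helpers must provide, each saved as its own .m file (the max subtraction in softmax is our numerical-stability guard, not necessarily what the toolbox version does):

function y = sigmoid(x)
% Elementwise logistic function.
y = 1./(1+exp(-x));

function y = softmax(x)
% Column-wise softmax; subtracting each column's max keeps exp from overflowing.
x = bsxfun(@minus,x,max(x,[],1));
y = exp(x);
y = bsxfun(@rdivide,y,sum(y,1));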

chapter05/mlpReg.m

+18 −15
@@ -1,22 +1,24 @@
-function [model, L] = mlpReg(X,Y,k,lambda)
-% Train a multilayer perceptron neural network
+function [model, L] = mlpReg(X,y,k,lambda)
+% Train a multilayer perceptron neural network for regression with backpropagation
+% tanh activation function is used
 % Input:
 %   X: d x n data matrix
-%   Y: p x n response matrix
+%   y: p x n response matrix
 %   k: T x 1 vector to specify number of hidden nodes in each layer
 %   lambda: regularization parameter
 % Output:
 %   model: model structure
-%   L: loss
+%   L: (regularized least square) loss
 % Written by Mo Chen ([email protected]).
 if nargin < 4
     lambda = 1e-2;
 end
-eta = 1e-3;
+eta = 1e-5;
+tol = 1e-5;
 maxiter = 50000;
 L = inf(1,maxiter);
 
-k = [size(X,1);k(:);size(Y,1)];
+k = [size(X,1);k(:);size(y,1)];
 T = numel(k)-1;
 W = cell(T,1);
 b = cell(T,1);
@@ -30,30 +32,31 @@
 for iter = 2:maxiter
     % forward
     for t = 1:T-1
-        Z{t+1} = tanh(W{t}'*Z{t}+b{t});
+        Z{t+1} = tanh(W{t}'*Z{t}+b{t});   % 5.10, 5.113
     end
-    Z{T+1} = W{T}'*Z{T}+b{T};
+    Z{T+1} = W{T}'*Z{T}+b{T};             % 5.114
 
     % loss
-    E = Z{T+1}-Y;
+    E = Z{T+1}-y;
     Wn = cellfun(@(x) dot(x(:),x(:)),W);  % |W|^2
     L(iter) = dot(E(:),E(:))+lambda*sum(Wn);
-
+    if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end
+
     % backward
-    R{T} = E;   % delta
+    R{T} = E;
     for t = T-1:-1:1
         df = 1-Z{t+1}.^2;   % h'(a)
-        R{t} = df.*(W{t+1}*R{t+1});   % delta
+        R{t} = df.*(W{t+1}*R{t+1});   % 5.66
     end
 
     % gradient descent
     for t=1:T
-        dW = Z{t}*R{t}'+lambda*W{t};
+        dW = Z{t}*R{t}'+lambda*W{t};   % 5.67
         db = sum(R{t},2);
-        W{t} = W{t}-eta*dW;
+        W{t} = W{t}-eta*dW;   % 5.43
         b{t} = b{t}-eta*db;
     end
 end
-L = L(1,2:iter);
+L = L(2:iter);
 model.W = W;
 model.b = b;
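
The equation numbers added in the comments point into Bishop's PRML: 5.66 is the backpropagated delta, 5.67 the weight gradient, and 5.43 the gradient-descent step. In the code's notation, with activations Z_t and deltas R_t, each iteration applies

\[
\nabla_{W_t} = Z_t R_t^{\mathsf T} + \lambda W_t, \qquad
\nabla_{b_t} = \sum_n r_{t,n}, \qquad
W_t \leftarrow W_t - \eta\,\nabla_{W_t}, \qquad
b_t \leftarrow b_t - \eta\,\nabla_{b_t}.
\]

Note that the reported loss is the full squared error dot(E(:),E(:)) while the output delta R{T} = E drops the derivative's factor of 2, so the step actually follows the gradient of half the reported loss; the constant is absorbed into the small learning rate eta.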

chapter05/mlpRegPred.m

+2 −1
@@ -1,5 +1,6 @@
 function Y = mlpRegPred(model, X)
-% Multilayer perceptron prediction
+% Multilayer perceptron regression prediction
+% tanh activation function is used.
 % Input:
 %   model: model structure
 %   X: d x n data matrix

demo/ch05/mlp_demo.m

+21 −4
@@ -1,15 +1,32 @@
-clear; close all;
+clear; close all
+%% Regression
 n = 200;
 x = linspace(0,2*pi,n);
 y = sin(x);
 
-k = [3,4];    % two hidden layers with 3 and 4 hidden nodes
+h = [10,6];   % two hidden layers with 10 and 6 neurons
 lambda = 1e-2;
-[model, L] = mlpReg(x,y,k);
+[model, L] = mlpReg(x,y,h,lambda);
 t = mlpRegPred(model,x);
 plot(L);
 figure;
 hold on
 plot(x,y,'.');
 plot(x,t);
-hold off
+hold off
+%% Classification
+clear;
+k = 2;
+n = 200;
+[X,y] = kmeansRnd(2,k,n);
+figure;
+plotClass(X,y);
+
+h = 3;
+lambda = 1e-2;
+[model, llh] = mlpClass(X,y,h,lambda);
+[t,p] = mlpClassPred(model,X);
+figure;
+plotClass(X,t);
+figure;
+plot(llh);
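
The classification half of the demo depends on kmeansRnd and plotClass from other chapters of the toolbox, which this commit does not touch. If only chapter05 is checked out, a rough stand-in with the same [X,y] = kmeansRnd(d,k,n) contract is enough to smoke-test the demo (our placeholder, not the toolbox implementation):

function [X, y] = kmeansRnd(d, k, n)
% Placeholder generator: n points in d dimensions from k Gaussian blobs;
% y is the 1 x n vector of component labels.
mu = 4*randn(d,k);          % well-separated blob centers
y = randi(k,1,n);           % random component assignment per point
X = mu(:,y) + randn(d,n);   % unit-variance noise around each center

plotClass can likewise be approximated with gscatter(X(1,:),X(2,:),y) from the Statistics and Machine Learning Toolbox, or one scatter call per class.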
