
Commit 6d57671
Merge pull request #28 from sth4nth/master
refined all docs, all test passed
2 parents: e8a46c9 + 3ab4747


124 files changed: +1063 −613 lines

TODO.txt (+4 −6)

@@ -1,9 +1,7 @@
 TODO:
-chapter10: compute bound terms (entropy) inside each factors
 chapter10/12: prediction functions for VB
-chapter05: MLP
-chapter08: BP, EP
+chapter07: rvm seq bug
+extract demos
 
-Help:
-standardize help descrtiption: add input/output description
-chapter04: plot multiclass data boundary
+chapter05: MLP
+chapter08: BP, EP

chapter01/condEntropy.m (+5 −2)

@@ -1,6 +1,9 @@
 function z = condEntropy (x, y)
-% Compute conditional entropy H(x|y) of two discrete variables x and y.
-% x, y: two vectors of integers of the same length
+% Compute conditional entropy z=H(x|y) of two discrete variables x and y.
+% Input:
+% x, y: two integer vector of the same length
+% Output:
+% z: conditional entropy z=H(x|y)
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);
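
A minimal usage sketch of the documented interface (assumes the chapter01 functions are on the MATLAB path; values are hypothetical), checking the identity H(x|y) = H(x,y) - H(y) that the demo below also asserts:

x = [1 1 2 2 3]; y = [1 2 1 2 1];   % integer label vectors of equal length
abs(condEntropy(x,y) - (jointEntropy(x,y) - entropy(y))) < 1e-10   % expect true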

chapter01/demo.m (+9 −9)

@@ -1,5 +1,5 @@
-% Done
-% demo for information theory toolbox
+
+% demos for ch01
 clear;
 k = 10; % variable range
 n = 100; % number of variables
@@ -10,20 +10,20 @@
 % x = randi(k,1,n); % need statistics toolbox
 % y = randi(k,1,n);
 
-%% entropy H(x), H(y)
+%% Entropy H(x), H(y)
 Hx = entropy(x);
 Hy = entropy(y);
-%% joint entropy H(x,y)
+%% Joint entropy H(x,y)
 Hxy = jointEntropy(x,y);
-%% conditional entropy H(x|y)
+%% Conditional entropy H(x|y)
 Hx_y = condEntropy(x,y);
-%% mutual information I(x,y)
+%% Mutual information I(x,y)
 Ixy = mutInfo(x,y);
-%% relative entropy (KL divergence) KL(p(x)|p(y))
+%% Relative entropy (KL divergence) KL(p(x)|p(y))
 Dxy = relatEntropy(x,y);
-%% normalized mutual information I_n(x,y)
+%% Normalized mutual information I_n(x,y)
 nIxy = nmi(x,y);
-%% nomalized variation information I_v(x,y)
+%% Nomalized variation information I_v(x,y)
 vIxy = nvi(x,y);
 %% H(x|y) = H(x,y)-H(y)
 isequalf(Hx_y,Hxy-Hy)

chapter01/entropy.m (+12 −7)

@@ -1,10 +1,15 @@
 function z = entropy(x)
-% Compute entropy H(x) of a discrete variable x.
-% x: a vectors of integers
+% Compute entropy z=H(x) of a discrete variable x.
+% Input:
+% x: a integer vectors
+% Output:
+% z: entropy z=H(x)
 % Written by Mo Chen ([email protected]).
 n = numel(x);
-x = reshape(x,1,n);
-[u,~,label] = unique(x);
-p = full(mean(sparse(1:n,label,1,n,numel(u),n),1));
-z = -dot(p,log2(p+eps));
-z = max(0,z);
+[u,~,x] = unique(x);
+k = numel(u);
+idx = 1:n;
+Mx = sparse(idx,x,1,n,k,n);
+Px = nonzeros(mean(Mx,1));
+Hx = -dot(Px,log2(Px));
+z = max(0,Hx);
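
The refactored body estimates p(x) with a sparse indicator matrix and drops empty bins via nonzeros, which also removes the old eps smoothing inside log2. A quick sanity check on a toy vector (a sketch; assumes the function is on the path):

x = [1 1 2 3];                  % empirical p = [0.5 0.25 0.25]
abs(entropy(x) - 1.5) < 1e-10   % H = 0.5*1 + 2*0.25*2 = 1.5, expect true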

chapter01/jointEntropy.m (+5 −2)

@@ -1,6 +1,9 @@
 function z = jointEntropy(x, y)
-% Compute joint entropy H(x,y) of two discrete variables x and y.
-% x, y: two vectors of integers of the same length
+% Compute joint entropy z=H(x,y) of two discrete variables x and y.
+% Input:
+% x, y: two integer vector of the same length
+% Output:
+% z: joint entroy z=H(x,y)
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);

chapter01/mutInfo.m (+4 −1)

@@ -1,6 +1,9 @@
 function z = mutInfo(x, y)
 % Compute mutual information I(x,y) of two discrete variables x and y.
-% x, y: two vectors of integers of the same length
+% Input:
+% x, y: two integer vector of the same length
+% Output:
+% z: mutual information z=I(x,y)
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);
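
The documented quantities are related by I(x,y) = H(x) + H(y) - H(x,y), which gives a one-line cross-check (a sketch with hypothetical data):

x = [1 2 1 2]; y = [1 1 2 2];
abs(mutInfo(x,y) - (entropy(x) + entropy(y) - jointEntropy(x,y))) < 1e-10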

chapter01/nmi.m (+5 −2)

@@ -1,6 +1,9 @@
 function z = nmi(x, y)
-% Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)).
-% x, y: two vectors of integers of the same length
+% Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)) of two discrete variables x and y.
+% Input:
+% x, y: two integer vector of the same length
+% Ouput:
+% z: normalized mutual information z=I(x,y)/sqrt(H(x)*H(y))
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);

chapter01/nvi.m (+5 −2)

@@ -1,6 +1,9 @@
 function z = nvi(x, y)
-% Compute normalized variation information (1-I(x,y)/H(x,y)).
-% x, y: two vectors of integers of the same length
+% Compute normalized variation information z=(1-I(x,y)/H(x,y)) of two discrete variables x and y.
+% Input:
+% x, y: two integer vector of the same length
+% Output:
+% z: normalized variation information z=(1-I(x,y)/H(x,y))
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);

chapter01/relatEntropy.m (+5 −2)

@@ -1,6 +1,9 @@
 function z = relatEntropy (x, y)
-% Compute relative entropy (a.k.a KL divergence) KL(p(x)||p(y)) of two discrete variables x and y.
-% x, y: two vectors of integers of the same length
+% Compute relative entropy (a.k.a KL divergence) z=KL(p(x)||p(y)) of two discrete variables x and y.
+% Input:
+% x, y: two integer vector of the same length
+% Output:
+% z: relative entropy (a.k.a KL divergence) z=KL(p(x)||p(y))
 % Written by Mo Chen ([email protected]).
 assert(numel(x) == numel(y));
 n = numel(x);

chapter02/logDirichlet.m (+5 −2)

@@ -1,8 +1,11 @@
 function y = logDirichlet(X, a)
 % Compute log pdf of a Dirichlet distribution.
-% X: d x n data matrix satifying (sum(X,1)==ones(1,n) && X>=0)
-% a: d x k parameters
+% Input:
+% X: d x n data matrix, each column sums to one (sum(X,1)==ones(1,n) && X>=0)
+% a: d x k parameter of Dirichlet
 % y: k x n probability density
+% Output:
+% y: k x n probability density in logrithm scale y=log p(x)
 % Written by Mo Chen ([email protected]).
 X = bsxfun(@times,X,1./sum(X,1));
 if size(a,1) == 1
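
A sketch exercising the documented input contract (columns of X on the simplex); for a flat Dirichlet, a = ones(d,1), the density is the constant Gamma(d), so every output should equal log(2) when d = 3:

X = rand(3,5);
X = bsxfun(@times,X,1./sum(X,1));   % each column sums to one
max(abs(logDirichlet(X,ones(3,1)) - log(2))) < 1e-10   % expect true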

chapter02/logGauss.m (+6)

@@ -1,5 +1,11 @@
 function y = logGauss(X, mu, sigma)
 % Compute log pdf of a Gaussian distribution.
+% Input:
+% X: d x n data matrix
+% mu: mean of Gaussian
+% sigma: variance of Gaussian
+% Output:
+% y: probability density in logrithm scale y=log p(x)
 % Written by Mo Chen ([email protected]).
 
 [d,n] = size(X);
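
Assuming sigma is a scalar variance in the univariate case, as the new comment suggests, the output can be checked against the closed-form 1-d log-density (a hypothetical sketch):

x = randn(1,5);
y0 = -0.5*(log(2*pi*4) + (x.^2)/4);      % log N(x|0,4) for d = 1
max(abs(logGauss(x,0,4) - y0)) < 1e-10   % expect true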

chapter02/logKde.m (+6 −1)

@@ -1,5 +1,10 @@
 function z = logKde (X, Y, sigma)
 % Compute log pdf of kernel density estimator.
+% Input:
+% X: d x n data matrix to be evaluate
+% Y: d x k data matrix served as database
+% Output:
+% z: probability density in logrithm scale z=log p(x|y)
 % Written by Mo Chen ([email protected]).
 D = bsxfun(@plus,full(dot(X,X,1)),full(dot(Y,Y,1))')-full(2*(Y'*X));
-z = logSumExp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2));
+z = logsumexp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2),1);
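
The replacement line ends with log(sigma*size(Y,2),1), but MATLAB's log takes a single argument, so the trailing ,1 appears misplaced (it already occurs inside the logsumexp call). A hedged sketch of the presumably intended line:

z = logsumexp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2));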

chapter02/logMn.m (+7 −9)

@@ -1,11 +1,9 @@
-function z = logMn (x, p)
+function z = logMn(x, p)
 % Compute log pdf of a multinomial distribution.
+% Input:
+% x: d x 1 integer vector
+% p: d x 1 probability
+% Output:
+% z: probability density in logrithm scale z=log p(x)
 % Written by Mo Chen ([email protected]).
-if numel(x) ~= numel(p)
-n = numel(x);
-x = reshape(x,1,n);
-[u,~,label] = unique(x);
-x = full(sum(sparse(label,1:n,1,n,numel(u),n),2));
-end
-z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));
-endfunction
+z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));
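
With the histogram branch removed, x must now already be a vector of counts, as the new Input comment states; a usage sketch with hypothetical values:

x = [2; 3; 5];          % counts over d = 3 categories, n = 10 trials
p = [1/3; 1/3; 1/3];    % category probabilities
z = logMn(x, p);        % log of the multinomial pmf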

chapter02/logMvGamma.m (+9 −3)

@@ -1,7 +1,13 @@
 function y = logMvGamma(x,d)
-% Compute logarithm multivariate Gamma function.
-% Gamma_p(x) = pi^(d(d-1)/4) \prod_(j=1)^d Gamma(x+(1-j)/2)
-% log(Gamma_p(x)) = d(d-1)/4 log(pi) + \sum_(j=1)^d log(Gamma(x+(1-j)/2))
+% Compute logarithm multivariate Gamma function
+% which is used in the probability density function of the Wishart and inverse Wishart distributions.
+% Gamma_d(x) = pi^(d(d-1)/4) \prod_(j=1)^d Gamma(x+(1-j)/2)
+% log(Gamma_d(x)) = d(d-1)/4 log(pi) + \sum_(j=1)^d log(Gamma(x+(1-j)/2))
+% Input:
+% x: m x n data matrix
+% d: dimension
+% Output:
+% y: m x n logarithm multivariate Gamma
 % Written by Michael Chen ([email protected]).
 s = size(x);
 x = reshape(x,1,prod(s));
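
From the header formula, Gamma_1(x) reduces to the ordinary Gamma function, which gives a quick check against MATLAB's built-in gammaln (a sketch):

x = [0.5 1 2.5; 3 4 5.5];   % m x n arguments
max(max(abs(logMvGamma(x,1) - gammaln(x)))) < 1e-10   % expect true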

chapter02/logSt.m (+7)

@@ -1,5 +1,12 @@
 function y = logSt(X, mu, sigma, v)
 % Compute log pdf of a Student's t distribution.
+% Input:
+% X: d x n data matrix
+% mu: mean
+% sigma: variance
+% v: degree of freedom
+% Output:
+% y: probability density in logrithm scale y=log p(x)
 % Written by mo Chen ([email protected]).
 [d,k] = size(mu);
 

chapter02/logVmf.m (+6)

@@ -1,5 +1,11 @@
 function y = logVmf(X, mu, kappa)
 % Compute log pdf of a von Mises-Fisher distribution.
+% Input:
+% X: d x n data matrix
+% mu: d x k mean
+% kappa: 1 x k variance
+% Output:
+% y: k x n probability density in logrithm scale y=log p(x)
 % Written by Mo Chen ([email protected]).
 d = size(X,1);
 c = (d/2-1)*log(kappa)-(d/2)*log(2*pi)-logbesseli(d/2-1,kappa);

chapter02/logWishart.m (+7 −1)

@@ -1,5 +1,11 @@
-function y = logWishart(Sigma, v, W)
+function y = logWishart(Sigma, W, v)
 % Compute log pdf of a Wishart distribution.
+% Input:
+% Sigma: d x d covariance matrix
+% W: d x d covariance parameter
+% v: degree of freedom
+% Output:
+% y: probability density in logrithm scale y=log p(Sigma)
 % Written by Mo Chen ([email protected]).
 d = length(Sigma);
 B = -0.5*v*logdet(W)-0.5*v*d*log(2)-logmvgamma(0.5*v,d);
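
Because the signature changed from logWishart(Sigma, v, W) to logWishart(Sigma, W, v), call sites written against the old order must swap the last two arguments; e.g. with hypothetical values:

Sigma = eye(3); W = eye(3); v = 5;
y = logWishart(Sigma, W, v);   % old order logWishart(Sigma, v, W) no longer applies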

chapter03/demo.m (+10 −11)

@@ -1,18 +1,17 @@
-% Done
-% demo for chapter 03
+% demos for ch03
 clear; close all;
 d = 1;
 n = 200;
 [x,t] = linRnd(d,n);
-%%
-% model = linReg(x,t);
-% linPlot(model,x,t);
-%%
-% [model1,llh1] = linRegEm(x,t);
-% plot(llh);
-% linPlot(model,x,t);
-%%
+%% Linear regression
+model = linReg(x,t);
+plotBar(model,x,t);
+%% Empirical Bayesian linear regression via EM
+[model1,llh] = linRegEm(x,t);
+plot(llh);
+plotBar(model,x,t);
+%% Empirical Bayesian linear regression via Mackay fix point iteration method
 [model,llh] = linRegFp(x,t);
 [y, sigma] = linPred(model,x,t);
 plot(llh);
-linPlot(model,x,t);
+plotBar(model,x,t);

chapter03/linPlot.m (+5 −3)

@@ -1,7 +1,9 @@
 function linPlot(model, X, t)
-% Plot linear function and data
-% X: 1xn data
-% t: 1xn response
+% Plot linear function for 1d data data
+% Input:
+% model: trained model structure
+% X: 1 x n data
+% t: 1 x n response
 % Written by Mo Chen ([email protected]).
 color = [255,228,225]/255; %pink
 % [x,idx] = sort(x);

chapter03/linPred.m (+6 −1)

@@ -1,8 +1,13 @@
 function [y, sigma, p] = linPred(model, X, t)
-% Compute linear model reponse y = w'*X+w0 and likelihood
+% Compute linear regression model reponse y = w'*X+w0 and likelihood
+% Input:
 % model: trained model structure
 % X: d x n testing data
 % t (optional): 1 x n testing response
+% Output:
+% y: 1 x n prediction
+% sigma: variance
+% p: 1 x n likelihood of t
 % Written by Mo Chen ([email protected]).
 w = model.w;
 w0 = model.w0;

chapter03/linReg.m (+4)

@@ -1,7 +1,11 @@
 function model = linReg(X, t, lambda)
 % Fit linear regression model y=w'x+w0
+% Input:
 % X: d x n data
 % t: 1 x n response
+% lambda: regularization parameter
+% Output:
+% model: trained model structure
 % Written by Mo Chen ([email protected]).
 if nargin < 3
 lambda = 0;
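
The new lambda parameter is the ridge penalty and defaults to 0 (plain least squares) when omitted; a usage sketch built on linRnd from this commit:

[X, t] = linRnd(1, 200);      % toy 1-d linear data
model = linReg(X, t);         % lambda = 0, ordinary least squares
model_r = linReg(X, t, 0.1);  % ridge regression with lambda = 0.1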

chapter03/linRegFp.m (+7 −2)

@@ -1,8 +1,13 @@
 function [model, llh] = linRegFp(X, t, alpha, beta)
-% Fit empirical Bayesian linear model with Mackay fixed point method
-% (p.168)
+% Fit empirical Bayesian linear model with Mackay fixed point method (p.168)
+% Input:
 % X: d x n data
 % t: 1 x n response
+% alpha: prior parameter
+% beta: prior parameter
+% Output:
+% model: trained model structure
+% llh: loglikelihood
 % Written by Mo Chen ([email protected]).
 if nargin < 3
 alpha = 0.02;

chapter03/linRnd.m (+7 −3)

@@ -1,10 +1,14 @@
 function [X, t] = linRnd(d, n)
 % Generate data from a linear model p(t|w,x)=G(w'x+w0,sigma), sigma=sqrt(1/beta)
-% where w and w0 are generated from Gauss(0,1),
-% beta is generated from Gamma(1,1),
-% X is generated form [0,1]
+% where w and w0 are generated from Gauss(0,1), beta is generated from
+% Gamma(1,1), X is generated form [0,1].
+% Input:
 % d: dimension of data
 % n: number of data
+% Output:
+% X: d x n data matrix
+% t: 1 x n response variable
+% Written by Mo Chen ([email protected]).
 beta = randg; % need statistcs toolbox
 X = rand(d,n);
 w = randn(d,1);

chapter04/binPlot.m (+5 −2)

@@ -1,7 +1,10 @@
 function binPlot(model, X, t)
 % Plot binary classification result for 2d data
-% X: 2xn data matrix
-% t: 1xn label
+% Input:
+% model: trained model structure
+% X: 2 x n data matrix
+% t: 1 x n label
+% Written by Mo Chen ([email protected]).
 assert(size(X,1) == 2);
 w = model.w;
 xi = min(X,[],2);
