# coding: utf-8
# # Lab 3: Bayes Classifier and Boosting
# ## Jupyter notebooks
#
# In this lab, you can use Jupyter <https://jupyter.org/> to get a nice layout of your code and plots in one document. However, you may also use Python as usual, without Jupyter.
#
# If you have Python and pip, you can install Jupyter with `sudo pip install jupyter`. Otherwise you can follow the instructions at <http://jupyter.readthedocs.org/en/latest/install.html>.
#
# And that is everything you need! Use a terminal to go into the folder with the provided lab files, then run `jupyter notebook` to start a session in that folder. Click `lab3.ipynb` in the browser window that appears to open this notebook. Evaluate the cells in order by pressing `ctrl+enter` or clicking `run cell` in the toolbar.
# ## Import the libraries
#
# Check out `labfuns.py` if you are interested in the details.
import sys
import numpy as np
from importlib import reload
from labfuns import *
from sklearn import decomposition
from matplotlib.colors import ColorConverter
# ## Bayes classifier functions to implement
#
# The lab descriptions state what each function should do.
# Note that you do not need to handle the W argument for this part
# in: labels - N x 1 vector of class labels
# out: prior - C x 1 vector of class priors
def computePrior(labels,W=None):
# Your code here
return prior
# Note that you do not need to handle the W argument for this part
# in: X - N x d matrix of N data points
# labels - N x 1 vector of class labels
# out: mu - C x d matrix of class means
# sigma - d x d x C matrix of class covariances
def mlParams(X,labels,W=None):
# Your code here
return mu, sigma
# in: X - N x d matrix of N data points
# prior - C x 1 vector of class priors
# mu - C x d matrix of class means
# sigma - d x d x C matrix of class covariances
# out: h - N x 1 class predictions for test points
def classify(X,prior,mu,sigma,covdiag=True):
# Your code here
    # Example code for solving a psd system via a Cholesky factorization
    # (use L.T rather than L.H: plain NumPy ndarrays have no .H attribute)
    # L = np.linalg.cholesky(A)
    # y = np.linalg.solve(L,b)
    # x = np.linalg.solve(L.T,y)
return h
# ## Test the Maximum Likelihood estimates
#
# Call `genBlobs` and `plotGaussian` to verify your estimates.
X, labels = genBlobs(centers=5)
mu, sigma = mlParams(X,labels)
plotGaussian(X,labels,mu,sigma)
# ## Boosting functions to implement
#
# The lab descriptions state what each function should do.
# in: X - N x d matrix of N data points
# labels - N x 1 vector of class labels
# T - number of boosting iterations
# out: priors - length T list of prior as above
# mus - length T list of mu as above
# sigmas - length T list of sigma as above
# alphas - T x 1 vector of vote weights
def trainBoost(X,labels,T=5,covdiag=True):
# Your code here
return priors,mus,sigmas,alphas
# in: X - N x d matrix of N data points
# priors - length T list of prior as above
# mus - length T list of mu as above
# sigmas - length T list of sigma as above
# alphas - T x 1 vector of vote weights
# out: yPred - N x 1 class predictions for test points
def classifyBoost(X,priors,mus,sigmas,alphas,covdiag=True):
# Your code here
    return yPred
# ## Define our testing function
#
# The function below, `testClassifier`, will be used to try out the different datasets. `fetchDataset` can be provided with any of the dataset arguments `wine`, `iris`, `olivetti` and `vowel`. Observe that we split the data into a **training** and a **testing** set.
np.set_printoptions(threshold=sys.maxsize)  # newer NumPy rejects np.nan here
np.set_printoptions(precision=25)
np.set_printoptions(linewidth=200)
def testClassifier(dataset='iris',dim=0,split=0.7,doboost=False,boostiter=5,covdiag=True,ntrials=100):
X,y,pcadim = fetchDataset(dataset)
    means = np.zeros(ntrials)
for trial in range(ntrials):
# xTr,yTr,xTe,yTe,trIdx,teIdx = trteSplit(X,y,split)
xTr,yTr,xTe,yTe,trIdx,teIdx = trteSplitEven(X,y,split)
        # Do PCA; override the default dimensionality if the user provided one
if dim > 0:
pcadim = dim
if pcadim > 0:
pca = decomposition.PCA(n_components=pcadim)
pca.fit(xTr)
xTr = pca.transform(xTr)
xTe = pca.transform(xTe)
## Boosting
if doboost:
# Compute params
            priors,mus,sigmas,alphas = trainBoost(xTr,yTr,T=boostiter,covdiag=covdiag)
yPr = classifyBoost(xTe,priors,mus,sigmas,alphas,covdiag=covdiag)
else:
## Simple
# Compute params
prior = computePrior(yTr)
mu, sigma = mlParams(xTr,yTr)
# Predict
yPr = classify(xTe,prior,mu,sigma,covdiag=covdiag)
        # Compute classification accuracy for this trial
        means[trial] = 100 * np.mean((yPr == yTe).astype(float))
        print("Trial:", trial, "Accuracy", means[trial])
print "Final mean classification accuracy ", np.mean(means), "with standard deviation", np.std(means)
# ## Plotting the decision boundary
#
# This is some code that you can use for plotting the decision boundary in the last part of the lab.
def plotBoundary(dataset='iris',split=0.7,doboost=False,boostiter=5,covdiag=True):
X,y,pcadim = fetchDataset(dataset)
xTr,yTr,xTe,yTe,trIdx,teIdx = trteSplitEven(X,y,split)
pca = decomposition.PCA(n_components=2)
pca.fit(xTr)
xTr = pca.transform(xTr)
xTe = pca.transform(xTe)
pX = np.vstack((xTr, xTe))
py = np.hstack((yTr, yTe))
if doboost:
## Boosting
# Compute params
priors,mus,sigmas,alphas = trainBoost(xTr,yTr,T=boostiter,covdiag=covdiag)
else:
## Simple
# Compute params
prior = computePrior(yTr)
mu, sigma = mlParams(xTr,yTr)
xRange = np.arange(np.min(pX[:,0]),np.max(pX[:,0]),np.abs(np.max(pX[:,0])-np.min(pX[:,0]))/100.0)
yRange = np.arange(np.min(pX[:,1]),np.max(pX[:,1]),np.abs(np.max(pX[:,1])-np.min(pX[:,1]))/100.0)
grid = np.zeros((yRange.size, xRange.size))
for (xi, xx) in enumerate(xRange):
for (yi, yy) in enumerate(yRange):
if doboost:
## Boosting
                grid[yi,xi] = classifyBoost(np.array([[xx, yy]]),priors,mus,sigmas,alphas,covdiag=covdiag)
else:
## Simple
                grid[yi,xi] = classify(np.array([[xx, yy]]),prior,mu,sigma,covdiag=covdiag)
    classes = range(np.min(y), np.max(y)+1)
    colormap = cm.rainbow(np.linspace(0, 1, len(classes)))
conv = ColorConverter()
for (color, c) in zip(colormap, classes):
try:
CS = plt.contour(xRange,yRange,(grid==c).astype(float),15,linewidths=0.25,colors=conv.to_rgba_array(color))
except ValueError:
pass
xc = pX[py == c, :]
        plt.scatter(xc[:,0],xc[:,1],marker='o',color=color,s=40,alpha=0.5)
plt.xlim(np.min(pX[:,0]),np.max(pX[:,0]))
plt.ylim(np.min(pX[:,1]),np.max(pX[:,1]))
plt.show()
# ## Run some experiments
#
# Call the `testClassifier` and `plotBoundary` functions for this part.
# Example usage of the functions
testClassifier(dataset='iris',split=0.7,doboost=False,boostiter=5,covdiag=True)
plotBoundary(dataset='iris',split=0.7,doboost=False,boostiter=5,covdiag=True)