-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbai22_MultiClassSVM.py
More file actions
153 lines (124 loc) · 3.92 KB
/
bai22_MultiClassSVM.py
File metadata and controls
153 lines (124 loc) · 3.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*- coding: utf-8 -*-
"""
Created on Fri May 1 17:39:52 2020
@author: phamk
"""
import numpy as np
from random import shuffle
# naive way to calculate loss and grad
def svm_loss_naive(W, X, y, reg):
    """Multiclass SVM (hinge) loss and its gradient, via explicit loops.

    Args:
        W: weight matrix, shape (d, C).
        X: data matrix, shape (d, N), one column per example.
        y: integer class labels, shape (N,).
        reg: L2 regularization strength.

    Returns:
        (loss, dW): scalar averaged loss and gradient of the same shape as W.
    """
    num_classes = W.shape[1]
    num_points = X.shape[1]
    total = 0
    grad = np.zeros_like(W)
    for n in range(num_points):
        col = X[:, n]
        scores = W.T.dot(col)
        correct = scores[y[n]]
        for c in range(num_classes):
            if c == y[n]:
                continue
            m = 1 - correct + scores[c]
            if m <= 0:
                continue
            # this class violates the margin: it contributes to the loss
            # and pushes the gradient of its column up, the correct one down
            total += m
            grad[:, c] += col
            grad[:, y[n]] -= col
    # average over examples, then add the L2 regularization terms
    total = total / num_points + 0.5 * reg * np.sum(W * W)
    grad = grad / num_points + reg * W
    return total, grad
# random, small data
# Build a small random problem (N examples, C classes, d features)
# to sanity-check the naive implementation.
N, C, d = 10, 3, 5
reg = .1
W = np.random.randn(d, C)
X = np.random.randn(d, N)
y = np.random.randint(C, size = N)
# sanity check
print('loss without regularization:', svm_loss_naive(W, X, y, 0)[0])
print('loss with regularization:', svm_loss_naive(W, X, y, .1)[0])
# Scalar-valued view of the loss as a function of W only,
# used below for numerical gradient checking.
f = lambda W: svm_loss_naive(W, X, y, .1)[0]
# for checking if calculated grad is correct
def numerical_grad_general(W, f):
    """Central-difference numerical gradient of a scalar function f at W.

    Handles arrays of any shape by iterating over the flattened entries,
    so a single loop suffices.

    Args:
        W: point at which to evaluate the gradient (any-shape ndarray).
        f: callable mapping an array of W's shape to a scalar.

    Returns:
        ndarray of W's shape holding the estimated gradient.
    """
    eps = 1e-6
    flat = W.flatten()
    grad_flat = np.zeros_like(flat)
    for idx in range(flat.size):
        plus, minus = flat.copy(), flat.copy()
        plus[idx] += eps
        minus[idx] -= eps
        # evaluate f at the two perturbed points (restored to W's shape)
        delta = f(plus.reshape(W.shape)) - f(minus.reshape(W.shape))
        grad_flat[idx] = delta / (2 * eps)
    return grad_flat.reshape(W.shape)
# compare two ways of computing gradient:
# analytic gradient from svm_loss_naive vs. the numerical estimate
g1 = svm_loss_naive(W, X, y, .1)[1]
g2 = numerical_grad_general(W, f)
print('gradient difference: %f' %np.linalg.norm(g1 - g2))
# this should be very small
# more efficient way to compute loss and grad
def svm_loss_vectorized(W, X, y, reg):
    """Multiclass SVM (hinge) loss and its gradient, fully vectorized.

    Args:
        W: weight matrix, shape (d, C).
        X: data matrix, shape (d, N), one column per example.
        y: integer class labels, shape (N,).
        reg: L2 regularization strength.

    Returns:
        (loss, dW): scalar averaged loss and gradient of the same shape as W.
    """
    _, N = X.shape
    Z = W.T.dot(X)  # class scores, shape (C, N)
    # Score of each example's correct class, as a (1, N) row.
    # Direct fancy indexing instead of np.choose: np.choose is limited to
    # at most 32 choice arrays (classes); this works for any C.
    correct_class_score = Z[y, np.arange(N)].reshape(1, N)
    margins = np.maximum(0, Z - correct_class_score + 1)
    margins[y, np.arange(N)] = 0  # the correct class contributes no loss
    loss = np.sum(margins) / N + 0.5 * reg * np.sum(W * W)
    # Gradient: every margin-violating class pulls +x_n into its column;
    # the correct class column accumulates -(#violating classes) * x_n.
    F = (margins > 0).astype(int)
    F[y, np.arange(N)] = -np.sum(F, axis=0)
    dW = X.dot(F.T) / N + reg * W
    return loss, dW
# Large random problem for timing the two implementations against each
# other. NOTE(review): sizes look CIFAR-10-like (49000 examples, 10
# classes, 32*32*3+1 = 3073 features) -- data here is random, not CIFAR.
N, C, d = 49000, 10, 3073
reg = .1
W = np.random.randn(d, C)
X = np.random.randn(d, N)
y = np.random.randint(C, size = N)
import time
t1 = time.time()
l1, dW1 = svm_loss_naive(W, X, y, reg)
t2 = time.time()
print('Naive : run time:', t2 - t1, '(s)')
t1 = time.time()
l2, dW2 = svm_loss_vectorized(W, X, y, reg)
t2 = time.time()
print('Vectorized: run time:', t2 - t1, '(s)')
# both differences should be negligibly small
print('loss difference:', np.linalg.norm(l1 - l2))
print('gradient difference:', np.linalg.norm(dW1 - dW2))
# Mini-batch gradient descent
def multiclass_svm_GD(X, y, Winit, reg, lr=.1, \
batch_size = 100, num_iters = 1000, print_every = 100):
W = Winit
loss_history = np.zeros((num_iters))
for it in range(num_iters):
# randomly pick a batch of X
idx = np.random.choice(X.shape[1], batch_size)
X_batch = X[:, idx]
y_batch = y[idx]
loss_history[it], dW = \
svm_loss_vectorized(W, X_batch, y_batch, reg)
W -= lr*dW
if it % print_every == 1:
print ('it %d/%d, loss = %f' \
%(it, num_iters, loss_history[it]))
return W, loss_history
# Train on a fresh large random problem and visualize convergence.
N, C, d = 49000, 10, 3073
reg = .1
W = np.random.randn(d, C)
X = np.random.randn(d, N)
y = np.random.randint(C, size = N)
W, loss_history = multiclass_svm_GD(X, y, W, reg)
import matplotlib.pyplot as plt
# plot loss as a function of iteration
plt.plot(loss_history)
plt.show()