pset1-vincent/q4_softmaxreg.py at master · DeepNLP/pset1-vincent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
import random

from cs224d.data_utils import *

from q1_softmax import softmax
from q2_gradcheck import gradcheck_naive
from q3_sgd import load_saved_params

def getSentenceFeature(tokens, wordVectors, sentence):
    """ Obtain the sentence feature for sentiment analysis by averaging its word vectors """
    # Implement computation for the sentence features given a sentence.

    # Inputs:
    # - tokens: a dictionary that maps words to their indices in
    #          the word vector list
    # - wordVectors: word vectors (each row) for all tokens
    # - sentence: a list of words in the sentence of interest

    # Output:
    # - sentVector: feature vector for the sentence

    sentVector = np.zeros((wordVectors.shape[1],))

    ### YOUR CODE HERE
    indices = [tokens[word] for word in sentence]
    sentVector = np.mean(wordVectors[indices], axis=0)
    ### END YOUR CODE

    return sentVector

def softmaxRegression(features, labels, weights, regularization = 0.0, nopredictions = False):
    """ Softmax Regression """
    # Implement softmax regression with weight regularization.

    # Inputs:
    # - features: feature vectors, each row is a feature vector
    # - labels: labels corresponding to the feature vectors
    # - weights: weights of the regressor
    # - regularization: L2 regularization constant

    # Output:
    # - cost: cost of the regressor
    # - grad: gradient of the regressor cost with respect to its
    #        weights
    # - pred: label predictions of the regressor (you might find
    #        np.argmax helpful)

    prob = softmax(features.dot(weights))
    if len(features.shape) > 1:
        N = features.shape[0]
    else:
        N = 1
    # A vectorized implementation of    1/N * sum(cross_entropy(x_i, y_i)) + 1/2*|w|^2
    cost = np.sum(-np.log(prob[range(N), labels])) / N
    cost += 0.5 * regularization * np.sum(weights ** 2)

    ### YOUR CODE HERE: compute the gradients and predictions
    #https://github.com/ZhangBanger/cs224d/blob/master/assignment1/q4_softmaxreg.py
    # NOTE - N is the batch size
    # features is an N x M matrix, M being # features
    # weights is an M X K matrix, K being # classes
    # prob is an N x K matrix (batchSize x classes)
    # labels is a 1-hot (row) vector

    # Get delta, an N x K matrix with CE error signal
    # z = XW, where X = features and W = weights
    # dJ/dz
    delta = np.array(prob)
    delta[range(N), labels] -= 1.

    # dz/dW = 1/N * X * delta
    # dJ/dW = dJ/dz * dz/dW
    grad = features.T.dot(delta) / N

    grad += regularization * weights

    if N > 1:
        pred = np.argmax(prob, axis=1)
    else:
        pred = np.argmax(prob)

    ### END YOUR CODE

    if nopredictions:
        return cost, grad
    else:
        return cost, grad, pred

def accuracy(y, yhat):
    """ Precision for classifier """
    assert(y.shape == yhat.shape)
    return np.sum(y == yhat) * 100.0 / y.size

def softmax_wrapper(features, labels, weights, regularization = 0.0):
    cost, grad, _ = softmaxRegression(features, labels, weights,
        regularization)
    return cost, grad

def sanity_check():
    """
    Run python q4_softmaxreg.py.
    """
    random.seed(314159)
    np.random.seed(265)

    dataset = StanfordSentiment()
    tokens = dataset.tokens()
    nWords = len(tokens)

    _, wordVectors0, _ = load_saved_params()
    wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:])
    dimVectors = wordVectors.shape[1]

    dummy_weights = 0.1 * np.random.randn(dimVectors, 5)
    dummy_features = np.zeros((10, dimVectors))
    dummy_labels = np.zeros((10,), dtype=np.int32)
    for i in range(10):
        words, dummy_labels[i] = dataset.getRandomTrainSentence()
        dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words)
    print ("==== Gradient check for softmax regression ====")
    gradcheck_naive(lambda weights: softmaxRegression(dummy_features,
        dummy_labels, weights, 1.0, nopredictions = True), dummy_weights)

    print ("\n=== Results ===")
    print (softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0))

if __name__ == "__main__":
    sanity_check()