nd891-project-facial-keypoint-detection/models.py at master · sdonatti/nd891-project-facial-keypoint-detection · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy as np
import torch.nn as nn


# ----------------------------------------------------------------------------
def initialize_xavier(module):
    """
    Applies Xavier-uniform initialization to a convolutional or linear module

    The weight is re-drawn in place with ``torch.nn.init.xavier_uniform_``
    and the bias, when the module has one, is filled with the constant 0.01.
    Modules of any other type are left untouched, so the function can be
    called on every submodule of a model without filtering beforehand.

    Parameters
    ----------
    module : torch.nn.Module
        The PyTorch module to initialize (e.g., torch.nn.Linear)
    """
    # leave anything that is not a 2D convolution or a linear layer as it is
    if not isinstance(module, (nn.Conv2d, nn.Linear)):
        return

    # draw the weights from the xavier uniform distribution
    nn.init.xavier_uniform_(module.weight)

    # layers created with bias=False expose None here and are skipped
    bias = module.bias
    if bias is not None:
        bias.data.fill_(0.01)
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
def count_parameters(module):
    """
    Counts the trainable parameters of a PyTorch model

    Only parameters whose ``requires_grad`` flag is set are counted, so
    frozen parameters do not contribute to the total.

    Parameters
    ----------
    module : torch.nn.Module
        The PyTorch model (e.g., Net)

    Returns
    -------
    int
        The model's trainable parameter number (0 when the module has no
        trainable parameters)
    """
    # the builtin sum consumes the generator directly and yields a plain int;
    # numpy's sum is deprecated for generator arguments
    return sum(p.numel() for p in module.parameters() if p.requires_grad)
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
class Flatten(nn.Module):
    """
    PyTorch layer that flattens tensors

    The first dimension of the input is kept and every remaining dimension
    is collapsed into a single one, turning an input of shape
    ``(d0, d1, ..., dk)`` into ``(d0, d1 * ... * dk)``.
    """

    def __init__(self):
        # initialize parent class variables
        super(Flatten, self).__init__()

    def forward(self, thick_tensor):
        """
        Flattens every dimension of the input except the first

        Parameters
        ----------
        thick_tensor : torch.Tensor
            The tensor to flatten, with at least one dimension

        Returns
        -------
        torch.Tensor
            A two-dimensional tensor whose first dimension matches the input
        """
        # reshape returns a view for contiguous inputs, exactly as view did,
        # but copies instead of raising when the input memory layout is not
        # contiguous (e.g., transposed or channels-last tensors)
        return thick_tensor.reshape(thick_tensor.size(0), -1)
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
class Net(nn.Module):
    """
    PyTorch model that predicts facial landmarks

    The model is a fully strided convolutional backbone followed by a single
    linear layer. The backbone takes a one-channel (grayscale) image and runs
    it through seven convolution / batch normalization / ReLU stages, each
    convolution using a stride of 2 and no bias. The regressor expects
    512 * 2 * 2 flattened features (which is what the backbone produces for,
    e.g., a 224x224 input) and outputs 136 values, i.e., 2 for each of the
    68 keypoint (x, y) pairs.
    """

    def __init__(self):
        # initialize parent class variables
        super().__init__()

        # one row per backbone stage:
        # (input channels, output channels, kernel size, padding)
        stage_specs = [(1, 8, 5, 2),
                       (8, 16, 3, 1),
                       (16, 32, 3, 1),
                       (32, 64, 3, 1),
                       (64, 128, 3, 1),
                       (128, 256, 3, 1),
                       (256, 512, 1, 0)]

        # assemble the feature extractor backbone stage by stage; the bias is
        # disabled on every convolution, each of which is directly followed
        # by a batch normalization layer
        layers = []
        for in_channels, out_channels, kernel, pad in stage_specs:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=kernel,
                                    stride=2, padding=pad, bias=False))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(inplace=True))
        # collapse the feature maps into one vector per sample
        layers.append(Flatten())
        self.extractor = nn.Sequential(*layers)

        # define the keypoint regressor
        self.regressor = nn.Sequential(nn.Linear(512*2*2, 136))

    def forward(self, x):
        """
        Runs the input image batch through the backbone and the regressor

        Parameters
        ----------
        x : torch.Tensor
            The input image batch

        Returns
        -------
        torch.Tensor
            The regressor output, with 136 values per sample
        """
        # extract image features, then estimate the facial landmarks from them
        features = self.extractor(x)
        return self.regressor(features)
# ----------------------------------------------------------------------------