paddleseg/models/lraspp.py

# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import partial

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager


@manager.MODELS.add_component
class LRASPP(nn.Layer):
    """
    Semantic segmentation model with a light R-ASPP head.

    The original article refers to
        Howard, Andrew, et al. "Searching for mobilenetv3."
        (https://arxiv.org/pdf/1905.02244.pdf)

    Args:
        num_classes (int): The number of target classes.
        backbone(nn.Layer): Backbone network. It must expose a `feat_channels` attribute
            that is long enough to be indexed by every value in `backbone_indices`.
        backbone_indices (List(int), optional): The values indicate the indices of backbone output
            used as the input of the LR-ASPP head. Must contain at least one index and exactly
            one more element than `lraspp_head_inter_chs`.
            Default: [0, 1, 3].
        lraspp_head_inter_chs (List(int), optional): The intermediate channels of LR-ASPP head.
            Default: [32, 64].
        lraspp_head_out_ch (int, optional): The output channels of each ASPP branch in the LR-ASPP head.
            Default: 128
        resize_mode (str, optional): The resize mode for the upsampling operation in the LR-ASPP head.
            Default: bilinear.
        use_gap (bool, optional): If true, use global average pooling in the LR-ASPP head; otherwise, use
            a 49x49 kernel for average pooling.
            Default: True.
        pretrained (str, optional): The path or url of pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=[0, 1, 3],
                 lraspp_head_inter_chs=[32, 64],
                 lraspp_head_out_ch=128,
                 resize_mode='bilinear',
                 use_gap=True,
                 pretrained=None):
        super().__init__()

        # backbone
        assert hasattr(backbone, 'feat_channels'), \
            "The backbone should has feat_channels."
        # Check non-emptiness first: max() below would otherwise raise a bare
        # ValueError on an empty sequence instead of this explicit message.
        assert len(backbone_indices) >= 1, "The lenght of backbone_indices " \
            "should not be lesser than 1"
        assert len(backbone.feat_channels) >= len(backbone_indices), \
            f"The length of input backbone_indices ({len(backbone_indices)}) should not be " \
            f"greater than the length of feat_channels ({len(backbone.feat_channels)})."
        assert len(backbone.feat_channels) > max(backbone_indices), \
            f"The max value ({max(backbone_indices)}) of backbone_indices should be " \
            f"less than the length of feat_channels ({len(backbone.feat_channels)})."
        self.backbone = backbone

        # head
        assert len(backbone_indices) == len(
            lraspp_head_inter_chs
        ) + 1, "The length of backbone_indices should be 1 greater than lraspp_head_inter_chs."
        # Store a copy so the shared default list (or the caller's list) is
        # never aliased by this instance.
        self.backbone_indices = list(backbone_indices)

        self.lraspp_head = LRASPPHead(self.backbone_indices,
                                      backbone.feat_channels,
                                      lraspp_head_inter_chs, lraspp_head_out_ch,
                                      num_classes, resize_mode, use_gap)

        # pretrained
        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        """
        Run the backbone and the LR-ASPP head, then resize the logits to the input size.

        Args:
            x (Tensor): Input tensor; its shape from axis 2 onwards is used as the
                target size of the output logits.

        Returns:
            list: A single-element list holding the logits resized to the spatial
                size of `x`.
        """
        x_hw = x.shape[2:]

        feats_backbone = self.backbone(x)
        assert len(feats_backbone) >= len(self.backbone_indices), \
            f"The nums of backbone feats ({len(feats_backbone)}) should be greater or " \
            f"equal than the nums of backbone_indices ({len(self.backbone_indices)})"

        y = self.lraspp_head(feats_backbone)
        # The final resize is always bilinear; `resize_mode` only affects the
        # interpolation performed inside the head.
        y = F.interpolate(y, x_hw, mode='bilinear', align_corners=False)
        logit_list = [y]

        return logit_list

    def init_weight(self):
        """Load the whole model from `self.pretrained` when it is not None."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class LRASPPHead(nn.Layer):
    """
    The LR-ASPP head that turns selected backbone features into class logits.

    The feature selected by the last entry of `indices` goes through two branches
    (`conv_v`, and `conv_w` = average pooling -> 1x1 conv -> sigmoid) whose outputs
    are multiplied together. The result is then progressively resized to, and
    concatenated with, the remaining selected features in reverse order of `indices`.

    Args:
        indices (List(int)): Indices of the backbone outputs consumed by the head.
            The last entry selects the feature fed to `conv_v` / `conv_w`.
        in_chs (List(int)): Channel numbers of all backbone outputs; it is indexed
            by the values in `indices`.
        mid_chs (List(int)): Output channels of the 1x1 convs applied to every
            selected feature except the last one, given in the same order as
            `indices[:-1]`.
        out_ch (int): Channel number used by the branches and the fusion convs.
        n_classes (int): Channel number of the returned logits.
        resize_mode (str): Mode passed to `F.interpolate` inside the head.
        use_gap (bool): If true, pool with `nn.AdaptiveAvgPool2D(1)`; otherwise use
            `nn.AvgPool2D` with a 49x49 kernel and a (16, 20) stride.
        align_corners (bool, optional): Forwarded to `F.interpolate`. Default: False.
    """

    def __init__(self,
                 indices,
                 in_chs,
                 mid_chs,
                 out_ch,
                 n_classes,
                 resize_mode,
                 use_gap,
                 align_corners=False):
        super().__init__()

        # Index of the feature that feeds conv_v / conv_w. Those layers are built
        # with in_chs[indices[-1]] channels, so forward must read the same feature.
        self.last_index = indices[-1]
        # Remaining indices, walked from the second-to-last one back to the first.
        self.indices = indices[-2::-1]
        self.in_chs = [in_chs[i] for i in indices[::-1]]
        self.mid_chs = mid_chs[::-1]
        self.convs = nn.LayerList()
        self.conv_ups = nn.LayerList()
        # One (projection, fusion) pair per remaining feature, in reversed order.
        for in_ch, mid_ch in zip(self.in_chs[1:], self.mid_chs):
            self.convs.append(
                nn.Conv2D(in_ch, mid_ch, kernel_size=1, bias_attr=False))
            self.conv_ups.append(layers.ConvBNReLU(out_ch + mid_ch, out_ch, 1))
        self.conv_w = nn.Sequential(
            nn.AvgPool2D(kernel_size=(49, 49), stride=(16, 20))
            if not use_gap else nn.AdaptiveAvgPool2D(1),
            nn.Conv2D(self.in_chs[0], out_ch, 1, bias_attr=False), nn.Sigmoid())
        self.conv_v = layers.ConvBNReLU(self.in_chs[0], out_ch, 1)
        self.conv_t = nn.Conv2D(out_ch, out_ch, kernel_size=1, bias_attr=False)
        self.conv_out = nn.Conv2D(out_ch,
                                  n_classes,
                                  kernel_size=1,
                                  bias_attr=False)

        self.interp = partial(F.interpolate,
                              mode=resize_mode,
                              align_corners=align_corners)

    def forward(self, in_feat_list):
        """
        Compute the logits from the list of backbone features.

        Args:
            in_feat_list (list): All backbone outputs; it is indexed by the values
                of `indices` given at construction.

        Returns:
            Tensor: Logits with `n_classes` channels at the spatial size of the
                feature selected by `indices[0]` (or of the last selected feature
                when `indices` has a single entry).
        """
        # Read the feature selected by indices[-1] rather than blindly taking the
        # final backbone output, so the channels match conv_v / conv_w.
        x = in_feat_list[self.last_index]

        # Sigmoid weights from the pooled branch, resized back to x's spatial size.
        x = self.conv_v(x) * self.interp(self.conv_w(x), x.shape[2:])
        y = self.conv_t(x)

        for idx, conv, conv_up in zip(self.indices, self.convs, self.conv_ups):
            feat = in_feat_list[idx]
            # Resize to this feature's spatial size, concat its 1x1 projection, fuse.
            y = self.interp(y, feat.shape[2:])
            y = paddle.concat([y, conv(feat)], axis=1)
            y = conv_up(y)

        y = self.conv_out(y)
        return y