Skip to content
This repository was archived by the owner on Jul 2, 2021. It is now read-only.

Commit a029d2f

Browse files
authored
Merge pull request #685 from Hakuyume/fpn
Feature Pyramid Networks
2 parents 9629c30 + 0f03307 commit a029d2f

File tree

25 files changed

+2166
-3
lines changed

25 files changed

+2166
-3
lines changed

chainercv/links/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from chainercv.links.model.pixelwise_softmax_classifier import PixelwiseSoftmaxClassifier # NOQA
88

99
from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16 # NOQA
10+
from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA
11+
from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA
1012
from chainercv.links.model.resnet import ResNet101 # NOQA
1113
from chainercv.links.model.resnet import ResNet152 # NOQA
1214
from chainercv.links.model.resnet import ResNet50 # NOQA
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from chainercv.links.model.fpn.faster_rcnn import FasterRCNN # NOQA
2+
from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA
3+
from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA
4+
from chainercv.links.model.fpn.fpn import FPN # NOQA
5+
from chainercv.links.model.fpn.head import Head # NOQA
6+
from chainercv.links.model.fpn.head import head_loss_post # NOQA
7+
from chainercv.links.model.fpn.head import head_loss_pre # NOQA
8+
from chainercv.links.model.fpn.rpn import RPN # NOQA
9+
from chainercv.links.model.fpn.rpn import rpn_loss # NOQA
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
from __future__ import division
2+
3+
import numpy as np
4+
5+
import chainer
6+
from chainer.backends import cuda
7+
8+
from chainercv import transforms
9+
10+
11+
class FasterRCNN(chainer.Chain):
    """Base class of Feature Pyramid Networks.

    This is a base class of Feature Pyramid Networks [#]_.

    .. [#] Tsung-Yi Lin et al.
    Feature Pyramid Networks for Object Detection. CVPR 2017

    Args:
        extractor (Link): A link that extracts feature maps.
            This link must have :obj:`scales`, :obj:`mean` and
            :meth:`__call__`.
        rpn (Link): A link that has the same interface as
            :class:`~chainercv.links.model.fpn.RPN`.
            Please refer to the documentation found there.
        head (Link): A link that has the same interface as
            :class:`~chainercv.links.model.fpn.Head`.
            Please refer to the documentation found there.

    Parameters:
        nms_thresh (float): The threshold value
            for :func:`~chainercv.utils.non_maximum_suppression`.
            The default value is :obj:`0.45`.
            This value can be changed directly or by using :meth:`use_preset`.
        score_thresh (float): The threshold value for confidence score.
            If a bounding box whose confidence score is lower than this value,
            the bounding box will be suppressed.
            The default value is :obj:`0.6`.
            This value can be changed directly or by using :meth:`use_preset`.

    """

    # Resizing policy used by :meth:`prepare`: the shorter image side is
    # scaled towards _min_size, but the longer side never exceeds _max_size.
    _min_size = 800
    _max_size = 1333
    # Batched images are zero-padded to a multiple of _stride so every
    # pyramid level (down to 1/32 resolution) sees an integral spatial size.
    _stride = 32

    def __init__(self, extractor, rpn, head):
        super(FasterRCNN, self).__init__()
        with self.init_scope():
            self.extractor = extractor
            self.rpn = rpn
            self.head = head

        self.use_preset('visualize')

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`nms_thresh` and
        :obj:`score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.
        """

        if preset == 'visualize':
            self.nms_thresh = 0.5
            self.score_thresh = 0.7
        elif preset == 'evaluate':
            # A much lower score threshold: evaluation metrics such as mAP
            # benefit from keeping low-confidence detections.
            self.nms_thresh = 0.5
            self.score_thresh = 0.05
        else:
            raise ValueError('preset must be visualize or evaluate')

    def __call__(self, x):
        """Run the network on a preprocessed batch.

        Args:
            x (array): A batch of preprocessed images
                (see :meth:`prepare`).

        Returns:
            tuple: :obj:`(rois, roi_indices, head_locs, head_confs)` —
            proposals from the RPN distributed over pyramid levels,
            with the head's localization and confidence outputs.
        """
        # Inference only; training uses rpn_loss/head_loss_* separately.
        assert(not chainer.config.train)
        hs = self.extractor(x)
        rpn_locs, rpn_confs = self.rpn(hs)
        # Anchors are generated from the spatial size of each feature map.
        anchors = self.rpn.anchors(h.shape[2:] for h in hs)
        rois, roi_indices = self.rpn.decode(
            rpn_locs, rpn_confs, anchors, x.shape)
        # Assign each RoI to a pyramid level before running the head.
        rois, roi_indices = self.head.distribute(rois, roi_indices)
        head_locs, head_confs = self.head(hs, rois, roi_indices)
        return rois, roi_indices, head_locs, head_confs

    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
            tuple of lists:
            This method returns a tuple of three lists,
            :obj:`(bboxes, labels, scores)`.

            * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
                where :math:`R` is the number of bounding boxes in a image. \
                Each bounding box is organized by \
                :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
                in the second axis.
            * **labels** : A list of integer arrays of shape :math:`(R,)`. \
                Each value indicates the class of the bounding box. \
                Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
                number of the foreground classes.
            * **scores** : A list of float arrays of shape :math:`(R,)`. \
                Each value indicates how confident the prediction is.

        """

        # Original sizes are needed to map boxes back to input coordinates.
        sizes = [img.shape[1:] for img in imgs]
        x, scales = self.prepare(imgs)

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            rois, roi_indices, head_locs, head_confs = self(x)
            bboxes, labels, scores = self.head.decode(
                rois, roi_indices, head_locs, head_confs,
                scales, sizes, self.nms_thresh, self.score_thresh)

        # Results are always returned as CPU (numpy) arrays.
        bboxes = [cuda.to_cpu(bbox) for bbox in bboxes]
        labels = [cuda.to_cpu(label) for label in labels]
        scores = [cuda.to_cpu(score) for score in scores]
        return bboxes, labels, scores

    def prepare(self, imgs):
        """Preprocess images.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
            Two arrays: preprocessed images and \
            scales that were calculated in preprocessing.

        """

        scales = []
        resized_imgs = []
        for img in imgs:
            _, H, W = img.shape
            # Scale the shorter side to _min_size unless that would push
            # the longer side beyond _max_size.
            scale = self._min_size / min(H, W)
            if scale * max(H, W) > self._max_size:
                scale = self._max_size / max(H, W)
            scales.append(scale)
            H, W = int(H * scale), int(W * scale)
            img = transforms.resize(img, (H, W))
            # In-place subtraction mutates the resized copy, not the input.
            img -= self.extractor.mean
            resized_imgs.append(img)

        # Pad the batch to the maximum resized size, rounded up to a
        # multiple of _stride; padded regions stay zero.
        size = np.array([im.shape[1:] for im in resized_imgs]).max(axis=0)
        size = (np.ceil(size / self._stride) * self._stride).astype(int)
        x = np.zeros((len(imgs), 3, size[0], size[1]), dtype=np.float32)
        for i, img in enumerate(resized_imgs):
            _, H, W = img.shape
            x[i, :, :H, :W] = img

        # Move to this link's device (CPU or GPU).
        x = self.xp.array(x)
        return x, scales
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
from __future__ import division
2+
3+
import chainer
4+
import chainer.functions as F
5+
import chainer.links as L
6+
7+
from chainercv.links.model.fpn.faster_rcnn import FasterRCNN
8+
from chainercv.links.model.fpn.fpn import FPN
9+
from chainercv.links.model.fpn.head import Head
10+
from chainercv.links.model.fpn.rpn import RPN
11+
from chainercv.links.model.resnet import ResNet101
12+
from chainercv.links.model.resnet import ResNet50
13+
from chainercv import utils
14+
15+
16+
class FasterRCNNFPNResNet(FasterRCNN):
    """Base class for FasterRCNNFPNResNet50 and FasterRCNNFPNResNet101.

    A subclass of this class should have :obj:`_base` and :obj:`_models`.
    """

    def __init__(self, n_fg_class=None, pretrained_model=None):
        # Resolves the pretrained-model spec against _models; `param`
        # carries the validated n_fg_class, `path` a local weight file,
        # the string 'imagenet', or None.
        param, path = utils.prepare_pretrained_model(
            {'n_fg_class': n_fg_class}, pretrained_model, self._models)

        # The base ResNet's classification layer is discarded by
        # remove_unused() below, so n_class=1 keeps it minimal.
        base = self._base(n_class=1, arch='he')
        base.pick = ('res2', 'res3', 'res4', 'res5')
        # Override pool1 with cover_all=False so feature-map sizes halve
        # exactly at each stage, as FPN assumes.
        base.pool1 = lambda x: F.max_pooling_2d(
            x, 3, stride=2, pad=1, cover_all=False)
        base.remove_unused()
        # Five scales: four from res2-res5 plus one extra coarser level
        # (1/64) that FPN adds by pooling.
        extractor = FPN(
            base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64))

        super(FasterRCNNFPNResNet, self).__init__(
            extractor=extractor,
            rpn=RPN(extractor.scales),
            # +1 for the background class.
            head=Head(param['n_fg_class'] + 1, extractor.scales),
        )

        if path == 'imagenet':
            # Copy only the base network's weights; FPN, RPN and head keep
            # their random initialization.
            _copyparams(
                self.extractor.base,
                self._base(pretrained_model='imagenet', arch='he'))
        elif path:
            chainer.serializers.load_npz(path, self)
46+
47+
48+
class FasterRCNNFPNResNet50(FasterRCNNFPNResNet):
    """Feature Pyramid Networks with a ResNet-50 backbone.

    A concrete model of Feature Pyramid Networks [#]_ that plugs
    :class:`~chainercv.links.ResNet50` in as the base feature extractor.

    .. [#] Tsung-Yi Lin et al.
    Feature Pyramid Networks for Object Detection. CVPR 2017

    Args:
        n_fg_class (int): The number of classes excluding the background.
        pretrained_model (string): The weight file to be loaded.
            This can take :obj:`'coco'`, `filepath` or :obj:`None`.
            The default value is :obj:`None`.

            * :obj:`'coco'`: Load weights trained on train split of \
                MS COCO 2017. \
                The weight file is downloaded and cached automatically. \
                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
            * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
                ImageNet. \
                The weight file is downloaded and cached automatically. \
                This option initializes weights partially and the rests are \
                initialized randomly. In this case, :obj:`n_fg_class` \
                can be set to any number.
            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
                must be specified properly.
            * :obj:`None`: Do not load weights.

    """

    # Backbone link class consumed by FasterRCNNFPNResNet.__init__.
    _base = ResNet50
    # Registry of downloadable pretrained weights.
    _models = {
        'coco': {
            'param': {'n_fg_class': 80},
            'url': 'https://chainercv-models.preferred.jp/'
            'faster_rcnn_fpn_resnet50_coco_trained_2018_12_13.npz',
            'cv2': True
        },
    }
89+
90+
91+
class FasterRCNNFPNResNet101(FasterRCNNFPNResNet):
    """Feature Pyramid Networks with a ResNet-101 backbone.

    A concrete model of Feature Pyramid Networks [#]_ that plugs
    :class:`~chainercv.links.ResNet101` in as the base feature extractor.

    .. [#] Tsung-Yi Lin et al.
    Feature Pyramid Networks for Object Detection. CVPR 2017

    Args:
        n_fg_class (int): The number of classes excluding the background.
        pretrained_model (string): The weight file to be loaded.
            This can take :obj:`'coco'`, `filepath` or :obj:`None`.
            The default value is :obj:`None`.

            * :obj:`'coco'`: Load weights trained on train split of \
                MS COCO 2017. \
                The weight file is downloaded and cached automatically. \
                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
            * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \
                ImageNet. \
                The weight file is downloaded and cached automatically. \
                This option initializes weights partially and the rests are \
                initialized randomly. In this case, :obj:`n_fg_class` \
                can be set to any number.
            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
                must be specified properly.
            * :obj:`None`: Do not load weights.

    """

    # Backbone link class consumed by FasterRCNNFPNResNet.__init__.
    _base = ResNet101
    # Registry of downloadable pretrained weights.
    _models = {
        'coco': {
            'param': {'n_fg_class': 80},
            'url': 'https://chainercv-models.preferred.jp/'
            'faster_rcnn_fpn_resnet101_coco_trained_2018_12_13.npz',
            'cv2': True
        },
    }
132+
133+
134+
def _copyparams(dst, src):
    """Recursively copy parameters from *src* into *dst*.

    Walks matching Chain/ChainList hierarchies in parallel and copies
    parameters link by link. BatchNormalization links additionally get
    their running statistics transferred, since ``copyparams`` only
    handles parameters.
    """
    if isinstance(dst, chainer.Chain):
        for child in dst.children():
            _copyparams(child, src[child.name])
        return
    if isinstance(dst, chainer.ChainList):
        for index, child in enumerate(dst):
            _copyparams(child, src[index])
        return
    # Leaf link: copy parameters directly.
    dst.copyparams(src)
    if isinstance(dst, L.BatchNormalization):
        # avg_mean/avg_var are persistents, not parameters, so they are
        # not covered by copyparams and must be copied explicitly.
        dst.avg_mean = src.avg_mean
        dst.avg_var = src.avg_var

chainercv/links/model/fpn/fpn.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import chainer
2+
import chainer.functions as F
3+
from chainer import initializers
4+
import chainer.links as L
5+
6+
7+
class FPN(chainer.Chain):
    """An extractor class of Feature Pyramid Networks.

    This class wraps a feature extractor and provides
    multi-scale features.

    Args:
        base (Link): A base feature extractor.
            It should have :meth:`__call__` and :obj:`mean`.
            :meth:`__call__` should take a batch of images and return
            feature maps of them. The size of the :math:`k+1`-th feature map
            should be the half as that of the :math:`k`-th feature map.
        n_base_output (int): The number of feature maps
            that :obj:`base` returns.
        scales (tuple of floats): The scales of feature maps.

    """

    def __init__(self, base, n_base_output, scales):
        super(FPN, self).__init__()
        with self.init_scope():
            self.base = base
            # 1x1 lateral convolutions and 3x3 output convolutions,
            # one pair per base feature map.
            self.inner = chainer.ChainList()
            self.outer = chainer.ChainList()

        conv_kwargs = {'initialW': initializers.GlorotNormal()}
        for _ in range(n_base_output):
            self.inner.append(L.Convolution2D(256, 1, **conv_kwargs))
            self.outer.append(L.Convolution2D(256, 3, pad=1, **conv_kwargs))

        self.scales = scales

    @property
    def mean(self):
        # Expose the base extractor's mean so FPN can be used wherever a
        # plain extractor is expected.
        return self.base.mean

    def __call__(self, x):
        feats = list(self.base(x))
        n_levels = len(feats)

        # Top-down pathway: walk from the coarsest map to the finest,
        # applying the lateral 1x1 convolution and adding the upsampled
        # (already-merged) coarser level.
        for level in range(n_levels - 1, -1, -1):
            merged = self.inner[level](feats[level])
            if level + 1 < n_levels:
                merged += F.unpooling_2d(feats[level + 1], 2, cover_all=False)
            feats[level] = merged

        # Smooth every merged map with its 3x3 output convolution.
        feats = [self.outer[level](h) for level, h in enumerate(feats)]

        # Extend the pyramid with extra coarser levels (stride-2 pooling)
        # until one map per requested scale exists.
        while len(feats) < len(self.scales):
            feats.append(
                F.max_pooling_2d(feats[-1], 1, stride=2, cover_all=False))

        return feats

0 commit comments

Comments
 (0)