Skip to content
This repository was archived by the owner on Jul 2, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions chainercv/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,14 @@
from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_semantic_segmentation_label_names # NOQA
from chainercv.datasets.coco.coco_bbox_dataset import COCOBboxDataset # NOQA
from chainercv.datasets.coco.coco_instance_segmentation_dataset import COCOInstanceSegmentationDataset # NOQA
from chainercv.datasets.coco.coco_panoptic_segmentation_dataset import COCOPanopticSegmentationDataset # NOQA
from chainercv.datasets.coco.coco_semantic_segmentation_dataset import COCOSemanticSegmentationDataset # NOQA
from chainercv.datasets.coco.coco_utils import coco_bbox_label_names # NOQA
from chainercv.datasets.coco.coco_utils import coco_instance_segmentation_label_names # NOQA
from chainercv.datasets.coco.coco_utils import coco_panoptic_segmentation_label_colors # NOQA
from chainercv.datasets.coco.coco_utils import coco_panoptic_segmentation_label_names # NOQA
from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_names # NOQA
from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_colors # NOQA
from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset # NOQA
from chainercv.datasets.cub.cub_point_dataset import CUBPointDataset # NOQA
from chainercv.datasets.cub.cub_utils import cub_label_names # NOQA
Expand Down
2 changes: 1 addition & 1 deletion chainercv/datasets/coco/coco_bbox_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def __init__(self, data_dir='auto', split='train', year='2017',
else:
img_split = 'train'
if data_dir == 'auto':
data_dir = get_coco(split, img_split, year)
data_dir = get_coco(split, img_split, year, 'instances')

self.img_root = os.path.join(
data_dir, 'images', '{}{}'.format(img_split, year))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(
else:
img_split = 'train'
if data_dir == 'auto':
data_dir = get_coco(split, img_split, year)
data_dir = get_coco(split, img_split, year, 'instances')

self.img_root = os.path.join(
data_dir, 'images', '{}{}'.format(img_split, year))
Expand Down
102 changes: 102 additions & 0 deletions chainercv/datasets/coco/coco_panoptic_segmentation_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import json
import numpy as np
import os

from chainercv import utils
from chainercv.chainer_experimental.datasets.sliceable import GetterDataset
from chainercv.datasets.coco.coco_utils import get_coco


def _rgb2id(color):
return color[0] + 256 * color[1] + 256 * 256 * color[2]


class COCOPanopticSegmentationDataset(GetterDataset):

    """Panoptic segmentation dataset for `MS COCO`_.

    .. _`MS COCO`: http://cocodataset.org/#home

    Args:
        data_dir (string): Path to the root of the training data. If this is
            :obj:`auto`, this class will automatically download data for you
            under :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/coco`.
        split ({'train', 'val'}): Select a split of the dataset.
        use_crowded (bool): If :obj:`True`, crowded instances are kept.
            The default value is :obj:`False`.
        return_crowded (bool): If :obj:`True`, this dataset returns
            :obj:`crowded`. The default value is :obj:`False`.
        return_area (bool): If :obj:`True`, this dataset returns
            :obj:`area`. The default value is :obj:`False`.

    """

    def __init__(
            self, data_dir='auto', split='train',
            use_crowded=False, return_crowded=False,
            return_area=False):
        super(COCOPanopticSegmentationDataset, self).__init__()
        self.use_crowded = use_crowded

        if data_dir == 'auto':
            data_dir = get_coco(split, split, '2017', 'panoptic')

        self.img_root = os.path.join(
            data_dir, 'images', '{}{}'.format(split, 2017))

        self.label_root = os.path.join(
            data_dir, 'annotations', 'panoptic_{}{}'.format(split, 2017))
        anno_path = os.path.join(
            data_dir, 'annotations',
            'panoptic_{}{}.json'.format(split, 2017))

        self.data_dir = data_dir
        # Use a context manager so the annotation file handle is closed
        # (json.load(open(...)) leaks the descriptor until GC).
        with open(anno_path, 'r') as f:
            annos = json.load(f)
        self.annos = annos

        self.cat_ids = [cat['id'] for cat in annos['categories']]
        # Panoptic annotation file names end in '.png'; the matching image
        # shares the stem but uses a '.jpg' extension.
        self.img_paths = [ann['file_name'][:-4] + '.jpg'
                          for ann in annos['annotations']]

        self.add_getter('img', self._get_image)
        self.add_getter('mask', self._get_mask)
        self.add_getter(
            ['label', 'area', 'crowded'],
            self._get_annotations)
        keys = ('img', 'mask', 'label')
        if return_area:
            keys += ('area',)
        if return_crowded:
            keys += ('crowded',)
        self.keys = keys

    def __len__(self):
        return len(self.img_paths)

    def _get_image(self, i):
        # Return the i-th image as a float32 CHW RGB array.
        img_path = os.path.join(
            self.img_root, self.img_paths[i])
        img = utils.read_image(img_path, dtype=np.float32, color=True)
        return img

    def _get_mask(self, i):
        # Build an (n_seg, H, W) boolean mask stack, one plane per segment.
        anno = self.annos['annotations'][i]
        label_path = os.path.join(self.label_root, anno['file_name'])
        rgb_id_map = utils.read_image(
            label_path,
            dtype=np.uint32, color=True)
        id_map = _rgb2id(rgb_id_map)

        H, W = id_map.shape
        n_seg = len(anno['segments_info'])
        crowded = []
        # np.bool was removed in NumPy 1.24; the builtin bool is the
        # equivalent dtype.
        mask = np.zeros((n_seg, H, W), dtype=bool)
        # Loop index renamed from 'i' so it does not shadow the sample index.
        for seg_idx, segm in enumerate(anno['segments_info']):
            mask[seg_idx, id_map == segm['id']] = True
            crowded.append(segm['iscrowd'])
        crowded = np.array(crowded, dtype=bool)

        if not self.use_crowded:
            not_crowded = np.logical_not(crowded)
            mask = mask[not_crowded]
        return mask

    def _get_annotations(self, i):
        # Per-segment labels (indices into self.cat_ids), areas and
        # crowdedness flags, optionally filtered to non-crowded segments.
        anno = self.annos['annotations'][i]['segments_info']

        label = np.array(
            [self.cat_ids.index(segm['category_id'])
             for segm in anno], dtype=np.int32)
        area = np.array(
            [segm['area'] for segm in anno], dtype=np.float32)
        crowded = np.array(
            [segm['iscrowd'] for segm in anno], dtype=bool)

        if not self.use_crowded:
            not_crowded = np.logical_not(crowded)
            label = label[not_crowded]
            area = area[not_crowded]
            crowded = crowded[not_crowded]
        return label, area, crowded
93 changes: 93 additions & 0 deletions chainercv/datasets/coco/coco_semantic_segmentation_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json
import numpy as np
import os

from chainercv import utils
from chainercv.chainer_experimental.datasets.sliceable import GetterDataset
from chainercv.datasets.coco.coco_utils import get_coco


def _rgb2id(color):
return color[0] + 256 * color[1] + 256 * 256 * color[2]


class COCOSemanticSegmentationDataset(GetterDataset):

    """Semantic segmentation dataset for `MS COCO`_.

    Semantic segmentations are generated from panoptic segmentations
    as done in the `official toolkit`_.

    .. _`MS COCO`: http://cocodataset.org/#home

    .. _`official toolkit`: https://github.com/cocodataset/panopticapi/
        blob/master/converters/panoptic2semantic_segmentation.py

    Args:
        data_dir (string): Path to the root of the training data. If this is
            :obj:`auto`, this class will automatically download data for you
            under :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/coco`.
        split ({'train', 'val'}): Select a split of the dataset.

    This dataset returns the following data.

    .. csv-table::
        :header: name, shape, dtype, format

        :obj:`img`, ":math:`(3, H, W)`", :obj:`float32`, \
        "RGB, :math:`[0, 255]`"
        :obj:`label`, ":math:`(H, W)`", :obj:`int32`, \
        ":math:`[-1, \#class - 1]`"

    """

    def __init__(self, data_dir='auto', split='train'):
        super(COCOSemanticSegmentationDataset, self).__init__()
        if data_dir == 'auto':
            data_dir = get_coco(split, split, '2017', 'panoptic')

        self.img_root = os.path.join(
            data_dir, 'images', '{}{}'.format(split, 2017))

        self.label_root = os.path.join(
            data_dir, 'annotations', 'panoptic_{}{}'.format(split, 2017))
        anno_path = os.path.join(
            data_dir, 'annotations',
            'panoptic_{}{}.json'.format(split, 2017))

        self.data_dir = data_dir
        # Use a context manager so the annotation file handle is closed
        # (json.load(open(...)) leaks the descriptor until GC).
        with open(anno_path, 'r') as f:
            annos = json.load(f)
        self.annos = annos

        self.cat_ids = [cat['id'] for cat in annos['categories']]
        # Panoptic annotation file names end in '.png'; the matching image
        # shares the stem but uses a '.jpg' extension.
        self.img_paths = [ann['file_name'][:-4] + '.jpg'
                          for ann in annos['annotations']]

        self.add_getter('img', self._get_image)
        self.add_getter('label', self._get_label)

        self.keys = ('img', 'label')

    def __len__(self):
        return len(self.img_paths)

    def _get_image(self, i):
        # Return the i-th image as a float32 CHW RGB array.
        img_path = os.path.join(
            self.img_root, self.img_paths[i])
        img = utils.read_image(img_path, dtype=np.float32, color=True)
        return img

    def _get_label(self, i):
        # https://github.com/cocodataset/panopticapi/blob/master/converters/
        # panoptic2semantic_segmentation.py#L58
        anno = self.annos['annotations'][i]
        label_path = os.path.join(self.label_root, anno['file_name'])
        rgb_id_map = utils.read_image(
            label_path,
            dtype=np.uint32, color=True)
        id_map = _rgb2id(rgb_id_map)
        # Pixels belonging to no segment keep the ignore value -1.
        label = np.full_like(id_map, -1, dtype=np.int32)
        for inst in anno['segments_info']:
            mask = id_map == inst['id']
            label[mask] = self.cat_ids.index(inst['category_id'])
        return label
Loading