|
| 1 | +# Copyright (c) OpenMMLab. All rights reserved. |
| 2 | +import logging |
| 3 | +import numpy as np |
| 4 | +from typing import Dict, List, Optional, Sequence, Tuple, Union |
| 5 | + |
| 6 | +from .utils.bbox_overlaps_rotated import (calculate_bboxes_area_rotated, |
| 7 | + qbox_to_rbox) |
| 8 | +from .voc_map import VOCMeanAP |
| 9 | + |
| 10 | +logger = logging.getLogger(__name__) |
| 11 | + |
| 12 | +try: |
| 13 | + # we prefer to use `bbox_iou_rotated` in mmcv to calculate ious |
| 14 | + from mmcv.ops import box_iou_rotated |
| 15 | + from torch import Tensor |
| 16 | + HAS_MMCV = True |
| 17 | +except Exception as e: # noqa F841 |
| 18 | + from .utils.bbox_overlaps_rotated import calculate_overlaps_rotated |
| 19 | + HAS_MMCV = False |
| 20 | + logger.debug( |
| 21 | + 'mmcv is not installed, calculating IoU of rotated bbox with OpenCV.') |
| 22 | + |
| 23 | + |
def filter_by_bboxes_area_rotated(bboxes: np.ndarray,
                                  min_area: Optional[float],
                                  max_area: Optional[float]):
    """Build a boolean mask selecting rotated bboxes whose area lies in a
    given range.

    Args:
        bboxes (numpy.ndarray): The bboxes with shape (n, 5) in 'xywha'
            format.
        min_area (float, optional): Lower area bound (inclusive). If None,
            no lower bound is applied.
        max_area (float, optional): Upper area bound (exclusive). If None,
            no upper bound is applied.

    Returns:
        numpy.ndarray: Boolean mask over ``bboxes``; True for each bbox
        whose area falls within ``[min_area, max_area)``.
    """
    areas = calculate_bboxes_area_rotated(bboxes)
    # Start from an all-True mask and narrow it with whichever bounds
    # were actually provided.
    keep = np.full(areas.shape, True)
    if min_area is not None:
        keep = keep & (areas >= min_area)
    if max_area is not None:
        keep = keep & (areas < max_area)
    return keep
| 46 | + |
| 47 | + |
class DOTAMeanAP(VOCMeanAP):
    """DOTA evaluation metric.

    DOTA is a large-scale dataset for object detection in aerial images which
    is introduced in https://arxiv.org/abs/1711.10398. This metric computes
    the DOTA mAP (mean Average Precision) with the given IoU thresholds and
    scale ranges.

    Args:
        iou_thrs (float | List[float]): IoU thresholds. Defaults to 0.5.
        scale_ranges (List[tuple], optional): Scale ranges for evaluating
            mAP. If not specified, all bounding boxes would be included in
            evaluation. Defaults to None.
        num_classes (int, optional): The number of classes. If None, it will
            be obtained from the 'CLASSES' field in ``self.dataset_meta``.
            Defaults to None.
        eval_mode (str): 'area' or '11points'. 'area' means calculating the
            area under the precision-recall curve, '11points' means
            calculating the average precision of recalls at [0, 0.1, ..., 1].
            PASCAL VOC2007 uses '11points' by default, while PASCAL VOC2012
            uses 'area'. Defaults to '11points'.
        nproc (int): Processes used for computing TP and FP. If nproc
            is less than or equal to 1, multiprocessing will not be used.
            Defaults to 4.
        drop_class_ap (bool): Whether to drop the class without ground truth
            when calculating the average precision for each class.
            Defaults to True.
        classwise (bool): Whether to return the computed results of each
            class. Defaults to False.
        **kwargs: Keyword parameters passed to :class:`BaseMetric`.

    Examples:

        >>> import numpy as np
        >>> from mmeval import DOTAMeanAP
        >>> num_classes = 15
        >>> dota_metric = DOTAMeanAP(num_classes=15)
        >>>
        >>> def _gen_bboxes(num_bboxes, img_w=256, img_h=256):
        ...     # randomly generate bounding boxes in 'xywha' format.
        ...     x = np.random.rand(num_bboxes, ) * img_w
        ...     y = np.random.rand(num_bboxes, ) * img_h
        ...     w = np.random.rand(num_bboxes, ) * (img_w - x)
        ...     h = np.random.rand(num_bboxes, ) * (img_h - y)
        ...     a = np.random.rand(num_bboxes, ) * np.pi / 2
        ...     return np.stack([x, y, w, h, a], axis=1)
        >>> prediction = {
        ...     'bboxes': _gen_bboxes(10),
        ...     'scores': np.random.rand(10, ),
        ...     'labels': np.random.randint(0, num_classes, size=(10, ))
        ... }
        >>> groundtruth = {
        ...     'bboxes': _gen_bboxes(10),
        ...     'labels': np.random.randint(0, num_classes, size=(10, )),
        ...     'bboxes_ignore': _gen_bboxes(5),
        ...     'labels_ignore': np.random.randint(0, num_classes, size=(5, ))
        ... }
        >>> dota_metric(predictions=[prediction, ], groundtruths=[groundtruth, ])  # doctest: +ELLIPSIS # noqa: E501
        {'mAP@0.5': ..., 'mAP': ...}
    """

    def __init__(self,
                 iou_thrs: Union[float, List[float]] = 0.5,
                 scale_ranges: Optional[List[Tuple]] = None,
                 num_classes: Optional[int] = None,
                 eval_mode: str = '11points',
                 nproc: int = 4,
                 drop_class_ap: bool = True,
                 classwise: bool = False,
                 **kwargs) -> None:
        # DOTA boxes are in rotated-box coordinates, so the VOC-style legacy
        # pixel-coordinate convention is always disabled here.
        super().__init__(
            iou_thrs=iou_thrs,
            scale_ranges=scale_ranges,
            num_classes=num_classes,
            eval_mode=eval_mode,
            use_legacy_coordinate=False,
            nproc=nproc,
            drop_class_ap=drop_class_ap,
            classwise=classwise,
            **kwargs)

    def add(self, predictions: Sequence[Dict], groundtruths: Sequence[Dict]) -> None: # type: ignore # yapf: disable # noqa: E501
        """Add the intermediate results to ``self._results``.

        Args:
            predictions (Sequence[Dict]): A sequence of dict. Each dict
                represents a detection result for an image, with the
                following keys:

                - bboxes (numpy.ndarray): Shape (N, 5) or shape (N, 8),
                  bounding bboxes of this image. The box format depends on
                  predict_box_type. Details in Note.
                - scores (numpy.ndarray): Shape (N, ), the predicted scores
                  of bounding boxes.
                - labels (numpy.ndarray): Shape (N, ), the predicted labels
                  of bounding boxes.

            groundtruths (Sequence[Dict]): A sequence of dict. Each dict
                represents a groundtruth for an image, with the following
                keys:

                - bboxes (numpy.ndarray): Shape (M, 5) or shape (M, 8), the
                  groundtruth bounding bboxes of this image. The box format
                  depends on predict_box_type. Details in Note.
                - labels (numpy.ndarray): Shape (M, ), the ground truth
                  labels of bounding boxes.
                - bboxes_ignore (numpy.ndarray): Shape (K, 5) or shape (K, 8),
                  the groundtruth ignored bounding bboxes of this image. The
                  box format depends on ``self.predict_box_type``. Details in
                  Note.
                - labels_ignore (numpy.ndarray): Shape (K, ), the ground
                  truth ignored labels of bounding boxes.

        Note:
            The box shape of ``predictions`` and ``groundtruths`` depends
            on the predict_box_type. If predict_box_type is 'rbox', the box
            shape should be (N, 5) which represents the (x, y, w, h, angle),
            otherwise the box shape should be (N, 8) which represents the
            (x1, y1, x2, y2, x3, y3, x4, y4).
        """
        for prediction, groundtruth in zip(predictions, groundtruths):
            assert isinstance(prediction, dict), 'The prediciton should be ' \
                f'a sequence of dict, but got a sequence of {type(prediction)}.'  # noqa: E501
            assert isinstance(groundtruth, dict), 'The label should be ' \
                f'a sequence of dict, but got a sequence of {type(groundtruth)}.'  # noqa: E501
            self._results.append((prediction, groundtruth))

    @staticmethod
    def _calculate_image_tpfp(  # type: ignore
            pred_bboxes: np.ndarray, gt_bboxes: np.ndarray,
            ignore_gt_bboxes: np.ndarray, iou_thrs: List[float],
            area_ranges: List[Tuple[Optional[float], Optional[float]]], *args,
            **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        """Calculate the true positive and false positive on an image.

        Args:
            pred_bboxes (numpy.ndarray): Predicted bboxes of this image, with
                shape (N, 6) or shape (N, 9) depending on predict_box_type:
                (N, 6) for 'rbox' boxes, (N, 9) for 'qbox' boxes. The
                predicted score of each bbox is concatenated behind the
                predicted bbox (last column).
            gt_bboxes (numpy.ndarray): Ground truth bboxes of this image, with
                shape (M, 5) or shape (M, 8).
            ignore_gt_bboxes (numpy.ndarray): Ground truth ignored bboxes of
                this image, with shape (K, 5) or shape (K, 8).
            iou_thrs (List[float]): The IoU thresholds.
            area_ranges (List[Tuple]): The area ranges.

        Returns:
            tuple (tp, fp):

            - tp (numpy.ndarray): Shape (num_ious, num_scales, N),
              the true positive flag of each predicted bbox on this image.
            - fp (numpy.ndarray): Shape (num_ious, num_scales, N),
              the false positive flag of each predicted bbox on this image.

        Note:
            This method should be a staticmethod to avoid resource competition
            during multiple processes.
        """
        # Step 0. (optional)
        # we need to convert qbox type box to rbox type because OpenCV only
        # support rbox format iou calculation.
        if gt_bboxes.shape[-1] == 8:  # qbox shape (M, 8)
            # NOTE(review): prediction conversion is keyed on the gt shape —
            # assumes preds and gts always share the same box type; confirm
            # against `predict_box_type` in callers.
            pred_bboxes = qbox_to_rbox(pred_bboxes[:, :8])
            gt_bboxes = qbox_to_rbox(gt_bboxes)
            ignore_gt_bboxes = qbox_to_rbox(ignore_gt_bboxes)

        # Step 1. Concatenate `gt_bboxes` and `ignore_gt_bboxes`, then set
        # the `ignore_gt_flags`.
        all_gt_bboxes = np.concatenate((gt_bboxes, ignore_gt_bboxes))
        ignore_gt_flags = np.concatenate((np.zeros(
            (gt_bboxes.shape[0], 1),
            dtype=bool), np.ones((ignore_gt_bboxes.shape[0], 1), dtype=bool)))

        # Step 2. Initialize the `tp` and `fp` arrays.
        num_preds = pred_bboxes.shape[0]
        tp = np.zeros((len(iou_thrs), len(area_ranges), num_preds))
        fp = np.zeros((len(iou_thrs), len(area_ranges), num_preds))

        # Step 3. If there are no gt bboxes in this image, then all pred bboxes
        # within area range are false positives.
        if all_gt_bboxes.shape[0] == 0:
            for idx, (min_area, max_area) in enumerate(area_ranges):
                area_mask = filter_by_bboxes_area_rotated(
                    pred_bboxes[:, :5], min_area, max_area)
                fp[:, idx, area_mask] = 1
            return tp, fp

        # Step 4. Calculate the IoUs between the predicted bboxes and the
        # ground truth bboxes.
        if HAS_MMCV:
            # the input and output of box_iou_rotated are torch.Tensor
            ious = np.array(
                box_iou_rotated(
                    Tensor(pred_bboxes[:, :5]), Tensor(all_gt_bboxes)))
        else:
            # OpenCV-based fallback; slower but dependency-free.
            ious = calculate_overlaps_rotated((pred_bboxes[:, :5]),
                                              all_gt_bboxes)
        # For each pred bbox, the max iou with all gts.
        ious_max = ious.max(axis=1)
        # For each pred bbox, which gt overlaps most with it.
        ious_argmax = ious.argmax(axis=1)
        # Sort all pred bbox in descending order by scores.
        sorted_indices = np.argsort(-pred_bboxes[:, -1])

        # Step 5. Count the `tp` and `fp` of each iou threshold and area range.
        for iou_thr_idx, iou_thr in enumerate(iou_thrs):
            for area_idx, (min_area, max_area) in enumerate(area_ranges):
                # The flags that gt bboxes have been matched.
                gt_covered_flags = np.zeros(all_gt_bboxes.shape[0], dtype=bool)
                # The flags that gt bboxes out of area range.
                gt_area_mask = filter_by_bboxes_area_rotated(
                    all_gt_bboxes, min_area, max_area)
                ignore_gt_area_flags = ~gt_area_mask

                # Count the prediction bboxes in order of decreasing score,
                # greedily matching each pred to its best-overlapping gt.
                for pred_bbox_idx in sorted_indices:
                    if ious_max[pred_bbox_idx] >= iou_thr:
                        matched_gt_idx = ious_argmax[pred_bbox_idx]
                        # Ignore the pred bbox that match an ignored gt bbox.
                        if ignore_gt_flags[matched_gt_idx]:
                            continue
                        # Ignore the pred bbox that is out of area range.
                        if ignore_gt_area_flags[matched_gt_idx]:
                            continue
                        if not gt_covered_flags[matched_gt_idx]:
                            tp[iou_thr_idx, area_idx, pred_bbox_idx] = 1
                            gt_covered_flags[matched_gt_idx] = True
                        else:
                            # This gt bbox has been matched and counted as fp.
                            fp[iou_thr_idx, area_idx, pred_bbox_idx] = 1
                    else:
                        # Unmatched pred: only a false positive if it lies
                        # inside the current area range.
                        area_mask = filter_by_bboxes_area_rotated(
                            pred_bboxes[pred_bbox_idx, :5], min_area, max_area)
                        if area_mask:
                            fp[iou_thr_idx, area_idx, pred_bbox_idx] = 1

        return tp, fp

    def _filter_by_bboxes_area(self, bboxes: np.ndarray,
                               min_area: Optional[float],
                               max_area: Optional[float]):
        """Filter the bboxes with an area range.

        Overrides the VOC axis-aligned area filter with the rotated-box one.

        Args:
            bboxes (numpy.ndarray): The bboxes with shape (n, 5) in 'xywha'
                format.
            min_area (Optional[float]): The minimum area. If None, does not
                filter the minimum area.
            max_area (Optional[float]): The maximum area. If None, does not
                filter the maximum area.

        Returns:
            numpy.ndarray: A mask of ``bboxes`` identify which bbox are
            filtered.
        """
        return filter_by_bboxes_area_rotated(bboxes, min_area, max_area)
0 commit comments