From d9f24f97334e98ec0b322493a668e5272a3153e9 Mon Sep 17 00:00:00 2001 From: Charles Zaloom Date: Wed, 28 May 2025 17:11:22 -0400 Subject: [PATCH 1/3] reverted filter changes back to discrete object --- src/valor_lite/object_detection/__init__.py | 3 +- .../object_detection/computation.py | 91 +++ src/valor_lite/object_detection/manager.py | 622 +++++++++--------- .../test_average_precision.py | 49 +- tests/object_detection/test_average_recall.py | 47 +- .../object_detection/test_confusion_matrix.py | 55 +- tests/object_detection/test_counts.py | 33 +- tests/object_detection/test_dataloader.py | 3 +- tests/object_detection/test_evaluator.py | 59 +- tests/object_detection/test_f1.py | 16 +- tests/object_detection/test_filtering.py | 236 +++---- tests/object_detection/test_pr_curve.py | 8 +- tests/object_detection/test_precision.py | 16 +- tests/object_detection/test_recall.py | 16 +- tests/object_detection/test_stability.py | 18 +- 15 files changed, 698 insertions(+), 574 deletions(-) diff --git a/src/valor_lite/object_detection/__init__.py b/src/valor_lite/object_detection/__init__.py index e433df8ca..a4c17fcc5 100644 --- a/src/valor_lite/object_detection/__init__.py +++ b/src/valor_lite/object_detection/__init__.py @@ -1,5 +1,5 @@ from .annotation import Bitmask, BoundingBox, Detection, Polygon -from .manager import DataLoader, Evaluator +from .manager import DataLoader, Evaluator, Filter from .metric import Metric, MetricType __all__ = [ @@ -11,4 +11,5 @@ "MetricType", "DataLoader", "Evaluator", + "Filter", ] diff --git a/src/valor_lite/object_detection/computation.py b/src/valor_lite/object_detection/computation.py index c04c8cea7..de4e34a3f 100644 --- a/src/valor_lite/object_detection/computation.py +++ b/src/valor_lite/object_detection/computation.py @@ -221,6 +221,97 @@ def compute_label_metadata( return label_metadata +def filter_cache( + detailed_pairs: NDArray[np.float64], + mask_datums: NDArray[np.bool_], + mask_predictions: NDArray[np.bool_], + mask_ground_truths: NDArray[np.bool_], + n_labels: int, +) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.int32],]: + """ + Performs filtering on a detailed cache. + + Parameters + ---------- + detailed_pairs : NDArray[float64] + A list of sorted detailed pairs with size (N, 7). + mask_datums : NDArray[bool] + A boolean mask with size (N,). + mask_ground_truths : NDArray[bool] + A boolean mask with size (N,). + mask_predictions : NDArray[bool] + A boolean mask with size (N,). + n_labels : int + The total number of unique labels. + + Returns + ------- + NDArray[float64] + Filtered detailed pairs. + NDArray[float64] + Filtered ranked pairs. + NDArray[int32] + Label metadata. + """ + # filter datums + detailed_pairs = detailed_pairs[mask_datums].copy() + + # filter ground truths + if mask_ground_truths.any(): + invalid_groundtruth_indices = np.where(mask_ground_truths)[0] + detailed_pairs[ + invalid_groundtruth_indices[:, None], (1, 3, 5) + ] = np.array([[-1, -1, 0]]) + + # filter predictions + if mask_predictions.any(): + invalid_prediction_indices = np.where(mask_predictions)[0] + detailed_pairs[ + invalid_prediction_indices[:, None], (2, 4, 5, 6) + ] = np.array([[-1, -1, 0, -1]]) + + # filter null pairs + mask_null_pairs = np.all( + np.isclose( + detailed_pairs[:, 1:5], + np.array([-1.0, -1.0, -1.0, -1.0]), + ), + axis=1, + ) + detailed_pairs = detailed_pairs[~mask_null_pairs] + + if detailed_pairs.size == 0: + warnings.warn("no valid filtered pairs") + return ( + np.array([], dtype=np.float64), + np.array([], dtype=np.float64), + np.zeros((n_labels, 2), dtype=np.int32), + ) + + # sorts by score, iou with ground truth id as a tie-breaker + indices = np.lexsort( + ( + detailed_pairs[:, 1], # ground truth id + -detailed_pairs[:, 5], # iou + -detailed_pairs[:, 6], # score + ) + ) + detailed_pairs = detailed_pairs[indices] + label_metadata = compute_label_metadata( + ids=detailed_pairs[:, :5].astype(np.int32), + n_labels=n_labels, + ) + ranked_pairs = rank_pairs( + detailed_pairs=detailed_pairs, + label_metadata=label_metadata, + ) + return ( + detailed_pairs, + ranked_pairs, + label_metadata, + ) + + def rank_pairs( detailed_pairs: NDArray[np.float64], label_metadata: NDArray[np.int32], diff --git a/src/valor_lite/object_detection/manager.py b/src/valor_lite/object_detection/manager.py index f08aff14b..0e060da77 100644 --- a/src/valor_lite/object_detection/manager.py +++ b/src/valor_lite/object_detection/manager.py @@ -1,4 +1,5 @@ import warnings +from dataclasses import asdict, dataclass import numpy as np from numpy.typing import NDArray @@ -17,6 +18,7 @@ compute_label_metadata, compute_polygon_iou, compute_precion_recall, + filter_cache, rank_pairs, ) from valor_lite.object_detection.metric import Metric, MetricType @@ -46,6 +48,56 @@ """ +@dataclass +class Metadata: + number_of_datums: int = 0 + number_of_ground_truths: int = 0 + number_of_predictions: int = 0 + number_of_labels: int = 0 + is_filtered: bool = False + + @classmethod + def create( + cls, + detailed_pairs: NDArray[np.float64], + number_of_datums: int, + number_of_labels: int, + is_filtered: bool, + ): + # count number of ground truths + mask_valid_gts = detailed_pairs[:, 1] >= 0 + unique_ids = np.unique( + detailed_pairs[np.ix_(mask_valid_gts, (0, 1))], axis=0 # type: ignore - np.ix_ typing + ) + number_of_ground_truths = int(unique_ids.shape[0]) + + # count number of predictions + mask_valid_pds = detailed_pairs[:, 2] >= 0 + unique_ids = np.unique( + detailed_pairs[np.ix_(mask_valid_pds, (0, 2))], axis=0 # type: ignore - np.ix_ typing + ) + number_of_predictions = int(unique_ids.shape[0]) + + return cls( + number_of_datums=number_of_datums, + number_of_ground_truths=number_of_ground_truths, + number_of_predictions=number_of_predictions, + number_of_labels=number_of_labels, + is_filtered=is_filtered, + ) + + def to_dict(self) -> dict[str, int | bool]: + return asdict(self) + + +@dataclass +class Filter: + mask_datums: NDArray[np.bool_] + mask_groundtruths: NDArray[np.bool_] + mask_predictions: NDArray[np.bool_] + metadata: Metadata + + class Evaluator: """ Object Detection Evaluator @@ -67,80 +119,19 @@ def __init__(self): # temporary cache self._temp_cache: list[NDArray[np.float64]] | None = [] - # cache + # internal cache self._detailed_pairs = np.array([[]], dtype=np.float64) self._ranked_pairs = np.array([[]], dtype=np.float64) self._label_metadata: NDArray[np.int32] = np.array([[]]) - - # filter cache - self._filtered_detailed_pairs: NDArray[np.float64] | None = None - self._filtered_ranked_pairs: NDArray[np.float64] | None = None - self._filtered_label_metadata: NDArray[np.int32] | None = None - - @property - def is_filtered(self) -> bool: - return self._filtered_detailed_pairs is not None - - @property - def label_metadata(self) -> NDArray[np.int32]: - return ( - self._filtered_label_metadata - if self._filtered_label_metadata is not None - else self._label_metadata - ) - - @property - def detailed_pairs(self) -> NDArray[np.float64]: - return ( - self._filtered_detailed_pairs - if self._filtered_detailed_pairs is not None - else self._detailed_pairs - ) - - @property - def ranked_pairs(self) -> NDArray[np.float64]: - return ( - self._filtered_ranked_pairs - if self._filtered_ranked_pairs is not None - else self._ranked_pairs - ) - - @property - def n_labels(self) -> int: - """Returns the total number of unique labels.""" - return len(self.index_to_label) - - @property - def n_datums(self) -> int: - """Returns the number of datums.""" - return np.unique(self.detailed_pairs[:, 0]).size - - @property - def n_groundtruths(self) -> int: - """Returns the number of ground truth annotations.""" - mask_valid_gts = self.detailed_pairs[:, 1] >= 0 - unique_ids = np.unique( - self.detailed_pairs[np.ix_(mask_valid_gts, (0, 1))], axis=0 # type: ignore - np.ix_ typing - ) - return int(unique_ids.shape[0]) - - @property - def n_predictions(self) -> int: - """Returns the number of prediction annotations.""" - mask_valid_pds = self.detailed_pairs[:, 2] >= 0 - unique_ids = np.unique( - self.detailed_pairs[np.ix_(mask_valid_pds, (0, 2))], axis=0 # type: ignore - np.ix_ typing - ) - return int(unique_ids.shape[0]) + self._metadata = Metadata() @property def ignored_prediction_labels(self) -> list[str]: """ Prediction labels that are not present in the ground truth set. """ - label_metadata = self.label_metadata - glabels = set(np.where(label_metadata[:, 0] > 0)[0]) - plabels = set(np.where(label_metadata[:, 1] > 0)[0]) + glabels = set(np.where(self._label_metadata[:, 0] > 0)[0]) + plabels = set(np.where(self._label_metadata[:, 1] > 0)[0]) return [ self.index_to_label[label_id] for label_id in (plabels - glabels) ] @@ -150,137 +141,18 @@ def missing_prediction_labels(self) -> list[str]: """ Ground truth labels that are not present in the prediction set. """ - label_metadata = self.label_metadata - glabels = set(np.where(label_metadata[:, 0] > 0)[0]) - plabels = set(np.where(label_metadata[:, 1] > 0)[0]) + glabels = set(np.where(self._label_metadata[:, 0] > 0)[0]) + plabels = set(np.where(self._label_metadata[:, 1] > 0)[0]) return [ self.index_to_label[label_id] for label_id in (glabels - plabels) ] @property - def metadata(self) -> dict: + def metadata(self) -> Metadata: """ Evaluation metadata. """ - return { - "n_datums": self.n_datums, - "n_groundtruths": self.n_groundtruths, - "n_predictions": self.n_predictions, - "n_labels": self.n_labels, - "ignored_prediction_labels": self.ignored_prediction_labels, - "missing_prediction_labels": self.missing_prediction_labels, - } - - def compute_precision_recall( - self, - iou_thresholds: list[float], - score_thresholds: list[float], - ) -> dict[MetricType, list[Metric]]: - """ - Computes all metrics except for ConfusionMatrix - - Parameters - ---------- - iou_thresholds : list[float] - A list of IOU thresholds to compute metrics over. - score_thresholds : list[float] - A list of score thresholds to compute metrics over. - - Returns - ------- - dict[MetricType, list] - A dictionary mapping MetricType enumerations to lists of computed metrics. - """ - if not iou_thresholds: - raise ValueError("At least one IOU threshold must be passed.") - elif not score_thresholds: - raise ValueError("At least one score threshold must be passed.") - results = compute_precion_recall( - ranked_pairs=self.ranked_pairs, - label_metadata=self.label_metadata, - iou_thresholds=np.array(iou_thresholds), - score_thresholds=np.array(score_thresholds), - ) - return unpack_precision_recall_into_metric_lists( - results=results, - label_metadata=self.label_metadata, - iou_thresholds=iou_thresholds, - score_thresholds=score_thresholds, - index_to_label=self.index_to_label, - ) - - def compute_confusion_matrix( - self, - iou_thresholds: list[float], - score_thresholds: list[float], - ) -> list[Metric]: - """ - Computes confusion matrices at various thresholds. - - Parameters - ---------- - iou_thresholds : list[float] - A list of IOU thresholds to compute metrics over. - score_thresholds : list[float] - A list of score thresholds to compute metrics over. - - Returns - ------- - list[Metric] - List of confusion matrices per threshold pair. - """ - if not iou_thresholds: - raise ValueError("At least one IOU threshold must be passed.") - elif not score_thresholds: - raise ValueError("At least one score threshold must be passed.") - elif self.detailed_pairs.size == 0: - warnings.warn("attempted to compute over an empty set") - return [] - results = compute_confusion_matrix( - detailed_pairs=self.detailed_pairs, - iou_thresholds=np.array(iou_thresholds), - score_thresholds=np.array(score_thresholds), - ) - return unpack_confusion_matrix_into_metric_list( - results=results, - detailed_pairs=self.detailed_pairs, - iou_thresholds=iou_thresholds, - score_thresholds=score_thresholds, - index_to_datum_id=self.index_to_datum_id, - index_to_groundtruth_id=self.index_to_groundtruth_id, - index_to_prediction_id=self.index_to_prediction_id, - index_to_label=self.index_to_label, - ) - - def evaluate( - self, - iou_thresholds: list[float] = [0.1, 0.5, 0.75], - score_thresholds: list[float] = [0.5], - ) -> dict[MetricType, list[Metric]]: - """ - Computes all available metrics. - - Parameters - ---------- - iou_thresholds : list[float], default=[0.1, 0.5, 0.75] - A list of IOU thresholds to compute metrics over. - score_thresholds : list[float], default=[0.5] - A list of score thresholds to compute metrics over. - - Returns - ------- - dict[MetricType, list[Metric]] - Lists of metrics organized by metric type. - """ - metrics = self.compute_precision_recall( - iou_thresholds=iou_thresholds, - score_thresholds=score_thresholds, - ) - metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix( - iou_thresholds=iou_thresholds, - score_thresholds=score_thresholds, - ) - return metrics + return self._metadata def _add_datum(self, datum_id: str) -> int: """ @@ -484,7 +356,6 @@ def _add_data( data = np.array(pairs) if data.size > 0: # reset filtered cache if it exists - self.clear_filter() if self._temp_cache is None: raise RuntimeError( "cannot add data as evaluator has already been finalized" @@ -600,13 +471,16 @@ def finalize(self): Evaluator A ready-to-use evaluator object. """ + n_labels = len(self.index_to_label) + n_datums = len(self.index_to_datum_id) if self._temp_cache is None: warnings.warn("evaluator is already finalized or in a bad state") return self elif not self._temp_cache: self._detailed_pairs = np.array([], dtype=np.float64) self._ranked_pairs = np.array([], dtype=np.float64) - self._label_metadata = np.zeros((self.n_labels, 2), dtype=np.int32) + self._label_metadata = np.zeros((n_labels, 2), dtype=np.int32) + self._metadata = Metadata() warnings.warn("no valid pairs") return self else: @@ -623,178 +497,318 @@ def finalize(self): self._detailed_pairs = self._detailed_pairs[indices] self._label_metadata = compute_label_metadata( ids=self._detailed_pairs[:, :5].astype(np.int32), - n_labels=self.n_labels, + n_labels=n_labels, ) self._ranked_pairs = rank_pairs( - detailed_pairs=self.detailed_pairs, + detailed_pairs=self._detailed_pairs, label_metadata=self._label_metadata, ) + self._metadata = Metadata.create( + detailed_pairs=self._detailed_pairs, + number_of_datums=n_datums, + number_of_labels=n_labels, + is_filtered=False, + ) return self - def apply_filter( + def create_filter( self, datum_ids: list[str] | None = None, groundtruth_ids: list[str] | None = None, prediction_ids: list[str] | None = None, labels: list[str] | None = None, - ): + ) -> Filter: """ - Apply a filter on the evaluator. - - Can be reset by calling 'clear_filter'. + Creates a filter object. Parameters ---------- datum_uids : list[str], optional - An optional list of string uids representing datums. + An optional list of string uids representing datums to keep. groundtruth_ids : list[str], optional - An optional list of string uids representing ground truth annotations. + An optional list of string uids representing ground truth annotations to keep. prediction_ids : list[str], optional - An optional list of string uids representing prediction annotations. + An optional list of string uids representing prediction annotations to keep. labels : list[str], optional - An optional list of labels. + An optional list of labels to keep. """ - self._filtered_detailed_pairs = self._detailed_pairs.copy() - self._filtered_ranked_pairs = np.array([], dtype=np.float64) - self._filtered_label_metadata = np.zeros( - (self.n_labels, 2), dtype=np.int32 - ) + mask_datums = np.ones(self._detailed_pairs.shape[0], dtype=np.bool_) - valid_datum_indices = None + # filter datums if datum_ids is not None: if not datum_ids: - self._filtered_detailed_pairs = np.array([], dtype=np.float64) - warnings.warn("no valid filtered pairs") - return + warnings.warn("creating a filter that removes all datums") + return Filter( + mask_datums=np.zeros_like(mask_datums), + mask_groundtruths=np.array([], dtype=np.bool_), + mask_predictions=np.array([], dtype=np.bool_), + metadata=Metadata(is_filtered=True), + ) valid_datum_indices = np.array( [self.datum_id_to_index[uid] for uid in datum_ids], dtype=np.int32, ) + mask_datums = np.isin( + self._detailed_pairs[:, 0], valid_datum_indices + ) - valid_groundtruth_indices = None + filtered_detailed_pairs = self._detailed_pairs[mask_datums] + n_pairs = self._detailed_pairs[mask_datums].shape[0] + mask_groundtruths = np.zeros(n_pairs, dtype=np.bool_) + mask_predictions = np.zeros_like(mask_groundtruths) + + # filter by ground truth annotation ids if groundtruth_ids is not None: + if not groundtruth_ids: + warnings.warn( + "creating a filter that removes all ground truths" + ) valid_groundtruth_indices = np.array( [self.groundtruth_id_to_index[uid] for uid in groundtruth_ids], dtype=np.int32, ) + mask_groundtruths[ + ~np.isin( + filtered_detailed_pairs[:, 1], + valid_groundtruth_indices, + ) + ] = True - valid_prediction_indices = None + # filter by prediction annotation ids if prediction_ids is not None: + if not prediction_ids: + warnings.warn("creating a filter that removes all predictions") valid_prediction_indices = np.array( [self.prediction_id_to_index[uid] for uid in prediction_ids], dtype=np.int32, ) + mask_predictions[ + ~np.isin( + filtered_detailed_pairs[:, 2], + valid_prediction_indices, + ) + ] = True - valid_label_indices = None + # filter by labels if labels is not None: if not labels: - self._filtered_detailed_pairs = np.array([], dtype=np.float64) - warnings.warn("no valid filtered pairs") - return + warnings.warn("creating a filter that removes all labels") + return Filter( + mask_datums=mask_datums, + mask_groundtruths=np.ones_like(mask_datums), + mask_predictions=np.ones_like(mask_datums), + metadata=Metadata(is_filtered=True), + ) valid_label_indices = np.array( [self.label_to_index[label] for label in labels] + [-1] ) - - # filter datums - if valid_datum_indices is not None: - mask_valid_datums = np.isin( - self._filtered_detailed_pairs[:, 0], valid_datum_indices - ) - self._filtered_detailed_pairs = self._filtered_detailed_pairs[ - mask_valid_datums - ] - - n_rows = self._filtered_detailed_pairs.shape[0] - mask_invalid_groundtruths = np.zeros(n_rows, dtype=np.bool_) - mask_invalid_predictions = np.zeros_like(mask_invalid_groundtruths) - - # filter ground truth annotations - if valid_groundtruth_indices is not None: - mask_invalid_groundtruths[ - ~np.isin( - self._filtered_detailed_pairs[:, 1], - valid_groundtruth_indices, - ) + mask_groundtruths[ + ~np.isin(filtered_detailed_pairs[:, 3], valid_label_indices) ] = True - - # filter prediction annotations - if valid_prediction_indices is not None: - mask_invalid_predictions[ - ~np.isin( - self._filtered_detailed_pairs[:, 2], - valid_prediction_indices, - ) + mask_predictions[ + ~np.isin(filtered_detailed_pairs[:, 4], valid_label_indices) ] = True - # filter labels - if valid_label_indices is not None: - mask_invalid_groundtruths[ - ~np.isin( - self._filtered_detailed_pairs[:, 3], valid_label_indices - ) - ] = True - mask_invalid_predictions[ - ~np.isin( - self._filtered_detailed_pairs[:, 4], valid_label_indices - ) - ] = True + filtered_detailed_pairs, _, _ = filter_cache( + self._detailed_pairs, + mask_datums=mask_datums, + mask_ground_truths=mask_groundtruths, + mask_predictions=mask_predictions, + n_labels=len(self.index_to_label), + ) - # filter cache - if mask_invalid_groundtruths.any(): - invalid_groundtruth_indices = np.where(mask_invalid_groundtruths)[ - 0 - ] - self._filtered_detailed_pairs[ - invalid_groundtruth_indices[:, None], (1, 3, 5) - ] = np.array([[-1, -1, 0]]) - - if mask_invalid_predictions.any(): - invalid_prediction_indices = np.where(mask_invalid_predictions)[0] - self._filtered_detailed_pairs[ - invalid_prediction_indices[:, None], (2, 4, 5, 6) - ] = np.array([[-1, -1, 0, -1]]) - - # filter null pairs - mask_null_pairs = np.all( - np.isclose( - self._filtered_detailed_pairs[:, 1:5], - np.array([-1.0, -1.0, -1.0, -1.0]), + number_of_datums = ( + len(datum_ids) + if datum_ids + else np.unique(filtered_detailed_pairs[:, 0]).size + ) + + return Filter( + mask_datums=mask_datums, + mask_groundtruths=mask_groundtruths, + mask_predictions=mask_predictions, + metadata=Metadata.create( + detailed_pairs=filtered_detailed_pairs, + number_of_datums=number_of_datums, + number_of_labels=len(self.index_to_label), + is_filtered=True, ), - axis=1, ) - self._filtered_detailed_pairs = self._filtered_detailed_pairs[ - ~mask_null_pairs - ] - if self._filtered_detailed_pairs.size == 0: - self._ranked_pairs = np.array([], dtype=np.float64) - self._label_metadata = np.zeros((self.n_labels, 2), dtype=np.int32) - warnings.warn("no valid filtered pairs") - return + def filter( + self, filter_: Filter + ) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.int32],]: + """ + Performs filtering over the internal cache. - # sorts by score, iou with ground truth id as a tie-breaker - indices = np.lexsort( - ( - self._filtered_detailed_pairs[:, 1], # ground truth id - -self._filtered_detailed_pairs[:, 5], # iou - -self._filtered_detailed_pairs[:, 6], # score + Parameters + ---------- + filter_ : Filter + The filter parameterization. + + Returns + ------- + NDArray[float64] + Filtered detailed pairs. + NDArray[float64] + Filtered ranked pairs. + NDArray[int32] + Label metadata. + """ + if not filter_.mask_datums.any(): + warnings.warn("filter removed all datums") + return ( + np.array([], dtype=np.float64), + np.array([], dtype=np.float64), + np.zeros((self.metadata.number_of_labels, 2), dtype=np.int32), ) + if filter_.mask_groundtruths.all(): + warnings.warn("filter removed all ground truths") + if filter_.mask_predictions.all(): + warnings.warn("filter removed all predictions") + return filter_cache( + detailed_pairs=self._detailed_pairs, + mask_datums=filter_.mask_datums, + mask_ground_truths=filter_.mask_groundtruths, + mask_predictions=filter_.mask_predictions, + n_labels=len(self.index_to_label), ) - self._filtered_detailed_pairs = self._filtered_detailed_pairs[indices] - self._filtered_label_metadata = compute_label_metadata( - ids=self._filtered_detailed_pairs[:, :5].astype(np.int32), - n_labels=self.n_labels, + + def compute_precision_recall( + self, + iou_thresholds: list[float], + score_thresholds: list[float], + filter_: Filter | None = None, + ) -> dict[MetricType, list[Metric]]: + """ + Computes all metrics except for ConfusionMatrix + + Parameters + ---------- + iou_thresholds : list[float] + A list of IOU thresholds to compute metrics over. + score_thresholds : list[float] + A list of score thresholds to compute metrics over. + filter_ : Filter, optional + A collection of filter parameters and masks. + + Returns + ------- + dict[MetricType, list] + A dictionary mapping MetricType enumerations to lists of computed metrics. + """ + if not iou_thresholds: + raise ValueError("At least one IOU threshold must be passed.") + elif not score_thresholds: + raise ValueError("At least one score threshold must be passed.") + + if filter_ is not None: + _, ranked_pairs, label_metadata = self.filter(filter_=filter_) + else: + ranked_pairs = self._ranked_pairs + label_metadata = self._label_metadata + + results = compute_precion_recall( + ranked_pairs=ranked_pairs, + label_metadata=label_metadata, + iou_thresholds=np.array(iou_thresholds), + score_thresholds=np.array(score_thresholds), ) - self._filtered_ranked_pairs = rank_pairs( - detailed_pairs=self._filtered_detailed_pairs, - label_metadata=self._filtered_label_metadata, + return unpack_precision_recall_into_metric_lists( + results=results, + label_metadata=label_metadata, + iou_thresholds=iou_thresholds, + score_thresholds=score_thresholds, + index_to_label=self.index_to_label, + ) + + def compute_confusion_matrix( + self, + iou_thresholds: list[float], + score_thresholds: list[float], + filter_: Filter | None = None, + ) -> list[Metric]: + """ + Computes confusion matrices at various thresholds. + + Parameters + ---------- + iou_thresholds : list[float] + A list of IOU thresholds to compute metrics over. + score_thresholds : list[float] + A list of score thresholds to compute metrics over. + filter_ : Filter, optional + A collection of filter parameters and masks. + + Returns + ------- + list[Metric] + List of confusion matrices per threshold pair. + """ + if not iou_thresholds: + raise ValueError("At least one IOU threshold must be passed.") + elif not score_thresholds: + raise ValueError("At least one score threshold must be passed.") + + if filter_ is not None: + detailed_pairs, _, _ = self.filter(filter_=filter_) + else: + detailed_pairs = self._detailed_pairs + + if detailed_pairs.size == 0: + warnings.warn("attempted to compute over an empty set") + return [] + + results = compute_confusion_matrix( + detailed_pairs=detailed_pairs, + iou_thresholds=np.array(iou_thresholds), + score_thresholds=np.array(score_thresholds), + ) + return unpack_confusion_matrix_into_metric_list( + results=results, + detailed_pairs=detailed_pairs, + iou_thresholds=iou_thresholds, + score_thresholds=score_thresholds, + index_to_datum_id=self.index_to_datum_id, + index_to_groundtruth_id=self.index_to_groundtruth_id, + index_to_prediction_id=self.index_to_prediction_id, + index_to_label=self.index_to_label, ) - def clear_filter(self): - """Removes a filter if one exists.""" - self._filtered_detailed_pairs = None - self._filtered_ranked_pairs = None - self._filtered_label_metadata = None + def evaluate( + self, + iou_thresholds: list[float] = [0.1, 0.5, 0.75], + score_thresholds: list[float] = [0.5], + filter_: Filter | None = None, + ) -> dict[MetricType, list[Metric]]: + """ + Computes all available metrics. + + Parameters + ---------- + iou_thresholds : list[float], default=[0.1, 0.5, 0.75] + A list of IOU thresholds to compute metrics over. + score_thresholds : list[float], default=[0.5] + A list of score thresholds to compute metrics over. + filter_ : Filter, optional + A collection of filter parameters and masks. + + Returns + ------- + dict[MetricType, list[Metric]] + Lists of metrics organized by metric type. + """ + metrics = self.compute_precision_recall( + iou_thresholds=iou_thresholds, + score_thresholds=score_thresholds, + filter_=filter_, + ) + metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix( + iou_thresholds=iou_thresholds, + score_thresholds=score_thresholds, + filter_=filter_, + ) + return metrics class DataLoader(Evaluator): diff --git a/tests/object_detection/test_average_precision.py b/tests/object_detection/test_average_precision.py index 95a6dbc11..c684624d7 100644 --- a/tests/object_detection/test_average_precision.py +++ b/tests/object_detection/test_average_precision.py @@ -94,10 +94,10 @@ def test_ap_metrics_first_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 1 # test AP actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]] @@ -215,10 +215,10 @@ def test_ap_metrics_second_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 1 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 1 + assert evaluator.metadata.number_of_predictions == 1 # test AP actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]] @@ -317,10 +317,10 @@ def test_ap_using_torch_metrics_example( assert evaluator.ignored_prediction_labels == ["3"] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 4 - assert evaluator.n_labels == 6 - assert evaluator.n_groundtruths == 20 - assert evaluator.n_predictions == 19 + assert evaluator.metadata.number_of_datums == 4 + assert evaluator.metadata.number_of_labels == 6 + assert evaluator.metadata.number_of_ground_truths == 20 + assert evaluator.metadata.number_of_predictions == 19 metrics = evaluator.evaluate( iou_thresholds=[0.5, 0.75], @@ -672,15 +672,14 @@ def test_ap_ranked_pair_ordering( method(loader, detections=[input_]) evaluator = loader.finalize() - assert evaluator.metadata == { - "ignored_prediction_labels": [ - "label4", - ], - "missing_prediction_labels": [], - "n_datums": 1, - "n_groundtruths": 3, - "n_labels": 4, - "n_predictions": 4, + assert evaluator.ignored_prediction_labels == ["label4"] + assert evaluator.missing_prediction_labels == [] + assert evaluator.metadata.to_dict() == { + "number_of_datums": 1, + "number_of_ground_truths": 3, + "number_of_labels": 4, + "number_of_predictions": 4, + "is_filtered": False, } metrics = evaluator.evaluate( @@ -826,10 +825,10 @@ def test_ap_true_positive_deassignment( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 1 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 4 + assert evaluator.metadata.number_of_datums == 1 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 4 metrics = evaluator.evaluate( iou_thresholds=[0.5], diff --git a/tests/object_detection/test_average_recall.py b/tests/object_detection/test_average_recall.py index 9525de44c..130ae1597 100644 --- a/tests/object_detection/test_average_recall.py +++ b/tests/object_detection/test_average_recall.py @@ -108,10 +108,10 @@ def test_ar_metrics_first_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 1 # test AR actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]] @@ -221,10 +221,10 @@ def test_ar_metrics_second_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 1 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 1 + assert evaluator.metadata.number_of_predictions == 1 # test AR actual_metrics = [m.to_dict() for m in metrics[MetricType.AR]] @@ -315,10 +315,10 @@ def test_ar_using_torch_metrics_example( assert evaluator.ignored_prediction_labels == ["3"] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 4 - assert evaluator.n_labels == 6 - assert evaluator.n_groundtruths == 20 - assert evaluator.n_predictions == 19 + assert evaluator.metadata.number_of_datums == 4 + assert evaluator.metadata.number_of_labels == 6 + assert evaluator.metadata.number_of_ground_truths == 20 + assert evaluator.metadata.number_of_predictions == 19 score_thresholds = [0.0] iou_thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] @@ -485,10 +485,10 @@ def test_ar_true_positive_deassignment( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 1 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 4 + assert evaluator.metadata.number_of_datums == 1 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 4 metrics = evaluator.evaluate( iou_thresholds=[0.5], @@ -537,13 +537,14 @@ def test_ar_ranked_pair_ordering( method(loader, detections=[input_]) evaluator = loader.finalize() - assert evaluator.metadata == { - "ignored_prediction_labels": ["label4"], - "missing_prediction_labels": [], - "n_datums": 1, - "n_groundtruths": 3, - "n_labels": 4, - "n_predictions": 4, + assert evaluator.ignored_prediction_labels == ["label4"] + assert evaluator.missing_prediction_labels == [] + assert evaluator.metadata.to_dict() == { + "number_of_datums": 1, + "number_of_ground_truths": 3, + "number_of_labels": 4, + "number_of_predictions": 4, + "is_filtered": False, } metrics = evaluator.evaluate( diff --git a/tests/object_detection/test_confusion_matrix.py b/tests/object_detection/test_confusion_matrix.py index aa1fb58b0..a0de5aa5b 100644 --- a/tests/object_detection/test_confusion_matrix.py +++ b/tests/object_detection/test_confusion_matrix.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from valor_lite.object_detection import DataLoader, Detection, Evaluator from valor_lite.object_detection.computation import ( @@ -9,12 +10,13 @@ def test_confusion_matrix_no_data(): evaluator = Evaluator() - curves = evaluator.compute_confusion_matrix( - iou_thresholds=[0.5], - score_thresholds=[0.5], - ) - assert isinstance(curves, list) - assert len(curves) == 0 + with pytest.warns(UserWarning): + cm = evaluator.compute_confusion_matrix( + iou_thresholds=[0.5], + score_thresholds=[0.5], + ) + assert isinstance(cm, list) + assert len(cm) == 0 def test_compute_confusion_matrix(): @@ -182,10 +184,10 @@ def test_confusion_matrix( "unmatched_groundtruth", "v2", ] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 6 - assert evaluator.n_groundtruths == 4 - assert evaluator.n_predictions == 4 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 6 + assert evaluator.metadata.number_of_ground_truths == 4 + assert evaluator.metadata.number_of_predictions == 4 actual_metrics = evaluator.compute_confusion_matrix( iou_thresholds=[0.5], @@ -1007,10 +1009,10 @@ def test_confusion_matrix_using_torch_metrics_example( assert evaluator.ignored_prediction_labels == ["3"] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 4 - assert evaluator.n_labels == 6 - assert evaluator.n_groundtruths == 20 - assert evaluator.n_predictions == 19 + assert evaluator.metadata.number_of_datums == 4 + assert evaluator.metadata.number_of_labels == 6 + assert evaluator.metadata.number_of_ground_truths == 20 + assert evaluator.metadata.number_of_predictions == 19 actual_metrics = evaluator.compute_confusion_matrix( iou_thresholds=[0.5, 0.9], @@ -1402,10 +1404,10 @@ def test_confusion_matrix_fp_unmatched_prediction_edge_case( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 2 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 2 actual_metrics = evaluator.compute_confusion_matrix( iou_thresholds=[0.5], @@ -1521,15 +1523,14 @@ def test_confusion_matrix_ranked_pair_ordering( evaluator = loader.finalize() - assert evaluator.metadata == { - "ignored_prediction_labels": [ - "label4", - ], - "missing_prediction_labels": [], - "n_datums": 1, - "n_groundtruths": 3, - "n_labels": 4, - "n_predictions": 4, + assert evaluator.ignored_prediction_labels == ["label4"] + assert evaluator.missing_prediction_labels == [] + assert evaluator.metadata.to_dict() == { + "number_of_datums": 1, + "number_of_ground_truths": 3, + "number_of_labels": 4, + "number_of_predictions": 4, + "is_filtered": False, } actual_metrics = evaluator.compute_confusion_matrix( diff --git a/tests/object_detection/test_counts.py b/tests/object_detection/test_counts.py index 2ddc55167..03c3f0a09 100644 --- a/tests/object_detection/test_counts.py +++ b/tests/object_detection/test_counts.py @@ -36,10 +36,10 @@ def test_counts_metrics_first_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 1 # test Counts actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]] @@ -137,10 +137,10 @@ def test_counts_metrics_second_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 1 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 1 + assert evaluator.metadata.number_of_predictions == 1 # test Counts actual_metrics = [m.to_dict() for m in metrics[MetricType.Counts]] @@ -522,15 +522,14 @@ def test_counts_ranked_pair_ordering( method(loader, detections=[input_]) evaluator = loader.finalize() - assert evaluator.metadata == { - "ignored_prediction_labels": [ - "label4", - ], - "missing_prediction_labels": [], - "n_datums": 1, - "n_groundtruths": 3, - "n_labels": 4, - "n_predictions": 4, + assert evaluator.ignored_prediction_labels == ["label4"] + assert evaluator.missing_prediction_labels == [] + assert evaluator.metadata.to_dict() == { + "number_of_datums": 1, + "number_of_ground_truths": 3, + "number_of_labels": 4, + "number_of_predictions": 4, + "is_filtered": False, } metrics = evaluator.evaluate( diff --git a/tests/object_detection/test_dataloader.py b/tests/object_detection/test_dataloader.py index a376b8563..b38a448b2 100644 --- a/tests/object_detection/test_dataloader.py +++ b/tests/object_detection/test_dataloader.py @@ -16,7 +16,8 @@ def test_no_data(): loader = DataLoader() - loader.finalize() + with pytest.warns(UserWarning): + loader.finalize() assert loader._detailed_pairs.size == 0 assert loader._ranked_pairs.size == 0 assert loader._label_metadata.size == 0 diff --git a/tests/object_detection/test_evaluator.py b/tests/object_detection/test_evaluator.py index 7015df0a9..c4a71a266 100644 --- a/tests/object_detection/test_evaluator.py +++ b/tests/object_detection/test_evaluator.py @@ -22,18 +22,19 @@ def test_metadata_using_torch_metrics_example( assert evaluator.ignored_prediction_labels == ["3"] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 4 - assert evaluator.n_labels == 6 - assert evaluator.n_groundtruths == 20 - assert evaluator.n_predictions == 19 - - assert evaluator.metadata == { - "ignored_prediction_labels": ["3"], - "missing_prediction_labels": [], - "n_datums": 4, - "n_labels": 6, - "n_groundtruths": 20, - "n_predictions": 19, + assert evaluator.metadata.number_of_datums == 4 + assert evaluator.metadata.number_of_labels == 6 + assert evaluator.metadata.number_of_ground_truths == 20 + assert evaluator.metadata.number_of_predictions == 19 + + assert evaluator.ignored_prediction_labels == ["3"] + assert evaluator.missing_prediction_labels == [] + assert evaluator.metadata.to_dict() == { + "number_of_datums": 4, + "number_of_labels": 6, + "number_of_ground_truths": 20, + "number_of_predictions": 19, + "is_filtered": False, } @@ -80,15 +81,16 @@ def test_no_groundtruths(detections_no_groundtruths): assert evaluator.ignored_prediction_labels == ["v1"] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 0 - assert evaluator.n_predictions == 2 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 0 + assert evaluator.metadata.number_of_predictions == 2 - metrics = evaluator.evaluate( - iou_thresholds=[0.5], - score_thresholds=[0.5], - ) + with pytest.warns(UserWarning): + metrics = evaluator.evaluate( + iou_thresholds=[0.5], + score_thresholds=[0.5], + ) assert len(metrics[MetricType.AP]) == 0 @@ -101,15 +103,16 @@ def test_no_predictions(detections_no_predictions): assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == ["v1"] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 0 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 0 - metrics = evaluator.evaluate( - iou_thresholds=[0.5], - score_thresholds=[0.5], - ) + with pytest.warns(UserWarning): + metrics = evaluator.evaluate( + iou_thresholds=[0.5], + score_thresholds=[0.5], + ) assert len(metrics[MetricType.AP]) == 1 diff --git a/tests/object_detection/test_f1.py b/tests/object_detection/test_f1.py index aa6de5b85..9e348b83e 100644 --- a/tests/object_detection/test_f1.py +++ b/tests/object_detection/test_f1.py @@ -74,10 +74,10 @@ def test_f1_metrics_first_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 1 # test F1 actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]] @@ -158,10 +158,10 @@ def test_f1_metrics_second_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 1 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 1 + assert evaluator.metadata.number_of_predictions == 1 # test F1 actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]] diff --git a/tests/object_detection/test_filtering.py b/tests/object_detection/test_filtering.py index 1deb557f7..585e513c5 100644 --- a/tests/object_detection/test_filtering.py +++ b/tests/object_detection/test_filtering.py @@ -88,12 +88,13 @@ def test_filtering_one_detection(one_detection: list[Detection]): loader.add_bounding_boxes(one_detection) evaluator = loader.finalize() - assert (evaluator.label_metadata == np.array([[1, 1], [1, 0]])).all() + assert (evaluator._label_metadata == np.array([[1, 1], [1, 0]])).all() # test datum filtering - evaluator.apply_filter(datum_ids=["uid1"]) + filter_ = evaluator.create_filter(datum_ids=["uid1"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3], @@ -102,7 +103,7 @@ def test_filtering_one_detection(one_detection: list[Detection]): ) ) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [ @@ -115,37 +116,38 @@ def test_filtering_one_detection(one_detection: list[Detection]): ).all() with pytest.raises(KeyError) as e: - evaluator.apply_filter(datum_ids=["uid2"]) + filter_ = evaluator.create_filter(datum_ids=["uid2"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert "uid2" in str(e) # test label filtering - evaluator.apply_filter(labels=["v1"]) + filter_ = evaluator.create_filter(labels=["v1"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs - == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) + detailed_pairs == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) ) - assert (evaluator.label_metadata == np.array([[1, 1], [0, 0]])).all() + assert (label_metadata == np.array([[1, 1], [0, 0]])).all() - evaluator.apply_filter(labels=["v2"]) + filter_ = evaluator.create_filter(labels=["v2"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs - == np.array([[0.0, 1.0, -1.0, 1.0, -1.0, 0.0, -1.0]]) + detailed_pairs == np.array([[0.0, 1.0, -1.0, 1.0, -1.0, 0.0, -1.0]]) ) - assert (evaluator.label_metadata == np.array([[0, 0], [1, 0]])).all() + assert (label_metadata == np.array([[0, 0], [1, 0]])).all() # test combo - evaluator.apply_filter( + filter_ = evaluator.create_filter( datum_ids=["uid1"], labels=["v1"], ) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs - == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) + detailed_pairs == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) ) - assert (evaluator.label_metadata == np.array([[1, 1], [0, 0]])).all() + assert (label_metadata == np.array([[1, 1], [0, 0]])).all() # test evaluation - evaluator.apply_filter(datum_ids=["uid1"]) + filter_ = evaluator.create_filter(datum_ids=["uid1"]) metrics = evaluator.evaluate(iou_thresholds=[0.5]) actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]] expected_metrics = [ @@ -194,12 +196,13 @@ def test_filtering_two_detections(two_detections: list[Detection]): loader.add_bounding_boxes(two_detections) evaluator = loader.finalize() - assert (evaluator.label_metadata == np.array([[2, 1], [1, 1]])).all() + assert (evaluator._label_metadata == np.array([[2, 1], [1, 1]])).all() # test datum filtering - evaluator.apply_filter(datum_ids=["uid1"]) + filter_ = evaluator.create_filter(datum_ids=["uid1"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3], @@ -207,15 +210,15 @@ def test_filtering_two_detections(two_detections: list[Detection]): ] ) ) - assert (evaluator.label_metadata == np.array([[1, 1], [1, 0]])).all() + assert (label_metadata == np.array([[1, 1], [1, 0]])).all() - evaluator.apply_filter(datum_ids=["uid2"]) + filter_ = evaluator.create_filter(datum_ids=["uid2"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs - == np.array([[1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.98]]) + detailed_pairs == np.array([[1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.98]]) ) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [ @@ -231,9 +234,10 @@ def test_filtering_two_detections(two_detections: list[Detection]): ).all() # test label filtering - evaluator.apply_filter(labels=["v1"]) + filter_ = evaluator.create_filter(labels=["v1"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3], @@ -241,11 +245,12 @@ def test_filtering_two_detections(two_detections: list[Detection]): ] ) ) - assert (evaluator.label_metadata == np.array([[2, 1], [0, 0]])).all() + assert (label_metadata == np.array([[2, 1], [0, 0]])).all() - evaluator.apply_filter(labels=["v2"]) + filter_ = evaluator.create_filter(labels=["v2"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [1.0, -1.0, 1.0, -1.0, 1.0, 0.0, 0.98], @@ -253,23 +258,22 @@ def test_filtering_two_detections(two_detections: list[Detection]): ] ) ) - assert (evaluator.label_metadata == np.array([[0, 0], [1, 1]])).all() + assert (label_metadata == np.array([[0, 0], [1, 1]])).all() # test combo - evaluator.apply_filter( + filter_ = evaluator.create_filter( datum_ids=["uid1"], labels=["v1"], ) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs - == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) + detailed_pairs == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) ) - assert (evaluator.label_metadata == np.array([[1, 1], [0, 0]])).all() + assert (label_metadata == np.array([[1, 1], [0, 0]])).all() # test evaluation - evaluator.apply_filter(datum_ids=["uid1"]) - metrics = evaluator.evaluate(iou_thresholds=[0.5]) - + filter_ = evaluator.create_filter(datum_ids=["uid1"]) + metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_=filter_) actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]] expected_metrics = [ { @@ -320,12 +324,13 @@ def test_filtering_four_detections(four_detections: list[Detection]): loader.add_bounding_boxes(four_detections) evaluator = loader.finalize() - assert (evaluator.label_metadata == np.array([[4, 2], [2, 2]])).all() + assert (evaluator._label_metadata == np.array([[4, 2], [2, 2]])).all() # test datum filtering - evaluator.apply_filter(datum_ids=["uid1"]) + filter_ = evaluator.create_filter(datum_ids=["uid1"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3], @@ -333,19 +338,20 @@ def test_filtering_four_detections(four_detections: list[Detection]): ] ) ) - assert (evaluator.label_metadata == np.array([[1, 1], [1, 0]])).all() + assert (label_metadata == np.array([[1, 1], [1, 0]])).all() - evaluator.apply_filter(datum_ids=["uid2"]) + filter_ = evaluator.create_filter(datum_ids=["uid2"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs - == np.array([[1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.98]]) + detailed_pairs == np.array([[1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 0.98]]) ) - assert (evaluator.label_metadata == np.array([[1, 0], [0, 1]])).all() + assert (label_metadata == np.array([[1, 0], [0, 1]])).all() # test label filtering - evaluator.apply_filter(labels=["v1"]) + filter_ = evaluator.create_filter(labels=["v1"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3], @@ -355,11 +361,12 @@ def test_filtering_four_detections(four_detections: list[Detection]): ] ) ) - assert (evaluator.label_metadata == np.array([[4, 2], [0, 0]])).all() + assert (label_metadata == np.array([[4, 2], [0, 0]])).all() - evaluator.apply_filter(labels=["v2"]) + filter_ = evaluator.create_filter(labels=["v2"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [1.0, -1.0, 1.0, -1.0, 1.0, 0.0, 0.98], @@ -369,19 +376,19 @@ def test_filtering_four_detections(four_detections: list[Detection]): ] ) ) - assert (evaluator.label_metadata == np.array([[0, 0], [2, 2]])).all() + assert (label_metadata == np.array([[0, 0], [2, 2]])).all() # test combo - evaluator.apply_filter( + filter_ = evaluator.create_filter( datum_ids=["uid1"], labels=["v1"], ) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs - == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) + detailed_pairs == np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3]]) ) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [ @@ -397,8 +404,8 @@ def test_filtering_four_detections(four_detections: list[Detection]): ).all() # test evaluation - evaluator.apply_filter(datum_ids=["uid1"]) - metrics = evaluator.evaluate(iou_thresholds=[0.5]) + filter_ = evaluator.create_filter(datum_ids=["uid1"]) + metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_=filter_) actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]] expected_metrics = [ { @@ -449,13 +456,16 @@ def test_filtering_all_detections(four_detections: list[Detection]): loader.add_bounding_boxes(four_detections) evaluator = loader.finalize() - assert (evaluator.label_metadata == np.array([[4, 2], [2, 2]])).all() + assert (evaluator._label_metadata == np.array([[4, 2], [2, 2]])).all() # test datum filtering - evaluator.apply_filter(datum_ids=[]) - assert np.all(evaluator.detailed_pairs == np.array([])) + with pytest.warns(UserWarning): + filter_ = evaluator.create_filter(datum_ids=[]) + with pytest.warns(UserWarning): + detailed_pairs, _, label_metadata = evaluator.filter(filter_) + assert np.all(detailed_pairs == np.array([])) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [ @@ -471,9 +481,12 @@ def test_filtering_all_detections(four_detections: list[Detection]): ).all() # test ground truth annotation filtering - evaluator.apply_filter(groundtruth_ids=[]) + with pytest.warns(UserWarning): + filter_ = evaluator.create_filter(groundtruth_ids=[]) + with pytest.warns(UserWarning): + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [1.0, -1.0, 1.0, -1.0, 1.0, 0.0, 0.98], @@ -484,7 +497,7 @@ def test_filtering_all_detections(four_detections: list[Detection]): ) ) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [0, 2], @@ -493,9 +506,10 @@ def test_filtering_all_detections(four_detections: list[Detection]): ) ).all() - evaluator.apply_filter(groundtruth_ids=["uid1_gt_0"]) + filter_ = evaluator.create_filter(groundtruth_ids=["uid1_gt_0"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [1.0, -1.0, 1.0, -1.0, 1.0, 0.0, 0.98], @@ -506,7 +520,7 @@ def test_filtering_all_detections(four_detections: list[Detection]): ) ) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [1, 2], @@ -516,9 +530,12 @@ def test_filtering_all_detections(four_detections: list[Detection]): ).all() # test prediction annotation filtering - evaluator.apply_filter(prediction_ids=[]) + with pytest.warns(UserWarning): + filter_ = evaluator.create_filter(prediction_ids=[]) + with pytest.warns(UserWarning): + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [0.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0], @@ -531,7 +548,7 @@ def test_filtering_all_detections(four_detections: list[Detection]): ) ) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [4, 0], @@ -540,9 +557,10 @@ def test_filtering_all_detections(four_detections: list[Detection]): ) ).all() - evaluator.apply_filter(prediction_ids=["uid1_pd_0"]) + filter_ = evaluator.create_filter(prediction_ids=["uid1_pd_0"]) + detailed_pairs, _, label_metadata = evaluator.filter(filter_) assert np.all( - evaluator.detailed_pairs + detailed_pairs == np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.3], @@ -555,7 +573,7 @@ def test_filtering_all_detections(four_detections: list[Detection]): ) ) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [4, 1], @@ -565,10 +583,13 @@ def test_filtering_all_detections(four_detections: list[Detection]): ).all() # test label filtering - evaluator.apply_filter(labels=[]) - assert np.all(evaluator.detailed_pairs == np.array([])) + with pytest.warns(UserWarning): + filter_ = evaluator.create_filter(labels=[]) + with pytest.warns(UserWarning): + detailed_pairs, _, label_metadata = evaluator.filter(filter_) + assert np.all(detailed_pairs == np.array([])) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [ @@ -584,13 +605,16 @@ def test_filtering_all_detections(four_detections: list[Detection]): ).all() # test combo - evaluator.apply_filter( - datum_ids=[], - labels=["v1"], - ) - assert np.all(evaluator.detailed_pairs == np.array([])) + with pytest.warns(UserWarning): + filter_ = evaluator.create_filter( + datum_ids=[], + labels=["v1"], + ) + with pytest.warns(UserWarning): + detailed_pairs, _, label_metadata = evaluator.filter(filter_) + assert np.all(detailed_pairs == np.array([])) assert ( - evaluator.label_metadata + label_metadata == np.array( [ [ @@ -603,13 +627,16 @@ def test_filtering_all_detections(four_detections: list[Detection]): ).all() # test evaluation - evaluator.apply_filter(datum_ids=[]) - metrics = evaluator.evaluate(iou_thresholds=[0.5]) - evaluator.compute_confusion_matrix( - iou_thresholds=[0.5], - score_thresholds=[0.5], - ) - + with pytest.warns(UserWarning): + filter_ = evaluator.create_filter(datum_ids=[]) + with pytest.warns(UserWarning): + metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_=filter_) + with pytest.warns(UserWarning): + evaluator.compute_confusion_matrix( + iou_thresholds=[0.5], + score_thresholds=[0.5], + filter_=filter_, + ) actual_metrics = [m.to_dict() for m in metrics[MetricType.AP]] assert len(actual_metrics) == 0 @@ -618,8 +645,8 @@ def test_filtering_random_detections(): loader = DataLoader() loader.add_bounding_boxes(_generate_random_detections(13, 4, "abc")) evaluator = loader.finalize() - evaluator.apply_filter(datum_ids=["uid1"]) - evaluator.evaluate() + filter_ = evaluator.create_filter(datum_ids=["uid1"]) + evaluator.evaluate(filter_=filter_) def test_is_filtered(basic_detections: list[Detection]): @@ -627,22 +654,7 @@ def test_is_filtered(basic_detections: list[Detection]): manager.add_bounding_boxes(basic_detections) evaluator = manager.finalize() - assert evaluator.is_filtered is False - assert evaluator._filtered_detailed_pairs is None - assert evaluator._filtered_ranked_pairs is None - assert evaluator._filtered_label_metadata is None - - evaluator.apply_filter(datum_ids=["uid1"]) - assert evaluator.is_filtered is True - assert evaluator._filtered_detailed_pairs is not None - assert evaluator._filtered_detailed_pairs.shape == (2, 7) - assert evaluator._filtered_ranked_pairs is not None - assert evaluator._filtered_ranked_pairs.shape == (1, 7) - assert evaluator._filtered_label_metadata is not None - assert evaluator._filtered_label_metadata.shape == (2, 2) - - evaluator.clear_filter() - assert evaluator.is_filtered is False - assert evaluator._filtered_detailed_pairs is None - assert evaluator._filtered_ranked_pairs is None - assert evaluator._filtered_label_metadata is None + assert evaluator.metadata.is_filtered is False + + filter_ = evaluator.create_filter(datum_ids=["uid1"]) + assert filter_.metadata.is_filtered is True diff --git a/tests/object_detection/test_pr_curve.py b/tests/object_detection/test_pr_curve.py index fc1aa295d..0298c4adf 100644 --- a/tests/object_detection/test_pr_curve.py +++ b/tests/object_detection/test_pr_curve.py @@ -51,10 +51,10 @@ def test_pr_curve_using_torch_metrics_example( assert evaluator.ignored_prediction_labels == ["3"] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 4 - assert evaluator.n_labels == 6 - assert evaluator.n_groundtruths == 20 - assert evaluator.n_predictions == 19 + assert evaluator.metadata.number_of_datums == 4 + assert evaluator.metadata.number_of_labels == 6 + assert evaluator.metadata.number_of_ground_truths == 20 + assert evaluator.metadata.number_of_predictions == 19 metrics = evaluator.evaluate( iou_thresholds=[0.5, 0.75], diff --git a/tests/object_detection/test_precision.py b/tests/object_detection/test_precision.py index a6ccb7196..eda0c2b43 100644 --- a/tests/object_detection/test_precision.py +++ b/tests/object_detection/test_precision.py @@ -75,10 +75,10 @@ def test_precision_metrics_first_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 1 # test Precision actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]] @@ -159,10 +159,10 @@ def test_precision_metrics_second_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 1 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 1 + assert evaluator.metadata.number_of_predictions == 1 # test Precision actual_metrics = [m.to_dict() for m in metrics[MetricType.Precision]] diff --git a/tests/object_detection/test_recall.py b/tests/object_detection/test_recall.py index 52a0a99c4..81a146356 100644 --- a/tests/object_detection/test_recall.py +++ b/tests/object_detection/test_recall.py @@ -73,10 +73,10 @@ def test_recall_metrics_first_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 2 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 2 + assert evaluator.metadata.number_of_predictions == 1 # test Recall actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]] @@ -157,10 +157,10 @@ def test_recall_metrics_second_class( assert evaluator.ignored_prediction_labels == [] assert evaluator.missing_prediction_labels == [] - assert evaluator.n_datums == 2 - assert evaluator.n_labels == 1 - assert evaluator.n_groundtruths == 1 - assert evaluator.n_predictions == 1 + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_labels == 1 + assert evaluator.metadata.number_of_ground_truths == 1 + assert evaluator.metadata.number_of_predictions == 1 # test Recall actual_metrics = [m.to_dict() for m in metrics[MetricType.Recall]] diff --git a/tests/object_detection/test_stability.py b/tests/object_detection/test_stability.py index 021b590ba..3ce06d39a 100644 --- a/tests/object_detection/test_stability.py +++ b/tests/object_detection/test_stability.py @@ -75,10 +75,11 @@ def test_fuzz_detections_with_filtering(): datum_subset = [f"uid{i}" for i in range(len(detections) // 2)] - evaluator.apply_filter(datum_ids=datum_subset) + filter_ = evaluator.create_filter(datum_ids=datum_subset) evaluator.evaluate( iou_thresholds=[0.25, 0.75], score_thresholds=[0.25, 0.75], + filter_=filter_, ) @@ -87,13 +88,14 @@ def test_fuzz_confusion_matrix(): loader = DataLoader() loader.add_bounding_boxes(dets) evaluator = loader.finalize() - assert evaluator.metadata == { - "ignored_prediction_labels": [], - "missing_prediction_labels": [], - "n_datums": 1000, - "n_groundtruths": 30000, - "n_predictions": 30000, - "n_labels": 5, + assert evaluator.ignored_prediction_labels == [] + assert evaluator.missing_prediction_labels == [] + assert evaluator.metadata.to_dict() == { + "number_of_datums": 1000, + "number_of_ground_truths": 30000, + "number_of_predictions": 30000, + "number_of_labels": 5, + "is_filtered": False, } evaluator.evaluate( iou_thresholds=[0.25, 0.75], From 1447b17350a9a2090649d3415f0f106511df1410 Mon Sep 17 00:00:00 2001 From: Charles Zaloom Date: Wed, 28 May 2025 17:16:11 -0400 Subject: [PATCH 2/3] fix benchmarks --- benchmarks/benchmark_objdet.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/benchmark_objdet.py b/benchmarks/benchmark_objdet.py index def9264fa..62b4e2f01 100644 --- a/benchmarks/benchmark_objdet.py +++ b/benchmarks/benchmark_objdet.py @@ -322,7 +322,7 @@ def run_benchmarking_analysis( ) if eval_time > evaluation_timeout and evaluation_timeout != -1: raise TimeoutError( - f"Base evaluation timed out with {evaluator.n_datums} datums." + f"Base evaluation timed out with {evaluator.metadata.number_of_datums} datums." ) # evaluate - base metrics + detailed @@ -337,16 +337,16 @@ def run_benchmarking_analysis( and evaluation_timeout != -1 ): raise TimeoutError( - f"Detailed evaluation timed out with {evaluator.n_datums} datums." + f"Detailed evaluation timed out with {evaluator.metadata.number_of_datums} datums." ) results.append( Benchmark( limit=limit, - n_datums=evaluator.n_datums, - n_groundtruths=evaluator.n_groundtruths, - n_predictions=evaluator.n_predictions, - n_labels=evaluator.n_labels, + n_datums=evaluator.metadata.number_of_datums, + n_groundtruths=evaluator.metadata.number_of_ground_truths, + n_predictions=evaluator.metadata.number_of_predictions, + n_labels=evaluator.metadata.number_of_labels, gt_type=gt_type, pd_type=pd_type, chunk_size=chunk_size, From 22631485213b0cc1893b010e00ad9dc3f588a6b6 Mon Sep 17 00:00:00 2001 From: Charles Zaloom Date: Wed, 28 May 2025 17:22:22 -0400 Subject: [PATCH 3/3] removed is_filtered --- src/valor_lite/object_detection/manager.py | 9 ++------- tests/object_detection/test_average_precision.py | 1 - tests/object_detection/test_average_recall.py | 1 - tests/object_detection/test_confusion_matrix.py | 1 - tests/object_detection/test_counts.py | 1 - tests/object_detection/test_evaluator.py | 1 - tests/object_detection/test_filtering.py | 15 ++++++++++++--- tests/object_detection/test_stability.py | 1 - 8 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/valor_lite/object_detection/manager.py b/src/valor_lite/object_detection/manager.py index 0e060da77..fd36af8d0 100644 --- a/src/valor_lite/object_detection/manager.py +++ b/src/valor_lite/object_detection/manager.py @@ -54,7 +54,6 @@ class Metadata: number_of_ground_truths: int = 0 number_of_predictions: int = 0 number_of_labels: int = 0 - is_filtered: bool = False @classmethod def create( @@ -62,7 +61,6 @@ def create( detailed_pairs: NDArray[np.float64], number_of_datums: int, number_of_labels: int, - is_filtered: bool, ): # count number of ground truths mask_valid_gts = detailed_pairs[:, 1] >= 0 @@ -83,7 +81,6 @@ def create( number_of_ground_truths=number_of_ground_truths, number_of_predictions=number_of_predictions, number_of_labels=number_of_labels, - is_filtered=is_filtered, ) def to_dict(self) -> dict[str, int | bool]: @@ -507,7 +504,6 @@ def finalize(self): detailed_pairs=self._detailed_pairs, number_of_datums=n_datums, number_of_labels=n_labels, - is_filtered=False, ) return self @@ -542,7 +538,7 @@ def create_filter( mask_datums=np.zeros_like(mask_datums), mask_groundtruths=np.array([], dtype=np.bool_), mask_predictions=np.array([], dtype=np.bool_), - metadata=Metadata(is_filtered=True), + metadata=Metadata(), ) valid_datum_indices = np.array( [self.datum_id_to_index[uid] for uid in datum_ids], @@ -597,7 +593,7 @@ def create_filter( mask_datums=mask_datums, mask_groundtruths=np.ones_like(mask_datums), mask_predictions=np.ones_like(mask_datums), - metadata=Metadata(is_filtered=True), + metadata=Metadata(), ) valid_label_indices = np.array( [self.label_to_index[label] for label in labels] + [-1] @@ -631,7 +627,6 @@ def create_filter( detailed_pairs=filtered_detailed_pairs, number_of_datums=number_of_datums, number_of_labels=len(self.index_to_label), - is_filtered=True, ), ) diff --git a/tests/object_detection/test_average_precision.py b/tests/object_detection/test_average_precision.py index c684624d7..3736287b1 100644 --- a/tests/object_detection/test_average_precision.py +++ b/tests/object_detection/test_average_precision.py @@ -679,7 +679,6 @@ def test_ap_ranked_pair_ordering( "number_of_ground_truths": 3, "number_of_labels": 4, "number_of_predictions": 4, - "is_filtered": False, } metrics = evaluator.evaluate( diff --git a/tests/object_detection/test_average_recall.py b/tests/object_detection/test_average_recall.py index 130ae1597..43078a254 100644 --- a/tests/object_detection/test_average_recall.py +++ b/tests/object_detection/test_average_recall.py @@ -544,7 +544,6 @@ def test_ar_ranked_pair_ordering( "number_of_ground_truths": 3, "number_of_labels": 4, "number_of_predictions": 4, - "is_filtered": False, } metrics = evaluator.evaluate( diff --git a/tests/object_detection/test_confusion_matrix.py b/tests/object_detection/test_confusion_matrix.py index a0de5aa5b..a96591498 100644 --- a/tests/object_detection/test_confusion_matrix.py +++ b/tests/object_detection/test_confusion_matrix.py @@ -1530,7 +1530,6 @@ def test_confusion_matrix_ranked_pair_ordering( "number_of_ground_truths": 3, "number_of_labels": 4, "number_of_predictions": 4, - "is_filtered": False, } actual_metrics = evaluator.compute_confusion_matrix( diff --git a/tests/object_detection/test_counts.py b/tests/object_detection/test_counts.py index 03c3f0a09..a3755aa23 100644 --- a/tests/object_detection/test_counts.py +++ b/tests/object_detection/test_counts.py @@ -529,7 +529,6 @@ def test_counts_ranked_pair_ordering( "number_of_ground_truths": 3, "number_of_labels": 4, "number_of_predictions": 4, - "is_filtered": False, } metrics = evaluator.evaluate( diff --git a/tests/object_detection/test_evaluator.py b/tests/object_detection/test_evaluator.py index c4a71a266..8dd56e893 100644 --- a/tests/object_detection/test_evaluator.py +++ b/tests/object_detection/test_evaluator.py @@ -34,7 +34,6 @@ def test_metadata_using_torch_metrics_example( "number_of_labels": 6, "number_of_ground_truths": 20, "number_of_predictions": 19, - "is_filtered": False, } diff --git a/tests/object_detection/test_filtering.py b/tests/object_detection/test_filtering.py index 585e513c5..1c1e101db 100644 --- a/tests/object_detection/test_filtering.py +++ b/tests/object_detection/test_filtering.py @@ -649,12 +649,21 @@ def test_filtering_random_detections(): evaluator.evaluate(filter_=filter_) -def test_is_filtered(basic_detections: list[Detection]): +def test_filter_metadata(basic_detections: list[Detection]): manager = DataLoader() manager.add_bounding_boxes(basic_detections) evaluator = manager.finalize() - assert evaluator.metadata.is_filtered is False + assert evaluator.metadata.number_of_datums == 2 + assert evaluator.metadata.number_of_ground_truths == 3 + assert evaluator.metadata.number_of_predictions == 2 filter_ = evaluator.create_filter(datum_ids=["uid1"]) - assert filter_.metadata.is_filtered is True + assert filter_.metadata.number_of_datums == 1 + assert filter_.metadata.number_of_ground_truths == 2 + assert filter_.metadata.number_of_predictions == 1 + + assert ( + evaluator.metadata.number_of_labels + == filter_.metadata.number_of_labels + ) diff --git a/tests/object_detection/test_stability.py b/tests/object_detection/test_stability.py index 3ce06d39a..a850b4511 100644 --- a/tests/object_detection/test_stability.py +++ b/tests/object_detection/test_stability.py @@ -95,7 +95,6 @@ def test_fuzz_confusion_matrix(): "number_of_ground_truths": 30000, "number_of_predictions": 30000, "number_of_labels": 5, - "is_filtered": False, } evaluator.evaluate( iou_thresholds=[0.25, 0.75],