Skip to content

Commit 411dc4d

Browse files
Fix/anomaly scores normalization consistency (#156)
* fix: anomaly scores normalization by ensuring classification with raw and normalized scores * fix: device in ensuring predictions after normalization * fix: upgrade anomalib * fix: ensure_scores_consistency now select minimum between nextafter and 1e-3 for avoiding inconsistencies after rounding
1 parent 24bc19f commit 411dc4d

7 files changed

Lines changed: 413 additions & 47 deletions

File tree

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@ Starting from version 2.6.1, releases are automatically created when changes are
1414

1515
**Note**: If a tag for the current version already exists, the workflow will skip tag and release creation to avoid duplicates.
1616

17+
### [2.8.1]
18+
19+
#### Updated
20+
21+
- Anomalib-orobix to v0.7.0.dev151 in order to make optimal threshold selection more robust with respect to floating point operations.
22+
23+
#### Fixed
24+
25+
- `normalize_anomaly_score` now accepts an optional `eval_threshold` (`EvalThreshold`) parameter. When provided, consistency enforcement uses the actual evaluation boundary instead of always using the training threshold at 100.0, preventing misclassification of samples whose raw score falls close to the evaluation thresholds.
26+
- Consistency enforcement in anomaly score normalization now uses `np.nextafter`/`torch.nextafter` (dtype-aware) instead of hardcoded epsilon values, eliminating ULP-gap misclassifications especially at low-precision (fp16) boundaries.
27+
- `AnomalibEvaluation` now builds an `EvalThreshold` from the optimal evaluation threshold and passes it to `normalize_anomaly_score`, ensuring consistent predictions between raw and normalized anomaly scores and anomaly maps.
28+
1729
### [2.8.0]
1830

1931
#### Added

poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "quadra"
3-
version = "2.8.0"
3+
version = "2.8.1"
44
description = "Deep Learning experiment orchestration library"
55
authors = [
66
"Federico Belotti <federico.belotti@orobix.com>",
@@ -73,7 +73,7 @@ h5py = "~3.8"
7373
timm = "1.0.24"
7474
segmentation_models_pytorch = "0.5.0"
7575

76-
anomalib-orobix = "0.7.0.dev150"
76+
anomalib-orobix = "0.7.0.dev151"
7777
xxhash = "~3.2"
7878
torchinfo = "~1.8"
7979
typing_extensions = { version = "4.11.0", python = "<3.10" }

quadra/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "2.8.0"
1+
__version__ = "2.8.1"
22

33

44
def get_version():

quadra/tasks/anomaly.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from quadra.modules.base import ModelSignatureWrapper
2727
from quadra.tasks.base import Evaluation, LightningTask
2828
from quadra.utils import utils
29-
from quadra.utils.anomaly import MapOrValue, ThresholdNormalizationCallback, normalize_anomaly_score
29+
from quadra.utils.anomaly import EvalThreshold, MapOrValue, ThresholdNormalizationCallback, normalize_anomaly_score
3030
from quadra.utils.classification import get_results
3131
from quadra.utils.evaluation import automatic_datamodule_batch_size
3232
from quadra.utils.export import export_model
@@ -504,7 +504,12 @@ def generate_report(self) -> None:
504504
),
505505
).item()
506506

507-
anomaly_scores = normalize_anomaly_score(anomaly_scores, training_threshold)
507+
# Build an EvalThreshold so that consistency enforcement in normalize_anomaly_score uses the
508+
# actual evaluation boundary for checking the consistencies after normalization. This prevents
509+
# potentially inconsistent classification when switching between raw and normalized scores.
510+
eval_threshold = EvalThreshold(raw=float(optimal_threshold), normalized=normalized_optimal_threshold)
511+
512+
anomaly_scores = normalize_anomaly_score(anomaly_scores, training_threshold, eval_threshold=eval_threshold)
508513

509514
if not isinstance(anomaly_scores, np.ndarray):
510515
raise ValueError("Anomaly scores must be a numpy array")
@@ -543,7 +548,9 @@ def generate_report(self) -> None:
543548
if hasattr(self.datamodule, "crop_area") and self.datamodule.crop_area is not None:
544549
crop_area = self.datamodule.crop_area
545550

546-
anomaly_maps = normalize_anomaly_score(self.metadata["anomaly_maps"], training_threshold)
551+
anomaly_maps = normalize_anomaly_score(
552+
self.metadata["anomaly_maps"], training_threshold, eval_threshold=eval_threshold
553+
)
547554

548555
if not isinstance(anomaly_maps, torch.Tensor):
549556
raise ValueError("Anomaly maps must be a tensor")

quadra/utils/anomaly.py

Lines changed: 107 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,64 +20,144 @@
2020
import pytorch_lightning as pl
2121
import torch # pylint: disable=unused-import
2222
from anomalib.models.components import AnomalyModule
23+
from pydantic import BaseModel
2324
from pytorch_lightning import Callback
2425
from pytorch_lightning.utilities.types import STEP_OUTPUT
2526

2627
# https://github.com/python/cpython/issues/90015#issuecomment-1172996118
2728
MapOrValue: TypeAlias = "float | torch.Tensor | np.ndarray"
2829

2930

30-
def normalize_anomaly_score(raw_score: MapOrValue, threshold: float) -> MapOrValue:
31-
"""Normalize anomaly score value or map based on threshold.
31+
class EvalThreshold(BaseModel):
32+
"""Pair of raw and normalized threshold values used for consistency enforcement.
33+
34+
Attributes:
35+
raw: The unnormalized threshold.
36+
normalized: The corresponding normalized threshold.
37+
"""
38+
39+
raw: float
40+
normalized: float
41+
42+
43+
def ensure_scores_consistency(
44+
normalized_score: MapOrValue,
45+
raw_score: MapOrValue,
46+
eval_threshold: EvalThreshold,
47+
) -> MapOrValue:
48+
"""Enforce that the classification based on normalized scores matches the raw classification.
49+
50+
For every sample, if `raw_score >= eval_threshold.raw` (anomaly), the normalized score is
51+
clipped to be at least `eval_threshold.normalized`. If `raw_score < eval_threshold.raw`
52+
(normal), the normalized score is clipped to be strictly below `eval_threshold.normalized`
53+
using the smaller of `nextafter(boundary, -inf)` and `boundary - 1e-3`, so that the classification also survives rounding to 3 decimal places.
3254
3355
Args:
34-
raw_score: Raw anomaly score valure or map
35-
threshold: Threshold for anomaly detection
56+
normalized_score: Normalized anomaly score value or map to adjust.
57+
raw_score: Original (unnormalized) anomaly score used to determine the ground-truth
58+
classification for each sample.
59+
eval_threshold: Threshold pair defining the decision boundary in both spaces.
3660
3761
Returns:
38-
Normalized anomaly score value or map clipped between 0 and 1000
62+
Normalized score with consistent predictions.
3963
"""
40-
if threshold > 0:
41-
normalized_score = (raw_score / threshold) * 100.0
42-
elif threshold == 0:
43-
# TODO: Is this the best way to handle this case?
44-
normalized_score = (raw_score + 1) * 100.0
45-
else:
46-
normalized_score = 200.0 - ((raw_score / threshold) * 100.0)
47-
48-
# Ensures that the normalized scores are consistent with the raw scores
49-
# For all the items whose prediction changes after normalization, force the normalized score to be
50-
# consistent with the prediction made on the raw score by clipping the score:
51-
# - to 100.0 if the prediction was "anomaly" on the raw score and "good" on the normalized score
52-
# - to 99.99 if the prediction was "good" on the raw score and "anomaly" on the normalized score
5364
score = raw_score
5465
if isinstance(score, torch.Tensor):
5566
score = score.cpu().numpy()
56-
# Anomalib classify as anomaly if anomaly_score gte threshold
57-
is_anomaly_mask = score >= threshold
67+
68+
boundary = eval_threshold.normalized
69+
is_anomaly_mask = score >= eval_threshold.raw
5870
is_not_anomaly_mask = np.bitwise_not(is_anomaly_mask)
71+
72+
_inf: torch.Tensor | np.ndarray
73+
below_boundary: torch.Tensor | np.ndarray
74+
anomaly_boundary: torch.Tensor | np.ndarray
75+
epsilon = 1e-3
5976
if isinstance(normalized_score, torch.Tensor):
77+
device = normalized_score.device
78+
# Work in the scores' dtype: cast boundaries to the same dtype to ensure that casts take effect
79+
_inf = torch.tensor(float("inf"), dtype=normalized_score.dtype, device=device)
80+
boundary_tensor = torch.tensor(boundary, dtype=normalized_score.dtype, device=device)
81+
anomaly_boundary = boundary_tensor.clone()
82+
# If dtype cast causes anomaly_boundary to be smaller than normalized boundary (float),
83+
# increase it up to the next representable value
84+
if float(anomaly_boundary) < boundary:
85+
anomaly_boundary = torch.nextafter(anomaly_boundary, _inf)
86+
# Ensure consistency after rounding to 3 decimal places
87+
below_boundary = torch.min(torch.nextafter(boundary_tensor, -_inf), boundary_tensor - epsilon)
88+
6089
if normalized_score.dim() == 0:
6190
normalized_score = (
62-
normalized_score.clamp(min=100.0) if is_anomaly_mask else normalized_score.clamp(max=99.99)
91+
normalized_score.clamp(min=anomaly_boundary)
92+
if is_anomaly_mask
93+
else normalized_score.clamp(max=below_boundary)
6394
)
6495
else:
65-
normalized_score[is_anomaly_mask] = normalized_score[is_anomaly_mask].clamp(min=100.0)
66-
normalized_score[is_not_anomaly_mask] = normalized_score[is_not_anomaly_mask].clamp(max=99.99)
96+
normalized_score[is_anomaly_mask] = normalized_score[is_anomaly_mask].clamp(min=anomaly_boundary)
97+
normalized_score[is_not_anomaly_mask] = normalized_score[is_not_anomaly_mask].clamp(max=below_boundary)
6798
elif isinstance(normalized_score, np.ndarray) or np.isscalar(normalized_score):
99+
# Work in the scores' dtype: cast boundaries to the same dtype to ensure that casts take effect
100+
dtype = normalized_score.dtype if isinstance(normalized_score, np.ndarray) else np.float64
101+
_inf = np.array(np.inf, dtype=dtype)
102+
boundary_array = np.array(boundary, dtype=dtype)
103+
anomaly_boundary = boundary_array.copy()
104+
# If dtype cast causes anomaly_boundary to be smaller than normalized boundary (float),
105+
# increase it up to the next representable value
106+
if float(anomaly_boundary) < boundary:
107+
anomaly_boundary = np.nextafter(anomaly_boundary, _inf)
108+
# Ensure consistency after rounding to 3 decimal places
109+
below_boundary = np.minimum(np.nextafter(boundary_array, -_inf), boundary_array - epsilon)
110+
68111
if np.isscalar(normalized_score) or normalized_score.ndim == 0: # type: ignore[union-attr]
69112
normalized_score = (
70-
np.clip(normalized_score, a_min=100.0, a_max=None)
113+
np.clip(normalized_score, a_min=anomaly_boundary, a_max=None)
71114
if is_anomaly_mask
72-
else np.clip(normalized_score, a_min=None, a_max=99.99)
115+
else np.clip(normalized_score, a_min=None, a_max=below_boundary)
73116
)
74117
else:
75118
normalized_score = cast(np.ndarray, normalized_score)
76-
normalized_score[is_anomaly_mask] = np.clip(normalized_score[is_anomaly_mask], a_min=100.0, a_max=None)
119+
normalized_score[is_anomaly_mask] = np.clip(
120+
normalized_score[is_anomaly_mask], a_min=anomaly_boundary, a_max=None
121+
)
77122
normalized_score[is_not_anomaly_mask] = np.clip(
78-
normalized_score[is_not_anomaly_mask], a_min=None, a_max=99.99
123+
normalized_score[is_not_anomaly_mask], a_min=None, a_max=below_boundary
79124
)
80125

126+
return normalized_score
127+
128+
129+
def normalize_anomaly_score(
130+
raw_score: MapOrValue,
131+
threshold: float,
132+
eval_threshold: EvalThreshold | None = None,
133+
) -> MapOrValue:
134+
"""Normalize anomaly score value or map based on threshold.
135+
136+
The training threshold maps to 100.0 in normalized space. After the linear scaling,
137+
`ensure_scores_consistency` is called to guarantee that every sample's normalized
138+
classification matches its raw classification.
139+
140+
Args:
141+
raw_score: Raw anomaly score value or map.
142+
threshold: Threshold for anomaly detection, usually it is the training threshold.
143+
eval_threshold: Threshold used during evaluation. It is used to ensure consistency between raw scores
144+
and normalized scores. When `None`, an `EvalThreshold` with `raw=threshold` and `normalized=100.0` is used,
145+
which reproduces the original behaviour for the training-threshold case.
146+
147+
Returns:
148+
Normalized anomaly score value or map clipped between 0 and 1000
149+
"""
150+
if threshold > 0:
151+
normalized_score = (raw_score / threshold) * 100.0
152+
elif threshold == 0:
153+
# TODO: Is this the best way to handle this case?
154+
normalized_score = (raw_score + 1) * 100.0
155+
else:
156+
normalized_score = 200.0 - ((raw_score / threshold) * 100.0)
157+
158+
_eval_threshold = eval_threshold if eval_threshold is not None else EvalThreshold(raw=threshold, normalized=100.0)
159+
normalized_score = ensure_scores_consistency(normalized_score, raw_score, _eval_threshold)
160+
81161
if isinstance(normalized_score, torch.Tensor):
82162
return torch.clamp(normalized_score, 0.0, 1000.0)
83163

0 commit comments

Comments
 (0)