Skip to content

Commit 97799e0

Browse files
ram-senth and aayush-se authored
chore(anomaly-detection): add power transformation and z-score based scorer (#1837)
Co-authored-by: Aayush Seth <[email protected]>
1 parent b9aa672 commit 97799e0

File tree

9 files changed

+709
-97
lines changed

9 files changed

+709
-97
lines changed

src/seer/anomaly_detection/detectors/__init__.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from seer.anomaly_detection.detectors import (
22
anomaly_detectors,
3+
mp_boxcox_scorer,
4+
mp_cascading_scorer,
35
mp_scorers,
46
mp_utils,
57
normalizers,
@@ -15,8 +17,10 @@
1517
SuSSWindowSizeSelector = window_size_selectors.SuSSWindowSizeSelector
1618
FlagsAndScores = mp_scorers.FlagsAndScores
1719
MPScorer = mp_scorers.MPScorer
18-
MPCascadingScorer = mp_scorers.MPCascadingScorer
19-
20+
MPCascadingScorer = mp_cascading_scorer.MPCascadingScorer
21+
LowVarianceScorer = mp_scorers.LowVarianceScorer
22+
MPBoxCoxScorer = mp_boxcox_scorer.MPBoxCoxScorer
23+
MPIQRScorer = mp_scorers.MPIQRScorer
2024
Normalizer = normalizers.Normalizer
2125
MinMaxNormalizer = normalizers.MinMaxNormalizer
2226

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
import datetime
2+
from typing import Dict, List, Tuple
3+
4+
import numpy as np
5+
import numpy.typing as npt
6+
import sentry_sdk
7+
from pydantic import Field
8+
from scipy import special, stats
9+
10+
from seer.anomaly_detection.detectors.location_detectors import LocationDetector
11+
from seer.anomaly_detection.detectors.mp_scorers import FlagsAndScores, MPScorer
12+
from seer.anomaly_detection.models import (
13+
AlgoConfig,
14+
AnomalyDetectionConfig,
15+
AnomalyFlags,
16+
Directions,
17+
PointLocation,
18+
Sensitivities,
19+
Threshold,
20+
ThresholdType,
21+
)
22+
from seer.dependency_injection import inject, injected
23+
from seer.exceptions import ClientError, ServerError
24+
25+
26+
class MPBoxCoxScorer(MPScorer):
    """
    Score matrix-profile distances using a Box-Cox transform followed by z-scores.

    The matrix-profile distances are first passed through a Box-Cox power
    transformation, then standardized. A point is flagged when its z-score is
    above the cutoff configured for the requested sensitivity.
    """

    # The cutoffs are compared one-sidedly (``score > threshold``), so they are
    # upper-tail quantiles of the standard normal distribution.
    z_score_thresholds: Dict[Sensitivities, float] = Field(
        {
            "high": 1.28,  # ~90th percentile (one-sided)
            "medium": 1.64,  # ~95th percentile (one-sided)
            "low": 2.32,  # ~99th percentile (one-sided)
        },
        description="Z-score thresholds for different sensitivity levels",
    )

    def _inverse_box_cox_transform(self, x: float, bc_lambda: float, min_val: float) -> float:
        """Map a value from the transformed scale back to the original scale.

        Parameters:
            x: A value on the Box-Cox (or log) transformed scale
            bc_lambda: The lambda returned by ``_box_cox_transform``. For
                lambda <= 0 the forward transform is a plain log, so the
                inverse is ``exp``.
            min_val: The minimum of the original data, as returned by
                ``_box_cox_transform``. When it is <= 0 the data was shifted by
                ``-min_val + 1`` before transforming, and that shift is undone here.

        Returns:
            The value expressed in the original scale
        """
        if bc_lambda <= 0:
            converted = np.exp([x])[0]
        else:
            converted = special.inv_boxcox([x], bc_lambda)[0]

        # Undo the positivity shift applied by the forward transform.
        if min_val <= 0:
            return converted + min_val - 1
        return converted

    def _box_cox_transform(
        self, x: npt.NDArray[np.float64]
    ) -> Tuple[npt.NDArray[np.float64], float, float]:
        """Apply a Box-Cox transformation to the data.

        NaN entries are ignored while fitting and are kept as NaN, at their
        original positions, in the returned array.

        Parameters:
            x: The data to be transformed

        Returns:
            A tuple ``(transformed, bc_lambda, min_val)`` where ``transformed``
            has the same length as ``x``, ``bc_lambda`` is the fitted lambda
            (0 for constant data) and ``min_val`` is the minimum of the non-NaN
            input values.

        Raises:
            ValueError: If ``x`` contains no non-NaN values.
        """
        nan_mask = np.isnan(x)
        x_clean = x[~nan_mask]

        if x_clean.size == 0:
            raise ValueError("Cannot apply Box-Cox transformation: no non-NaN values")

        min_val = float(np.min(x_clean))

        # Box-Cox needs strictly positive input; shift so the minimum becomes 1.
        x_positive = x_clean - min_val + 1 if min_val <= 0 else x_clean

        # Constancy must be checked on the NaN-free data: a NaN in x would make
        # an element-wise comparison against x[0] fail.
        if np.all(x_clean == x_clean[0]):
            # stats.boxcox rejects constant input. Use the log transform
            # (Box-Cox with lambda = 0) so the result stays consistent with
            # _inverse_box_cox_transform, which uses exp for lambda <= 0.
            bc_lambda = 0.0
            transformed_clean = np.log(x_positive)
        else:
            transformed_clean, fitted_lambda = stats.boxcox(x_positive)
            bc_lambda = float(fitted_lambda)
            if bc_lambda <= 0:
                transformed_clean = np.log(x_positive)

        if not nan_mask.any():
            return transformed_clean, bc_lambda, min_val

        # Put the transformed values back where the non-NaN inputs were.
        transformed = np.full(x.shape, np.nan, dtype=np.float64)
        transformed[~nan_mask] = transformed_clean
        return transformed, bc_lambda, min_val

    def _get_z_scores(
        self, values: npt.NDArray[np.float64], sensitivity: Sensitivities
    ) -> Tuple[npt.NDArray[np.float64], float, float, float]:
        """Calculate z-scores of the Box-Cox transformed values and the cutoff.

        Parameters:
            values: The values to score; NaN entries yield NaN z-scores
            sensitivity: Key into ``z_score_thresholds``

        Returns:
            A tuple ``(z_scores, threshold, std, threshold_transformed)``:
            the z-scores aligned with ``values``, the z-score cutoff, the
            standard deviation of the transformed data, and the cutoff mapped
            back to the original scale of ``values``.

        Raises:
            ClientError: If ``sensitivity`` has no configured threshold.
            ValueError: If ``values`` contains no non-NaN entries.
        """
        if sensitivity not in self.z_score_thresholds:
            raise ClientError(f"Invalid sensitivity: {sensitivity}")
        threshold = self.z_score_thresholds[sensitivity]

        # Score only the non-NaN entries; NaNs are restored afterwards.
        nan_mask = np.isnan(values)
        transformed, bc_lambda, min_val = self._box_cox_transform(values[~nan_mask])

        mean = float(np.mean(transformed))
        std = float(np.std(transformed))
        z_scores = (transformed - mean) / std if std > 0 else np.zeros_like(transformed)

        # The cutoff on the transformed scale is mean + threshold * std;
        # express it in the original scale for reporting.
        threshold_transformed = self._inverse_box_cox_transform(
            (threshold * std) + mean, bc_lambda, min_val
        )

        z_scores_with_nans = np.full(len(values), np.nan)
        z_scores_with_nans[~nan_mask] = z_scores

        return z_scores_with_nans, threshold, std, threshold_transformed

    @inject
    def batch_score(
        self,
        values: npt.NDArray[np.float64],
        timestamps: npt.NDArray[np.float64],
        mp_dist: npt.NDArray[np.float64],
        ad_config: AnomalyDetectionConfig,
        window_size: int,
        time_budget_ms: int | None = None,
        algo_config: AlgoConfig = injected,
        location_detector: LocationDetector = injected,
    ) -> FlagsAndScores:
        """Score every point of a series from its matrix-profile distances.

        Parameters:
            values: The raw time series values
            timestamps: Timestamps aligned with ``values``
            mp_dist: Matrix-profile distances to score
            ad_config: Supplies the sensitivity and the direction to alert on
            window_size: Not used by this scorer
            time_budget_ms: Optional time budget for the scoring loop
            algo_config: Supplies ``direction_detection_num_timesteps_in_batch_mode``
            location_detector: Used to decide whether a flagged point is up or down

        Returns:
            Flags, z-scores and thresholds, one entry per element of ``mp_dist``.

        Raises:
            ServerError: If the time budget is exceeded.
        """
        z_scores, threshold, std, threshold_transformed = self._get_z_scores(
            mp_dist, ad_config.sensitivity
        )
        scores = []
        flags = []
        thresholds = []
        time_allocated = datetime.timedelta(milliseconds=time_budget_ms) if time_budget_ms else None
        time_start = datetime.datetime.now()
        # Direction is only evaluated for the trailing points of the batch.
        idx_to_detect_location_from = (
            len(mp_dist) - algo_config.direction_detection_num_timesteps_in_batch_mode
        )
        # Only look at the clock every `batch_size` iterations.
        batch_size = 10 if len(mp_dist) > 10 else 1
        for i, score in enumerate(z_scores):
            if time_allocated is not None and i % batch_size == 0:
                time_elapsed = datetime.datetime.now() - time_start
                if time_elapsed > time_allocated:
                    sentry_sdk.set_extra("time_taken_for_batch_detection", time_elapsed)
                    sentry_sdk.set_extra("time_allocated_for_batch_detection", time_allocated)
                    sentry_sdk.capture_message(
                        "batch_detection_took_too_long",
                        level="error",
                    )
                    raise ServerError("Batch detection took too long")

            flag: AnomalyFlags = "none"
            location_thresholds: List[Threshold] = []

            # NaN scores compare False here and are therefore never flagged.
            if std != 0 and score > threshold:
                flag = "anomaly_higher_confidence"
                if i >= idx_to_detect_location_from:
                    # Direction is judged on the raw series, matching
                    # stream_score, not on the matrix-profile distances.
                    flag, location_thresholds = self._adjust_flag_for_direction(
                        flag,
                        ad_config.direction,
                        values[i],
                        timestamps[i],
                        values[:i],
                        timestamps[:i],
                        location_detector,
                    )

            cur_thresholds = [
                Threshold(
                    type=ThresholdType.BOX_COX_THRESHOLD,
                    upper=threshold_transformed,
                    lower=-threshold_transformed,
                )
            ]
            cur_thresholds.extend(location_thresholds)

            scores.append(score)
            flags.append(flag)
            thresholds.append(cur_thresholds)

        return FlagsAndScores(flags=flags, scores=scores, thresholds=thresholds)

    @inject
    def stream_score(
        self,
        streamed_value: np.float64,
        streamed_timestamp: np.float64,
        streamed_mp_dist: np.float64,
        history_values: npt.NDArray[np.float64],
        history_timestamps: npt.NDArray[np.float64],
        history_mp_dist: npt.NDArray[np.float64],
        ad_config: AnomalyDetectionConfig,
        window_size: int,
        algo_config: AlgoConfig = injected,
        location_detector: LocationDetector = injected,
    ) -> FlagsAndScores:
        """Score a single streamed point against its history.

        Parameters:
            streamed_value: The newly observed raw value
            streamed_timestamp: Timestamp of the new value
            streamed_mp_dist: Matrix-profile distance of the new value
            history_values: Previously observed raw values
            history_timestamps: Timestamps aligned with ``history_values``
            history_mp_dist: Matrix-profile distances of the history
            ad_config: Supplies the sensitivity and the direction to alert on
            window_size: Not used by this scorer
            algo_config: Not used by this scorer
            location_detector: Used to decide whether a flagged point is up or down

        Returns:
            Single-element flags, scores and thresholds for the streamed point.
        """
        # Include current value in z-score calculation
        values = np.append(history_mp_dist, streamed_mp_dist)
        z_scores, threshold, std, threshold_transformed = self._get_z_scores(
            values, ad_config.sensitivity
        )

        # Get z-score for streamed value
        score = z_scores[-1]

        flag: AnomalyFlags = "none"
        thresholds: List[Threshold] = []

        # Same test as batch_score: a NaN score compares False and is not flagged.
        if std != 0 and score > threshold:
            flag, location_thresholds = self._adjust_flag_for_direction(
                "anomaly_higher_confidence",
                ad_config.direction,
                streamed_value,
                streamed_timestamp,
                history_values,
                history_timestamps,
                location_detector,
            )
            # Copy so the append below does not mutate the detector's own list.
            thresholds = list(location_thresholds)

        thresholds.append(
            Threshold(
                type=ThresholdType.BOX_COX_THRESHOLD,
                upper=threshold_transformed,
                lower=-threshold_transformed,
            )
        )

        return FlagsAndScores(
            flags=[flag],
            scores=[score],
            thresholds=[thresholds],
        )

    def _adjust_flag_for_direction(
        self,
        flag: AnomalyFlags,
        direction: Directions,
        streamed_value: np.float64,
        streamed_timestamp: np.float64,
        history_values: npt.NDArray[np.float64],
        history_timestamps: npt.NDArray[np.float64],
        location_detector: LocationDetector,
    ) -> Tuple[AnomalyFlags, List[Threshold]]:
        """Suppress a flag when the point lies on the side the alert ignores.

        Parameters:
            flag: The flag decided from the z-score
            direction: "both", "up" or "down"
            streamed_value: The value being evaluated
            streamed_timestamp: Timestamp of the value being evaluated
            history_values: Values preceding the evaluated one
            history_timestamps: Timestamps aligned with ``history_values``
            location_detector: Detector returning the relative location of the point

        Returns:
            The (possibly downgraded) flag and the thresholds reported by the
            location detector. The list is empty when no detection was needed
            or the detector returned nothing.

        Raises:
            ValueError: If a location must be detected but there is no history.
        """
        # Nothing to adjust: no anomaly, or either direction counts.
        if flag == "none" or direction == "both":
            return flag, []

        if len(history_values) == 0:
            raise ValueError("No history values to detect location")
        relative_location = location_detector.detect(
            streamed_value, streamed_timestamp, history_values, history_timestamps
        )
        if relative_location is None:
            return flag, []

        if (direction == "up" and relative_location.location != PointLocation.UP) or (
            direction == "down" and relative_location.location != PointLocation.DOWN
        ):
            return "none", relative_location.thresholds
        return flag, relative_location.thresholds
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
from typing import Optional
2+
3+
import numpy as np
4+
import numpy.typing as npt
5+
from pydantic import Field
6+
7+
from seer.anomaly_detection.detectors.location_detectors import LocationDetector
8+
from seer.anomaly_detection.detectors.mp_boxcox_scorer import MPBoxCoxScorer
9+
from seer.anomaly_detection.detectors.mp_scorers import FlagsAndScores, LowVarianceScorer, MPScorer
10+
from seer.anomaly_detection.models import AlgoConfig, AnomalyDetectionConfig
11+
from seer.dependency_injection import inject, injected
12+
13+
14+
class MPCascadingScorer(MPScorer):
    """
    Matrix Profile scorer that delegates to an ordered chain of other scorers.

    Each scorer in ``scorers`` is tried in turn and the first result that is
    not ``None`` is returned. If every scorer returns ``None``, so does this one.

    By default the chain is a LowVarianceScorer followed by an MPBoxCoxScorer.
    """

    scorers: list[MPScorer] = Field(
        [LowVarianceScorer(), MPBoxCoxScorer()], description="The list of scorers to cascade"
    )

    @inject
    def batch_score(
        self,
        values: npt.NDArray[np.float64],
        timestamps: npt.NDArray[np.float64],
        mp_dist: npt.NDArray[np.float64],
        ad_config: AnomalyDetectionConfig,
        window_size: int,
        time_budget_ms: int | None = None,
        algo_config: AlgoConfig = injected,
        location_detector: LocationDetector = injected,
    ) -> Optional[FlagsAndScores]:
        """Return the first non-None batch result produced by the scorer chain."""
        # All scorers receive exactly the same positional arguments.
        forwarded = (
            values,
            timestamps,
            mp_dist,
            ad_config,
            window_size,
            time_budget_ms,
            algo_config,
            location_detector,
        )
        for candidate in self.scorers:
            outcome = candidate.batch_score(*forwarded)
            if outcome is None:
                # This scorer produced no result; fall through to the next one.
                continue
            return outcome
        return None

    @inject
    def stream_score(
        self,
        streamed_value: np.float64,
        streamed_timestamp: np.float64,
        streamed_mp_dist: np.float64,
        history_values: npt.NDArray[np.float64],
        history_timestamps: npt.NDArray[np.float64],
        history_mp_dist: npt.NDArray[np.float64],
        ad_config: AnomalyDetectionConfig,
        window_size: int,
        algo_config: AlgoConfig = injected,
        location_detector: LocationDetector = injected,
    ) -> Optional[FlagsAndScores]:
        """Return the first non-None streaming result produced by the scorer chain."""
        # All scorers receive exactly the same positional arguments.
        forwarded = (
            streamed_value,
            streamed_timestamp,
            streamed_mp_dist,
            history_values,
            history_timestamps,
            history_mp_dist,
            ad_config,
            window_size,
            algo_config,
            location_detector,
        )
        for candidate in self.scorers:
            outcome = candidate.stream_score(*forwarded)
            if outcome is None:
                # This scorer produced no result; fall through to the next one.
                continue
            return outcome
        return None

0 commit comments

Comments
 (0)