Skip to content

Commit e5dec07

Browse files
authored
Merge pull request #104 from amosproj/feature/iqr-anomaly-detection
Add IQR anomaly detection RTDIP component
2 parents ff448d6 + 9437736 commit e5dec07

File tree

8 files changed

+1514
-0
lines changed

8 files changed

+1514
-0
lines changed

.gitignore

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,20 @@ config.share
140140

141141
# JetBrains
142142
.idea/
143+
144+
145+
146+
# Notebooks & outputs
147+
*.ipynb
148+
outputs/
149+
150+
# Local data
151+
amos_team_resources/shell/data/
152+
153+
# Python cache
154+
__pycache__/
155+
156+
157+
# Backup folders
158+
src/sdk/python/rtdip_sdk/pipelines/anomaly_detection/spark/iqr-backup/
159+

amos_team_resources/anomaly_detection/iqr/ad_shell_iqr.ipynb

Lines changed: 1360 additions & 0 deletions
Large diffs are not rendered by default.
64.3 KB
Loading
77.3 KB
Loading
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""Public entry points of the IQR anomaly detection package."""

from .decomposition_iqr_anomaly_detection import (
    DecompositionIQRAnomalyDetectionComponent,
)
from .iqr_anomaly_detection import IQRAnomalyDetectionComponent

# Names re-exported by ``from <package> import *``.
__all__ = [
    "IQRAnomalyDetectionComponent",
    "DecompositionIQRAnomalyDetectionComponent",
]
10+
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import pandas as pd
2+
3+
from .iqr_anomaly_detection import IQRAnomalyDetectionComponent
4+
from .interfaces import IQRAnomalyDetectionConfig
5+
6+
7+
class DecompositionIQRAnomalyDetectionComponent(IQRAnomalyDetectionComponent):
    """
    IQR anomaly detection on one component of a decomposed time series.

    The component named by the ``input_component`` config key is handed to
    the parent IQR detector in place of the regular value column.

    Expected input columns:
    - residual (default)
    - trend
    - seasonal
    """

    def __init__(self, config: IQRAnomalyDetectionConfig):
        """
        Store the parent settings plus the decomposition component to scan.

        Args:
            config: Component configuration. ``input_component`` selects the
                column to analyse and falls back to ``"residual"``.
        """
        super().__init__(config)
        self.input_component: str = config.get("input_component", "residual")

    def run(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Run anomaly detection on the selected decomposition component.

        Args:
            df: Frame containing the ``input_component`` column.

        Returns:
            A new frame with the columns produced by the parent detector
            (including ``is_anomaly``). If the input already had a
            ``value_column``, its original values are kept in the result.
            If it did not, the column is added and holds the analysed
            component.

        Raises:
            ValueError: If ``input_component`` is not a column of ``df``.
        """
        if self.input_component not in df.columns:
            raise ValueError(
                f"Column '{self.input_component}' not found in input DataFrame"
            )

        # The parent reads ``value_column``, so expose the component under
        # that name on a private copy; the caller's frame is never touched.
        working = df.copy()
        working[self.value_column] = working[self.input_component]

        result = super().run(working)

        # Undo the temporary aliasing: without this the caller's original
        # measurements in ``value_column`` would be silently replaced by the
        # decomposition component in the returned frame.
        if self.value_column in df.columns:
            result[self.value_column] = df[self.value_column]

        return result
37+
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from typing import TypedDict, Optional
2+
3+
4+
class IQRAnomalyDetectionConfig(TypedDict, total=False):
    """
    Typed dictionary describing the options of the IQR anomaly detectors.

    Declared with ``total=False``, so every key may be omitted.
    """

    k: float  # multiplier applied to the IQR when building the fences
    window: Optional[int]  # rolling window size; None selects a global IQR

    value_column: str  # name of the column holding the values to check
    time_column: str  # name of the column holding the timestamps

    input_component: str  # only read by the decomposition-based component
21+
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import pandas as pd
2+
from typing import Optional
3+
4+
from rtdip_sdk.pipelines.interfaces import PipelineComponent
5+
from .interfaces import IQRAnomalyDetectionConfig
6+
7+
8+
class IQRAnomalyDetectionComponent(PipelineComponent):
    """
    RTDIP component implementing IQR-based anomaly detection.

    A value is flagged when it lies below ``Q1 - k * IQR`` or above
    ``Q3 + k * IQR``.

    Supports:
    - Global IQR (window=None): quartiles taken over the whole column
    - Rolling IQR (window=int): quartiles taken over a trailing row window
    """

    def __init__(self, config: IQRAnomalyDetectionConfig):
        """
        Read and validate the detector settings.

        Args:
            config: Component configuration. Missing keys fall back to
                ``k=1.5``, ``window=None``, ``value_column="value"`` and
                ``time_column="timestamp"``.

        Raises:
            ValueError: If ``k`` is negative or ``window`` is smaller than 1.
        """
        k = config.get("k", 1.5)
        window = config.get("window", None)

        # A negative factor would put the lower fence above the upper one and
        # flag nearly every row, so reject it up front.
        if k < 0:
            raise ValueError(f"'k' must be non-negative, got {k!r}")
        # A non-positive window can never produce usable quartiles.
        if window is not None and window < 1:
            raise ValueError(
                f"'window' must be None or a positive integer, got {window!r}"
            )

        self.k: float = k
        self.window: Optional[int] = window

        self.value_column: str = config.get("value_column", "value")
        # Stored for configuration completeness; run() does not read it and
        # processes rows in the order they appear in the frame.
        self.time_column: str = config.get("time_column", "timestamp")

    def run(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Run IQR anomaly detection on a time series DataFrame.

        Args:
            df: Frame containing ``value_column``.

        Returns:
            A copy of ``df`` with an additional boolean column
            ``is_anomaly``. In rolling mode, rows whose window is not yet
            full have NaN fences and are therefore never flagged.

        Raises:
            ValueError: If ``value_column`` is not a column of ``df``.
        """
        if self.value_column not in df.columns:
            raise ValueError(
                f"Column '{self.value_column}' not found in input DataFrame"
            )

        values = df[self.value_column]

        if self.window is None:
            # Global IQR: scalar quartiles over the whole column.
            q1 = values.quantile(0.25)
            q3 = values.quantile(0.75)
        else:
            # Rolling IQR: one quartile pair per row over the trailing window.
            rolling = values.rolling(self.window)
            q1 = rolling.quantile(0.25)
            q3 = rolling.quantile(0.75)

        # The fence arithmetic is the same for scalar and per-row quartiles.
        margin = self.k * (q3 - q1)
        lower = q1 - margin
        upper = q3 + margin

        result = df.copy()
        # Comparisons against NaN evaluate to False, so NaN values and NaN
        # fences never produce an anomaly flag.
        result["is_anomaly"] = (values < lower) | (values > upper)

        return result
69+

0 commit comments

Comments
 (0)