Skip to content

Commit 428b39a

Browse files
committed
added matplot and plotly visualization; fixed import bug in mad ad
Signed-off-by: Christoph Huy <christoph.huy@campus.tu-berlin.de>
1 parent 1f5ca9b commit 428b39a

File tree

8 files changed

+5032
-0
lines changed

8 files changed

+5032
-0
lines changed

amos_team_resources/anomaly_detection/mad/anomaly_detection_plot.html

Lines changed: 3888 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 9,
6+
"id": "5682a007",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stdout",
11+
"output_type": "stream",
12+
"text": [
13+
"Imports complete\n"
14+
]
15+
}
16+
],
17+
"source": [
18+
"import pandas as pd\n",
19+
"import numpy as np\n",
20+
"import matplotlib.pyplot as plt\n",
21+
"import sys\n",
22+
"import json\n",
23+
"from pathlib import Path\n",
24+
"\n",
25+
"# Add SDK to path\n",
26+
"SDK_PATH = Path().resolve().parents[2] / \"src\" / \"sdk\" / \"python\"\n",
27+
"sys.path.insert(0, str(SDK_PATH))\n",
28+
"\n",
29+
"from rtdip_sdk.pipelines.visualization.plotly.anomaly_detection import AnomalyDetectionPlotly\n",
30+
"from rtdip_sdk.pipelines.visualization.matplotlib.anomaly_detection import AnomalyDetectionPlot\n",
31+
"\n",
32+
"from rtdip_sdk.pipelines.anomaly_detection.spark.mad.mad_anomaly_detection import MadAnomalyDetection\n",
33+
"\n",
34+
"print(\"Imports complete\")"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": 22,
40+
"id": "9a3db7c3",
41+
"metadata": {},
42+
"outputs": [],
43+
"source": [
44+
"from pyspark.sql import SparkSession\n",
45+
"from datetime import datetime, timedelta\n",
46+
"\n",
47+
"session = SparkSession.builder.appName(\"PlotlyDemoAD\").getOrCreate()\n",
48+
"\n",
49+
"\n",
50+
"base = datetime(2024, 1, 1)\n",
51+
"\n",
52+
"data = [\n",
53+
" (base + timedelta(seconds=i), v)\n",
54+
" for i, v in enumerate(\n",
55+
" [10.0, 12.0, 10.5, 11.0, 30.0, 10.2, 9.8, 10.1, 10.3, 10.0]\n",
56+
" )\n",
57+
"]\n",
58+
"columns = [\"timestamp\", \"value\"]\n",
59+
"\n",
60+
"df = session.createDataFrame(data, columns)"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": 23,
66+
"id": "f290695a",
67+
"metadata": {},
68+
"outputs": [],
69+
"source": [
70+
"ad_scorer = MadAnomalyDetection()\n",
71+
"\n",
72+
"anoms = ad_scorer.detect(df)"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": 24,
78+
"id": "abca5439",
79+
"metadata": {},
80+
"outputs": [
81+
{
82+
"data": {
83+
"text/plain": [
84+
"PosixPath('anomaly_detection_plot.html')"
85+
]
86+
},
87+
"execution_count": 24,
88+
"metadata": {},
89+
"output_type": "execute_result"
90+
}
91+
],
92+
"source": [
93+
"plotly_visualizer = AnomalyDetectionPlotly(ts_data=df, ad_data=anoms, sensor_id=\"Plotly_Demo\")\n",
94+
"plotly_visualizer.plot()\n",
95+
"plotly_visualizer.save(\"anomaly_detection_plot.html\")"
96+
]
97+
}
98+
],
99+
"metadata": {
100+
"kernelspec": {
101+
"display_name": "rtdip-sdk",
102+
"language": "python",
103+
"name": "python3"
104+
},
105+
"language_info": {
106+
"codemirror_mode": {
107+
"name": "ipython",
108+
"version": 3
109+
},
110+
"file_extension": ".py",
111+
"mimetype": "text/x-python",
112+
"name": "python",
113+
"nbconvert_exporter": "python",
114+
"pygments_lexer": "ipython3",
115+
"version": "3.12.12"
116+
}
117+
},
118+
"nbformat": 4,
119+
"nbformat_minor": 5
120+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
## Time Series Anomaly Visualization – Guideline
2+
3+
This visualization is used consistently across the project to answer the
4+
following standard questions for time series anomaly analysis.
5+
6+
### Standard Questions
7+
8+
1. Where in the time series do unusual deviations from typical signal behavior occur?
9+
10+
2. Do highlighted deviations appear as isolated events or as contiguous time segments?
11+
12+
3. How do highlighted deviations relate to the local signal shape
13+
(e.g., extremes, transitions, plateaus, or abrupt level changes)?
14+
15+
4. What are the exact timestamp and value of a detected anomaly when closer inspection is required?
16+
17+
### Visualization Constraints
18+
19+
- A single continuous line plot is used to show the full time series context.
20+
- Highlighted markers are overlaid directly on the signal.
21+
- No additional subplots, thresholds, or secondary axes are introduced.
22+
- The same visual encoding is used across different detection methods.
23+
- Interactive features (e.g., hover tooltips) are optional and may be used
24+
to support precise inspection of individual anomaly points without altering
25+
the overall visual structure.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2025 RTDIP
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# Copyright 2025 RTDIP
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from pathlib import Path
16+
from typing import Optional, Union
17+
18+
import matplotlib.pyplot as plt
19+
from matplotlib.figure import Figure, SubFigure
20+
from matplotlib.axes import Axes
21+
22+
import pandas as pd
23+
from pyspark.sql import DataFrame as SparkDataFrame
24+
25+
from ..interfaces import MatplotlibVisualizationInterface
26+
27+
28+
class AnomalyDetectionPlot(MatplotlibVisualizationInterface):
29+
"""
30+
Plot time series data with detected anomalies highlighted.
31+
32+
This component visualizes the original time series data alongside detected
33+
anomalies, making it easy to identify and analyze outliers. Internally converts
34+
PySpark DataFrames to Pandas for visualization.
35+
36+
Parameters:
37+
ts_data (SparkDataFrame): Time series data with 'timestamp' and 'value' columns
38+
ad_data (SparkDataFrame): Anomaly detection results with 'timestamp' and 'value' columns
39+
sensor_id (str, optional): Sensor identifier for the plot title
40+
title (str, optional): Custom plot title
41+
figsize (tuple, optional): Figure size as (width, height). Defaults to (18, 6)
42+
linewidth (float, optional): Line width for time series. Defaults to 1.6
43+
anomaly_marker_size (int, optional): Marker size for anomalies. Defaults to 70
44+
anomaly_color (str, optional): Color for anomaly markers. Defaults to 'red'
45+
ts_color (str, optional): Color for time series line. Defaults to 'steelblue'
46+
47+
Example:
48+
```python
49+
from rtdip_sdk.pipelines.visualization.matplotlib.anomaly_detection import AnomalyDetectionPlot
50+
51+
plot = AnomalyDetectionPlot(
52+
ts_data=df_full_spark,
53+
ad_data=df_anomalies_spark,
54+
sensor_id='SENSOR_001'
55+
)
56+
57+
fig = plot.plot()
58+
plot.save('anomalies.png')
59+
```
60+
"""
61+
62+
def __init__(
63+
self,
64+
ts_data: SparkDataFrame,
65+
ad_data: SparkDataFrame,
66+
sensor_id: Optional[str] = None,
67+
title: Optional[str] = None,
68+
figsize: tuple = (18, 6),
69+
linewidth: float = 1.6,
70+
anomaly_marker_size: int = 70,
71+
anomaly_color: str = "red",
72+
ts_color: str = "steelblue",
73+
ax: Optional[Axes] = None,
74+
) -> None:
75+
"""
76+
Initialize the AnomalyDetectionPlot component.
77+
78+
Args:
79+
ts_data: PySpark DataFrame with 'timestamp' and 'value' columns
80+
ad_data: PySpark DataFrame with 'timestamp' and 'value' columns
81+
sensor_id: Optional sensor identifier
82+
title: Optional custom title
83+
figsize: Figure size tuple
84+
linewidth: Line width for the time series
85+
anomaly_marker_size: Size of anomaly markers
86+
anomaly_color: Color for anomaly points
87+
ts_color: Color for time series line
88+
ax: Optional existing matplotlib axis to plot on
89+
"""
90+
super().__init__()
91+
92+
# Convert PySpark DataFrames to Pandas
93+
self.ts_data = ts_data.toPandas()
94+
self.ad_data = ad_data.toPandas() if ad_data is not None else None
95+
96+
self.sensor_id = sensor_id
97+
self.title = title
98+
self.figsize = figsize
99+
self.linewidth = linewidth
100+
self.anomaly_marker_size = anomaly_marker_size
101+
self.anomaly_color = anomaly_color
102+
self.ts_color = ts_color
103+
self.ax = ax
104+
105+
self._fig: Optional[Figure | SubFigure] = None
106+
self._validate_data()
107+
108+
def _validate_data(self) -> None:
109+
"""Validate that required columns exist in DataFrames."""
110+
required_cols = {"timestamp", "value"}
111+
112+
if not required_cols.issubset(self.ts_data.columns):
113+
raise ValueError(
114+
f"ts_data must contain columns {required_cols}. "
115+
f"Got: {set(self.ts_data.columns)}"
116+
)
117+
118+
# Ensure timestamp is datetime
119+
if not pd.api.types.is_datetime64_any_dtype(self.ts_data["timestamp"]):
120+
self.ts_data["timestamp"] = pd.to_datetime(self.ts_data["timestamp"])
121+
122+
# Ensure value is numeric
123+
if not pd.api.types.is_numeric_dtype(self.ts_data["value"]):
124+
self.ts_data["value"] = pd.to_numeric(
125+
self.ts_data["value"], errors="coerce"
126+
)
127+
128+
if self.ad_data is not None and len(self.ad_data) > 0:
129+
if not required_cols.issubset(self.ad_data.columns):
130+
raise ValueError(
131+
f"ad_data must contain columns {required_cols}. "
132+
f"Got: {set(self.ad_data.columns)}"
133+
)
134+
135+
# Convert ad_data timestamp
136+
if not pd.api.types.is_datetime64_any_dtype(self.ad_data["timestamp"]):
137+
self.ad_data["timestamp"] = pd.to_datetime(self.ad_data["timestamp"])
138+
139+
# Convert ad_data value
140+
if not pd.api.types.is_numeric_dtype(self.ad_data["value"]):
141+
self.ad_data["value"] = pd.to_numeric(
142+
self.ad_data["value"], errors="coerce"
143+
)
144+
145+
def plot(self, ax: Optional[Axes] = None) -> Figure | SubFigure:
146+
"""
147+
Generate the anomaly detection visualization.
148+
149+
Args:
150+
ax: Optional matplotlib axis to plot on. If None, creates new figure.
151+
152+
Returns:
153+
matplotlib.figure.Figure: The generated figure
154+
"""
155+
# Use provided ax or instance ax
156+
use_ax = ax if ax is not None else self.ax
157+
158+
if use_ax is None:
159+
self._fig, use_ax = plt.subplots(figsize=self.figsize)
160+
else:
161+
self._fig = use_ax.figure
162+
163+
# Sort data by timestamp
164+
ts_sorted = self.ts_data.sort_values("timestamp")
165+
166+
# Plot time series line
167+
use_ax.plot(
168+
ts_sorted["timestamp"],
169+
ts_sorted["value"],
170+
label="value",
171+
color=self.ts_color,
172+
linewidth=self.linewidth,
173+
)
174+
175+
# Plot anomalies if available
176+
if self.ad_data is not None and len(self.ad_data) > 0:
177+
ad_sorted = self.ad_data.sort_values("timestamp")
178+
use_ax.scatter(
179+
ad_sorted["timestamp"],
180+
ad_sorted["value"],
181+
color=self.anomaly_color,
182+
s=self.anomaly_marker_size,
183+
label="anomaly",
184+
zorder=5,
185+
)
186+
187+
# Set title
188+
if self.title:
189+
title = self.title
190+
elif self.sensor_id:
191+
n_anomalies = len(self.ad_data) if self.ad_data is not None else 0
192+
title = f"Sensor {self.sensor_id} - Anomalies: {n_anomalies}"
193+
else:
194+
n_anomalies = len(self.ad_data) if self.ad_data is not None else 0
195+
title = f"Anomaly Detection Results - Anomalies: {n_anomalies}"
196+
197+
use_ax.set_title(title, fontsize=14)
198+
use_ax.set_xlabel("timestamp")
199+
use_ax.set_ylabel("value")
200+
use_ax.legend()
201+
use_ax.grid(True, alpha=0.3)
202+
203+
if isinstance(self._fig, Figure):
204+
self._fig.tight_layout()
205+
206+
return self._fig
207+
208+
def save(
209+
self,
210+
filepath: Union[str, Path],
211+
dpi: int = 150,
212+
**kwargs,
213+
) -> Path:
214+
"""
215+
Save the visualization to file.
216+
217+
Args:
218+
filepath: Output file path
219+
dpi: Dots per inch. Defaults to 150
220+
**kwargs: Additional arguments passed to savefig
221+
222+
Returns:
223+
Path: The path to the saved file
224+
"""
225+
226+
assert self._fig is not None, "Plot the figure before saving."
227+
228+
filepath = Path(filepath)
229+
filepath.parent.mkdir(parents=True, exist_ok=True)
230+
231+
if isinstance(self._fig, Figure):
232+
self._fig.savefig(filepath, dpi=dpi, **kwargs)
233+
234+
return filepath

0 commit comments

Comments
 (0)