Skip to content

Commit ed6e43c

Browse files
Merge pull request #53 from petrobras/documentation_improvements
Incorporation of MAIS in the 3W toolkit: conclusion of the 1st phase.
2 parents 92567f8 + e2ec835 commit ed6e43c

10 files changed

Lines changed: 147 additions & 42 deletions

File tree

toolkit/mais/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@ The name of theses experiments reflect what they implements, for examples, the e
7373
* mixed = both statistical and wavelet features;
7474
* select = Feature selector; and
7575
* wavelets = Wavelets
76+
77+
For example, the difference between experiments 4 and 6 is the kind of features that will be computed. In the first one, the exponentially weighted statistical features are used, and in the second one, just the wavelets. To do that, the only change is basically to assign the corresponding feature wrapper. The following image shows the wrapper for Experiment 4 of the examples list.
78+
79+
![Statistical Wrapper](images/README/stats.jpg "Statistical features wrapper")
80+
81+
And for experiment 6, we have:
82+
83+
![Wavelets Wrapper](images/README/wavelets.jpg "Wavelets features wrapper")
84+
7685
# How to use
7786

7887
After creating the experiment and putting it into the experiment folder (for example, 'experiments/multiclass/experiments/example.py',

toolkit/mais/experiments/multiclass/experiments/multi_stats_mrl_nonan.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77
import numpy as np
88

9+
from sklearn.metrics import accuracy_score, get_scorer
910
from sklearn.preprocessing import StandardScaler, LabelEncoder
1011
from sklearn.impute import SimpleImputer
1112
from sklearn.decomposition import PCA
@@ -58,6 +59,7 @@ def raw_transform(self, event, transient_only=True, no_nans=True):
5859
labels = event["labels"]
5960
event_type = event["event_type"]
6061

62+
# trim established fault if the event has a transient
6163
if transient_only and MAEDataset.TRANSIENT_CLASS[event_type]:
6264
transients = labels.values != event_type
6365
tags = tags[transients]

toolkit/mais/experiments/multiclass/experiments/multi_wavelets_mrl_nonan.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def raw_transform(self, event, transient_only=True, no_nans=True):
6060
labels = event["labels"]
6161
event_type = event["event_type"]
6262

63+
# trim established fault if the event has a transient
6364
if transient_only and MAEDataset.TRANSIENT_CLASS[event_type]:
6465
transients = labels.values != event_type
6566
tags = tags[transients]
29.5 KB
Loading
27.7 KB
Loading

toolkit/mais/mais/data/dataset.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class MAEDataset:
3636
3737
- **n_jobs: INT** -- number of processes to use
3838
39-
-- **events**
39+
- **events**
4040
4141
* Important fields:
4242
@@ -57,17 +57,17 @@ class MAEDataset:
5757
5858
- **_make_set()**
5959
60-
- **process(fname, event_type)**
60+
- **process(fname, event_type)**
6161
6262
"""
6363

64-
# tag corresponding to instance label
64+
# Tag corresponding to instance label
6565
LABEL_NAME = "class"
6666

67-
# tag corresponding to index
67+
# Tag corresponding to index
6868
INDEX_NAME = "timestamp"
6969

70-
# fault description
70+
# Fault description
7171
CLASS_NAMES = {
7272
0: "NORMAL",
7373
1: "ABRUPT_INCREASE_OF_BSW",
@@ -80,10 +80,10 @@ class MAEDataset:
8080
8: "HYDRATE_IN_PRODUCTION_LINE",
8181
}
8282

83-
# list of used classes
83+
# List of used classes
8484
KNOWN_CLASSES = list(CLASS_NAMES.keys())
8585

86-
# transient properties of events
86+
# Transient properties of events
8787
TRANSIENT_CLASS = {
8888
0: False,
8989
1: True,
@@ -118,7 +118,9 @@ def __init__(
118118
feature_mapper=tuple, # transformer from event to features
119119
n_jobs=-1,
120120
):
121-
"""Load and process dataset using supplied strategies"""
121+
"""
122+
Load and process dataset using supplied strategies.
123+
"""
122124

123125
# save parameters
124126
self.root_dir = root_dir
@@ -127,14 +129,16 @@ def __init__(
127129
self.n_jobs = n_jobs
128130
self.feature_mapper = feature_mapper
129131

130-
# call the heavy load _make_set passing the (maybe Null) events
132+
# Call the heavy load _make_set passing the (maybe Null) events
131133
self._make_set(events)
132134

133135
def _instance_type(fname):
134-
"""Detects if instance type is selected
136+
"""
137+
Detects if instance type is selected.
135138
136139
* Parameters:
137140
- **fname**: STRING - name of the instance file
141+
138142
* Returns:
139143
- **STRING** - string representing the instance type of the input file name
140144
@@ -147,9 +151,26 @@ def _instance_type(fname):
147151
return "real"
148152

149153
def load_events(data_root, n_jobs=-1):
150-
"""scan data_root for raw files and return dict. useful for preloads"""
154+
"""
155+
Scan data_root for raw files and return dict. Useful for preloads.
156+
157+
* Parameters:
158+
- **data_root: STRING** - base location of events separated by event type
159+
160+
* Returns:
161+
- **events**: [LIST] - Optional list of preloaded events
162+
"""
151163

152164
def _read(tgt, fname):
165+
"""
166+
Return a dict with the summary of a target.
167+
168+
* Parameters:
169+
- **tgt: STRING** - Target location
170+
- **fname: STRING** - Name of the file to read
171+
* Returns:
172+
- **DICT** - summary of the target
173+
"""
153174
df = pandas.read_csv(
154175
fname,
155176
index_col=MAEDataset.INDEX_NAME,
@@ -179,7 +200,9 @@ def _read(tgt, fname):
179200
def transform_events(
180201
events, raw_mapper, tgt_events=None, instance_types=None, n_jobs=-1
181202
):
182-
"""apply raw_mapper to list of events, filtering by target events and instance types"""
203+
"""
204+
Apply raw_mapper to list of events, filtering by target events and instance types
205+
"""
183206
if tgt_events is not None:
184207
events = [e for e in events if (e["event_type"] in tgt_events)]
185208
if instance_types is not None:
@@ -205,9 +228,9 @@ def gather(transformed_events):
205228
return Dataset(X=X, y=y, g=g, g_class=g_class)
206229

207230
def _make_set(self, events=None):
208-
"""Loads all instances of target classes from the desired types,
209-
transforming the raw data to obtain its features by calling the
210-
*feature_mapper()* method for each instance.
231+
"""
232+
Loads all instances of target classes from the desired types, transforming the raw data to obtain its
233+
features by calling the *feature_mapper()* method for each instance.
211234
212235
* Parameters:
213236
- **events**: [LIST] - Optional list of preloaded events

toolkit/mais/mais/data/feature_mappers.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99

1010

1111
class StatisticalFeatureMapper:
12-
"""generates statistical descriptor for a window of our data"""
12+
"""
13+
Generates statistical descriptor for a window of our data
14+
15+
"""
1316

1417
FEATURES = {
1518
"mean": lambda x: x.mean(),
@@ -48,7 +51,9 @@ def __call__(self, tags, event_type=None):
4851

4952

5053
class TorchStatisticalFeatureMapper:
51-
"""PyTorch implementation of the statistical feature mapper"""
54+
"""
55+
PyTorch implementation of the statistical feature mapper
56+
"""
5257

5358
FEATURES = ["mean", "std", "skew", "kurt", "min", "1qrt", "med", "3qrt", "max"]
5459

@@ -250,7 +255,10 @@ def __call__(self, tags, event_type=None):
250255

251256

252257
class MixedMapper:
253-
"""join features of multiple mappers. Feature sizes must be consistent"""
258+
"""
259+
Join features of multiple mappers. Feature sizes must be consistent.
260+
261+
"""
254262

255263
def __init__(self, *args):
256264
self.mappers = args

toolkit/mais/mais/data/label_mappers.py

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
""" Strategies for deciding on label for a given region of data, using pandas or torch backends """
2-
31
import pandas as pd
42
import numpy as np
53
import scipy.stats as sp
@@ -9,8 +7,25 @@
97

108

119
class RollingLabelStrategy:
12-
"""Base class that just wraps applications of apply,
13-
leverages pandas' Rolling function"""
10+
"""
11+
Base class that just wraps applications of apply,
12+
leverages pandas' Rolling function
13+
14+
15+
* Constructor arguments:
16+
- **window_size: INT** -- Size of sliding window
17+
18+
- **stride: INT** -- Number of samples between consecutive windows
19+
20+
- **offset: INT** -- Control how much to offset each window
21+
22+
* Methods:
23+
24+
- **apply(y, event_type)**
25+
26+
- **__call__(labels, event_type)**
27+
28+
"""
1429

1530
def __init__(self, window_size, stride=1, offset=0):
1631
self.window_size = window_size
@@ -29,26 +44,36 @@ def f(y):
2944

3045

3146
class BinaryMCLStrategy(RollingLabelStrategy):
32-
"""Window label gets assigned to most common value,
33-
mapping transients and faults of ALL classes to true"""
47+
"""
48+
Window label gets assigned to most common value,
49+
mapping transients and faults of ALL classes to true
50+
"""
3451

3552
def apply(self, y, event_type=None):
36-
"""map all fault types to True and apply mode over window"""
53+
"""
54+
Map all fault types to True and apply mode over window
55+
"""
3756
return sp.mode(y > 0)[0]
3857

3958

4059
class MulticlassMCLStrategy(RollingLabelStrategy):
41-
"""Window label gets assigned to most common value,
42-
mapping transients and faults to the CORRESPONDING CLASS CODE"""
60+
"""
61+
Window label gets assigned to most common value,
62+
mapping transients and faults to the CORRESPONDING CLASS CODE
63+
"""
4364

4465
def apply(self, y, event_type=None):
45-
"""map transient codes to fault codes and apply mode over window"""
66+
"""
67+
Map transient codes to fault codes and apply mode over window
68+
"""
4669
return sp.mode(y % 100)[0]
4770

4871

4972
class OVAMCLStrategy(RollingLabelStrategy):
50-
"""Window label gets assigned to most common value,
51-
mapping transients and faults of SPECIFIC CLASS to true"""
73+
"""
74+
Window label gets assigned to most common value,
75+
mapping transients and faults of SPECIFIC CLASS to true
76+
"""
5277

5378
def __init__(self, fault_code, *args, **kwargs):
5479
super().__init__(*args, **kwargs)
@@ -59,8 +84,10 @@ def apply(self, y, event_type=None):
5984

6085

6186
class TorchLabelStrategy:
62-
"""Base class that just wraps applications of apply,
63-
leverages pytorch unfold function"""
87+
"""
88+
Base class that just wraps applications of apply,
89+
leverages pytorch unfold function
90+
"""
6491

6592
def __init__(self, window_size, stride=1, offset=0):
6693
self.window_size = window_size
@@ -99,21 +126,27 @@ def __call__(self, labels, event_type):
99126

100127

101128
class TorchBinaryMCLStrategy(TorchLabelStrategy):
102-
"""any fault indicator, most common label"""
129+
"""
130+
Any fault indicator, most common label
131+
"""
103132

104133
def apply(self, y, event_type=None):
105134
return torch.mode(y, dim=-1)[0] > 0
106135

107136

108137
class TorchBinaryMRLStrategy(TorchLabelStrategy):
109-
"""any fault indicator, most recent label"""
138+
"""
139+
Any fault indicator, most recent label
140+
"""
110141

111142
def apply(self, y, event_type=None):
112143
return y[:, -1] > 0
113144

114145

115146
class TorchOVAMCLStrategy(TorchLabelStrategy):
116-
"""specific class indicator, most common label"""
147+
"""
148+
Specific class indicator, most common label
149+
"""
117150

118151
def __init__(self, fault_code, *args, **kwargs):
119152
super().__init__(*args, **kwargs)
@@ -124,7 +157,9 @@ def apply(self, y, event_type=None):
124157

125158

126159
class TorchOVATransientMCLStrategy(TorchLabelStrategy):
127-
"""transients of specific class, most common label"""
160+
"""
161+
Transients of specific class, most common label
162+
"""
128163

129164
def __init__(self, fault_code, *args, **kwargs):
130165
super().__init__(*args, **kwargs)

toolkit/mais/mais/data/utils.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,37 @@
33

44

55
class StratifiedGroupKFold(BaseCrossValidator):
6-
"""GroupKFold with stratification based on the event type"""
6+
"""
7+
GroupKFold with stratification based on the event type.
8+
9+
* Constructor arguments:
10+
- **n_splits: int** -- Number of folds
11+
12+
* Methods:
13+
14+
- **split()**
15+
16+
- **get_n_splits()**
17+
"""
718

819
def __init__(self, n_splits, event_types):
920
self.base_splitter = StratifiedKFold(n_splits)
1021
self.event_types = event_types
1122

1223
def split(self, X, y, groups):
24+
"""
25+
Create the splits.
26+
27+
* Parameters:
28+
- **X: np.ndarray** - Data
29+
30+
- **y: np.array** - Labels
31+
32+
- **groups: np.array** - Groups
33+
34+
* Yields:
35+
- **splits**: [TUPLE] - Tuple with the indices of the training and test samples for each fold.
36+
"""
1337
unique_g = np.unique(groups)
1438
event_y = np.array([self.event_types[_] for _ in unique_g])
1539
indices = np.arange(groups.size)

0 commit comments

Comments
 (0)