Skip to content

Commit 92e4969

Browse files
authored
Merge pull request #5 from jonaswa11/main
Update code with some minor fixes. Tested on Windows and Linux environments.
2 parents e4b921b + 9c7e582 commit 92e4969

15 files changed

+46
-173
lines changed
Binary file not shown.
-8.33 KB
Binary file not shown.

pyproject.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "automotive_feature-engineering"
33
version = "0.1.0"
4-
requires-python = ">=3.10.11"
4+
requires-python = ">=3.10"
55
description = "A Python package designed to automate the feature engineering process for large in-car communication datasets within the automotive industry."
66
authors = [
77
{ name = "Your Name", email = "[email protected]" },
@@ -117,15 +117,13 @@ dependencies = [
117117
"tensorboardX==2.6.2.2",
118118
"tensorflow==2.13.0",
119119
"tensorflow-estimator==2.13.0",
120-
"tensorflow-io-gcs-filesystem==0.37.1",
121120
"termcolor==2.4.0",
122121
"threadpoolctl==3.5.0",
123122
"tifffile==2024.7.2",
124123
"typer==0.12.3",
125124
"typing_extensions==4.5.0",
126125
"tzdata==2024.1",
127126
"urllib3==2.2.2",
128-
"virtualenv==20.26.3",
129127
"Werkzeug==3.0.3",
130128
"wrapt==1.14.1"
131129
]
@@ -140,3 +138,5 @@ rl = "automotive_featureengineering:rl"
140138
static = "automotive_featureengineering:static"
141139
manual = "automotive_featureengineering:manual"
142140

141+
[tool.setuptools.package-data]
142+
"automotive_feature_engineering" = ["reinforcement_learning/*.json"]

requirements.txt

-2
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,12 @@ tensorboard-data-server==0.7.2
9898
tensorboardX==2.6.2.2
9999
tensorflow==2.13.0
100100
tensorflow-estimator==2.13.0
101-
tensorflow-io-gcs-filesystem==0.37.1
102101
termcolor==2.4.0
103102
threadpoolctl==3.5.0
104103
tifffile==2024.7.2
105104
typer==0.12.3
106105
typing_extensions==4.5.0
107106
tzdata==2024.1
108107
urllib3==2.2.2
109-
virtualenv==20.26.3
110108
Werkzeug==3.0.3
111109
wrapt==1.14.1

src/automotive_feature_engineering/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2024 MBition GmbH.
22
# SPDX-License-Identifier: MIT
3-
from src.automotive_feature_engineering.main_feature_engineering import (
3+
from automotive_feature_engineering.main_feature_engineering import (
44
FeatureEngineering,
55
)
66

src/automotive_feature_engineering/feature_encoding.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@
33
import os
44
import re
55
from typing import List, Tuple
6-
import seaborn as sns
76
import matplotlib.pyplot as plt
87
import numpy as np
98
import pandas as pd
109

1110
from sklearn.preprocessing import OneHotEncoder
12-
from src.automotive_feature_engineering.utils.utils import combine_dfs
11+
from automotive_feature_engineering.utils.utils import combine_dfs
1312

1413

1514
class FeatureEncoding:

src/automotive_feature_engineering/feature_interactions.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,10 @@
33
import os
44
import re
55
from typing import List, Tuple
6-
import eli5
7-
import seaborn as sns
8-
import matplotlib.pyplot as plt
96
import numpy as np
107
import pandas as pd
118

12-
from src.automotive_feature_engineering.utils.utils import combine_dfs, get_feature_df
9+
from automotive_feature_engineering.utils.utils import combine_dfs, get_feature_df
1310
from sklearn.preprocessing import PolynomialFeatures
1411

1512

src/automotive_feature_engineering/feature_scaling.py

-3
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,10 @@
33
import os
44
import re
55
from typing import List, Tuple
6-
import eli5
7-
import seaborn as sns
86
import matplotlib.pyplot as plt
97
import numpy as np
108
import pandas as pd
119
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
12-
from sklearn import preprocessing
1310

1411

1512
class FeatureScaling:

src/automotive_feature_engineering/feature_selection.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
import matplotlib.pyplot as plt
1111
import numpy as np
1212
import pandas as pd
13-
import multiprocessing as mp
14-
import src.automotive_feature_engineering.utils.utils as utils
13+
import automotive_feature_engineering.utils.utils as utils
1514

1615
from sklearn.ensemble import (
1716
RandomForestRegressor,
@@ -21,7 +20,7 @@
2120

2221
from eli5.sklearn import PermutationImportance
2322
from sklearn.inspection import permutation_importance
24-
from src.automotive_feature_engineering.utils.utils import split_df, combine_dfs
23+
from automotive_feature_engineering.utils.utils import split_df, combine_dfs
2524

2625

2726
class FeatureSelection:
@@ -427,7 +426,6 @@ def calc_globalFeatureImportance(
427426
)
428427
print(config_dict)
429428
regr = RandomForestRegressor(**config_dict)
430-
# print(feature_df.head, target_df.head)
431429
regr.fit(feature_df, target_df)
432430
print(
433431
"---Global Feature Importance calculated for RandomForestRegressor---"
@@ -804,6 +802,7 @@ def permImportance(
804802
)
805803
X = df_train_features
806804
y = df_train_target
805+
807806
regr = RandomForestRegressor(**config_dict).fit(X, y)
808807
perm = PermutationImportance(
809808
regr, random_state=config_dict.get("random_state")

src/automotive_feature_engineering/main_feature_engineering.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,17 @@
33
import os
44
import re
55
from typing import List, Tuple
6-
import seaborn as sns
76
import matplotlib.pyplot as plt
87
import numpy as np
98
import pandas as pd
109

11-
from src.automotive_feature_engineering.feature_extraction import FeatureExtraction
12-
from src.automotive_feature_engineering.feature_encoding import FeatureEncoding
13-
from src.automotive_feature_engineering.feature_selection import FeatureSelection
14-
from src.automotive_feature_engineering.feature_scaling import FeatureScaling
15-
from src.automotive_feature_engineering.feature_interactions import FeatureInteractions
16-
from src.automotive_feature_engineering.sna_handling import SnaHandling
17-
from src.automotive_feature_engineering.utils import combine_dfs, get_feature_df
10+
from automotive_feature_engineering.feature_extraction import FeatureExtraction
11+
from automotive_feature_engineering.feature_encoding import FeatureEncoding
12+
from automotive_feature_engineering.feature_selection import FeatureSelection
13+
from automotive_feature_engineering.feature_scaling import FeatureScaling
14+
from automotive_feature_engineering.feature_interactions import FeatureInteractions
15+
from automotive_feature_engineering.sna_handling import SnaHandling
16+
from automotive_feature_engineering.utils import combine_dfs, get_feature_df
1817
from joblib import dump, load
1918

2019
from timeit import default_timer as timer

src/automotive_feature_engineering/outlier_detection.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,15 @@
33
import os
44
import re
55
from typing import List, Tuple
6-
import eli5
7-
import seaborn as sns
86
import matplotlib.pyplot as plt
97
import numpy as np
108
import pandas as pd
11-
12-
from pandas import read_csv
139
from sklearn.ensemble import IsolationForest
1410
from sklearn.neighbors import LocalOutlierFactor
1511

1612
from numpy import percentile
1713
import multiprocessing as mp
18-
from data.utils import split_df, combine_dfs
14+
from utils.utils import split_df, combine_dfs
1915

2016

2117
class OutlierDetection:

src/automotive_feature_engineering/reinforcement_learning/rl_environment_ss.py

+16-65
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,27 @@
44
import os
55
from gymnasium import spaces
66
import numpy as np
7-
from gymnasium import Env
8-
from gymnasium.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete
9-
import os, subprocess, time, signal, random
7+
import os
108
import pandas as pd
119
import logging
12-
import json
13-
from typing import Optional, Union
10+
from typing import Optional
1411
import math
1512
import pathlib
16-
import random
17-
from src.automotive_feature_engineering.sna_handling import SnaHandling
18-
from src.automotive_feature_engineering.feature_extraction import FeatureExtraction
19-
from src.automotive_feature_engineering.feature_encoding import FeatureEncoding
20-
from src.automotive_feature_engineering.feature_selection import FeatureSelection
21-
from src.automotive_feature_engineering.feature_scaling import FeatureScaling
22-
from src.automotive_feature_engineering.feature_interactions import FeatureInteractions
23-
from src.automotive_feature_engineering.main_feature_engineering import (
24-
FeatureEngineering,
25-
)
26-
from src.automotive_feature_engineering.utils.utils import combine_dfs, get_feature_df
13+
from automotive_feature_engineering.sna_handling import SnaHandling
14+
from automotive_feature_engineering.feature_extraction import FeatureExtraction
15+
from automotive_feature_engineering.feature_encoding import FeatureEncoding
16+
from automotive_feature_engineering.feature_selection import FeatureSelection
17+
from automotive_feature_engineering.feature_scaling import FeatureScaling
18+
from automotive_feature_engineering.feature_interactions import FeatureInteractions
19+
20+
from automotive_feature_engineering.utils.utils import get_feature_df
2721
from sklearn.model_selection import train_test_split
28-
import ray
29-
import src.automotive_feature_engineering.utils.utils as utils
22+
import automotive_feature_engineering.utils.utils as utils
3023

3124
# from ray.rllib import agents
3225
from ray.rllib.utils import try_import_tf
33-
from ray.rllib.examples.models.action_mask_model import (
34-
ActionMaskModel,
35-
TorchActionMaskModel,
36-
)
37-
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
26+
27+
from sklearn.metrics import r2_score
3828
from sklearn.ensemble import RandomForestRegressor
3929

4030
tf = try_import_tf()
@@ -49,7 +39,6 @@
4939

5040
class EnergaizeEnv2(gym.Env):
5141
def __init__(self, env_config) -> None:
52-
# super(EnergaizeEnv2, self).__init__()
5342

5443
### Training and Test Data
5544
# self.df_list = env_config["df"]
@@ -128,8 +117,6 @@ def step(
128117
) -> tuple[dict[np.array, np.array], float, bool, bool, dict]:
129118
### Increase sequence length
130119
self.current_sequence_length += 1
131-
print("In Step")
132-
print(f"Current Sequence Length: ", self.current_sequence_length)
133120

134121
### Set placeholder for info
135122
infos = {}
@@ -170,12 +157,8 @@ def step(
170157

171158
### Take action
172159
try:
173-
print("in try")
174160
self.total_steps += 1
175-
print("Total Steps: ", self.total_steps)
176-
print("take action")
177161
self._take_action(action)
178-
print("action taken")
179162

180163
if self.df_train_X_train.shape[1] > 20000:
181164
reward = -1
@@ -188,7 +171,6 @@ def step(
188171
return obs, reward, terminated, truncated, infos
189172

190173
# poly features not possible if df too large
191-
192174
if len(self.df_train_X_train.columns) > 200:
193175
self.action_mask[14] = 0
194176
elif len(self.df_train_X_train.columns) <= 200 and 14 not in self.state:
@@ -305,24 +287,19 @@ def reset(
305287
##########################################
306288
def _take_action(self, action: int) -> None:
307289
if action == 0:
308-
print("Platzhalter")
290+
print("Placeholder")
309291
# 0 -> remove highly correlated features
310292
elif action == 1:
311293
print(f"Take Action {action}")
312294
feature_selection = FeatureSelection()
313-
print("feature_selection object created")
314-
# self.df_train = combine_dfs([self.df_train, self.df_train_target])
315-
print("Self Dok Path", self.alt_docu)
316-
print("Self alt_config", self.alt_config)
317-
print("Feture Selection Object", self.feature_selection)
295+
318296
importances = self.feature_selection.calc_globalFeatureImportance(
319297
self.alt_docu,
320298
"randomforest",
321299
self.df_train_X_train,
322300
self.df_train_y_train,
323301
self.alt_config,
324302
)
325-
print("importance function called")
326303
# ### Remove "file" and "I_" before processing
327304
# self.df, self.df_target = get_feature_df(
328305
# self.df, fuse_prefix=self.fuse_prefix
@@ -612,7 +589,6 @@ def _take_action(self, action: int) -> None:
612589
# Calculate reward
613590
##########################################
614591
def _calculate_performance(self) -> float:
615-
# print("CAALLLLAAAAAAAAAA")
616592

617593
### Add "file" and "I_" before processing
618594
# self.df = combine_dfs([self.df, self.df_target])
@@ -650,23 +626,6 @@ def _calculate_performance(self) -> float:
650626

651627
pred_val = regr.predict(X_val).reshape(-1, 1)
652628

653-
# ### PAWD
654-
# # measured energy [As] on validation data
655-
# val_energy_integral = self.__calculate_integral(self.rl_raster, y_val)
656-
# val_energy = float(val_energy_integral[-1])
657-
658-
# # predicted energy [As] on training data
659-
# val_predenergy_integral = self.__calculate_integral(
660-
# self.rl_raster, pred_val
661-
# )
662-
# val_predenergy = float(val_predenergy_integral[-1])
663-
# val_energy_percdev = float(100.0 * (val_predenergy / val_energy) - 100.0)
664-
665-
# model_xval.append(X_val)
666-
# val_energy_list.append(val_energy)
667-
# val_energy_percdev_list_abs_weighed.append(
668-
# np.abs(val_energy_percdev) * val_energy
669-
# )
670629

671630
### R2
672631
valR2.append(r2_score(y_val, pred_val, multioutput="raw_values"))
@@ -675,19 +634,11 @@ def _calculate_performance(self) -> float:
675634
rewards = []
676635
### R2 average
677636
r2_avg = np.average(valR2)
678-
print("r2_avg ", r2_avg)
679-
# ### PAWD
680-
# pawd = -np.abs(
681-
# np.sum(val_energy_percdev_list_abs_weighed) / np.sum(val_energy_list)
682-
# )
637+
683638
print("STATE CALC", self.state)
684-
# print("r2_avg ", r2_avg, "PAWD ", pawd)
685639

686640
transformed_r2 = self.expo_r2(r2_avg)
687641
rewards.append(transformed_r2)
688-
# rewards.append(transformed_r2)
689-
# transformed_pawd = self.expo_pawd(pawd)
690-
# rewards.append(transformed_pawd)
691642

692643
### Signals
693644
if self.df_train_X_train.shape[1] > 400:

src/automotive_feature_engineering/reinforcement_learning/rl_main.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,10 @@
1919
from ray.rllib.algorithms import ppo
2020
from ray.rllib.examples.models.action_mask_model import ActionMaskModel
2121
from ray.tune.logger import pretty_print
22-
from src.automotive_feature_engineering.reinforcement_learning.rl_environment_ss import (
22+
from automotive_feature_engineering.reinforcement_learning.rl_environment_ss import (
2323
EnergaizeEnv2,
2424
)
25-
26-
# from config import Config
27-
import numpy as np
28-
from src.automotive_feature_engineering.utils import data_loader_test
29-
from src.automotive_feature_engineering.__init__ import manual
25+
from automotive_feature_engineering.__init__ import manual
3026

3127

3228
os.environ["OPENBLAS_NUM_THREADS"] = "1"
@@ -111,7 +107,7 @@ def rl_training(self):
111107
}
112108
print("Starting reinforcement learning training...")
113109

114-
ray.init(local_mode=False, ignore_reinit_error=True)
110+
ray.init(local_mode=True, ignore_reinit_error=True)
115111

116112
register_env("multienv", lambda env_config: EnergaizeEnv2(env_config))
117113

0 commit comments

Comments
 (0)