Merge pull request #5 from jonaswa11/main

mueller-mb · web-flow · commit 92e49696fd6f · 2024-09-01T19:21:01.000+02:00
Update code with some minor fixes. Tested on Windows and Linux environments.
diff --git a/dist/automotive_feature_engineering-0.1.0-py3-none-any.whl b/dist/automotive_feature_engineering-0.1.0-py3-none-any.whl
diff --git a/dist/automotive_feature_engineering-0.1.0.tar.gz b/dist/automotive_feature_engineering-0.1.0.tar.gz
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "automotive_feature-engineering"
 version = "0.1.0"
-requires-python = ">=3.10.11"
+requires-python = ">=3.10"
 description = "A Python package designed to automate the feature engineering process for large in-car communication datasets within the automotive industry."
 authors = [
   { name = "Your Name", email = "your.email@example.com" },
@@ -117,15 +117,13 @@ dependencies = [
     "tensorboardX==2.6.2.2",
     "tensorflow==2.13.0",
     "tensorflow-estimator==2.13.0",
-    "tensorflow-io-gcs-filesystem==0.37.1",
     "termcolor==2.4.0",
     "threadpoolctl==3.5.0",
     "tifffile==2024.7.2",
     "typer==0.12.3",
     "typing_extensions==4.5.0",
     "tzdata==2024.1",
     "urllib3==2.2.2",
-    "virtualenv==20.26.3",
     "Werkzeug==3.0.3",
     "wrapt==1.14.1"
 ]
@@ -140,3 +138,5 @@ rl = "automotive_featureengineering:rl"
 static = "automotive_featureengineering:static"
 manual = "automotive_featureengineering:manual"
 
+[tool.setuptools.package-data]
+"automotive_feature_engineering" = ["reinforcement_learning/*.json"]
diff --git a/requirements.txt b/requirements.txt
@@ -98,14 +98,12 @@ tensorboard-data-server==0.7.2
 tensorboardX==2.6.2.2
 tensorflow==2.13.0
 tensorflow-estimator==2.13.0
-tensorflow-io-gcs-filesystem==0.37.1
 termcolor==2.4.0
 threadpoolctl==3.5.0
 tifffile==2024.7.2
 typer==0.12.3
 typing_extensions==4.5.0
 tzdata==2024.1
 urllib3==2.2.2
-virtualenv==20.26.3
 Werkzeug==3.0.3
 wrapt==1.14.1
diff --git a/src/automotive_feature_engineering/__init__.py b/src/automotive_feature_engineering/__init__.py
@@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2024 MBition GmbH.
 # SPDX-License-Identifier: MIT
-from src.automotive_feature_engineering.main_feature_engineering import (
+from automotive_feature_engineering.main_feature_engineering import (
     FeatureEngineering,
 )
 
diff --git a/src/automotive_feature_engineering/feature_encoding.py b/src/automotive_feature_engineering/feature_encoding.py
@@ -3,13 +3,12 @@
 import os
 import re
 from typing import List, Tuple
-import seaborn as sns
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
 from sklearn.preprocessing import OneHotEncoder
-from src.automotive_feature_engineering.utils.utils import combine_dfs
+from automotive_feature_engineering.utils.utils import combine_dfs
 
 
 class FeatureEncoding:
diff --git a/src/automotive_feature_engineering/feature_interactions.py b/src/automotive_feature_engineering/feature_interactions.py
@@ -3,13 +3,10 @@
 import os
 import re
 from typing import List, Tuple
-import eli5
-import seaborn as sns
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
-from src.automotive_feature_engineering.utils.utils import combine_dfs, get_feature_df
+from automotive_feature_engineering.utils.utils import combine_dfs, get_feature_df
 from sklearn.preprocessing import PolynomialFeatures
 
 
diff --git a/src/automotive_feature_engineering/feature_scaling.py b/src/automotive_feature_engineering/feature_scaling.py
@@ -3,13 +3,10 @@
 import os
 import re
 from typing import List, Tuple
-import eli5
-import seaborn as sns
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
-from sklearn import preprocessing
 
 
 class FeatureScaling:
diff --git a/src/automotive_feature_engineering/feature_selection.py b/src/automotive_feature_engineering/feature_selection.py
@@ -10,8 +10,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import multiprocessing as mp
-import src.automotive_feature_engineering.utils.utils as utils
+import automotive_feature_engineering.utils.utils as utils
 
 from sklearn.ensemble import (
     RandomForestRegressor,
@@ -21,7 +20,7 @@
 
 from eli5.sklearn import PermutationImportance
 from sklearn.inspection import permutation_importance
-from src.automotive_feature_engineering.utils.utils import split_df, combine_dfs
+from automotive_feature_engineering.utils.utils import split_df, combine_dfs
 
 
 class FeatureSelection:
@@ -427,7 +426,6 @@ def calc_globalFeatureImportance(
             )
             print(config_dict)
             regr = RandomForestRegressor(**config_dict)
-            # print(feature_df.head, target_df.head)
             regr.fit(feature_df, target_df)
             print(
                 "---Global Feature Importance calculated for RandomForestRegressor---"
@@ -804,6 +802,7 @@ def permImportance(
             )
             X = df_train_features
             y = df_train_target
+
             regr = RandomForestRegressor(**config_dict).fit(X, y)
             perm = PermutationImportance(
                 regr, random_state=config_dict.get("random_state")
diff --git a/src/automotive_feature_engineering/main_feature_engineering.py b/src/automotive_feature_engineering/main_feature_engineering.py
@@ -3,18 +3,17 @@
 import os
 import re
 from typing import List, Tuple
-import seaborn as sns
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
-from src.automotive_feature_engineering.feature_extraction import FeatureExtraction
-from src.automotive_feature_engineering.feature_encoding import FeatureEncoding
-from src.automotive_feature_engineering.feature_selection import FeatureSelection
-from src.automotive_feature_engineering.feature_scaling import FeatureScaling
-from src.automotive_feature_engineering.feature_interactions import FeatureInteractions
-from src.automotive_feature_engineering.sna_handling import SnaHandling
-from src.automotive_feature_engineering.utils import combine_dfs, get_feature_df
+from automotive_feature_engineering.feature_extraction import FeatureExtraction
+from automotive_feature_engineering.feature_encoding import FeatureEncoding
+from automotive_feature_engineering.feature_selection import FeatureSelection
+from automotive_feature_engineering.feature_scaling import FeatureScaling
+from automotive_feature_engineering.feature_interactions import FeatureInteractions
+from automotive_feature_engineering.sna_handling import SnaHandling
+from automotive_feature_engineering.utils import combine_dfs, get_feature_df
 from joblib import dump, load
 
 from timeit import default_timer as timer
diff --git a/src/automotive_feature_engineering/outlier_detection.py b/src/automotive_feature_engineering/outlier_detection.py
@@ -3,19 +3,15 @@
 import os
 import re
 from typing import List, Tuple
-import eli5
-import seaborn as sns
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-
-from pandas import read_csv
 from sklearn.ensemble import IsolationForest
 from sklearn.neighbors import LocalOutlierFactor
 
 from numpy import percentile
 import multiprocessing as mp
-from data.utils import split_df, combine_dfs
+from utils.utils import split_df, combine_dfs
 
 
 class OutlierDetection:
diff --git a/src/automotive_feature_engineering/reinforcement_learning/rl_environment_ss.py b/src/automotive_feature_engineering/reinforcement_learning/rl_environment_ss.py
@@ -4,37 +4,27 @@
 import os
 from gymnasium import spaces
 import numpy as np
-from gymnasium import Env
-from gymnasium.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete
-import os, subprocess, time, signal, random
+import os
 import pandas as pd
 import logging
-import json
-from typing import Optional, Union
+from typing import Optional
 import math
 import pathlib
-import random
-from src.automotive_feature_engineering.sna_handling import SnaHandling
-from src.automotive_feature_engineering.feature_extraction import FeatureExtraction
-from src.automotive_feature_engineering.feature_encoding import FeatureEncoding
-from src.automotive_feature_engineering.feature_selection import FeatureSelection
-from src.automotive_feature_engineering.feature_scaling import FeatureScaling
-from src.automotive_feature_engineering.feature_interactions import FeatureInteractions
-from src.automotive_feature_engineering.main_feature_engineering import (
-    FeatureEngineering,
-)
-from src.automotive_feature_engineering.utils.utils import combine_dfs, get_feature_df
+from automotive_feature_engineering.sna_handling import SnaHandling
+from automotive_feature_engineering.feature_extraction import FeatureExtraction
+from automotive_feature_engineering.feature_encoding import FeatureEncoding
+from automotive_feature_engineering.feature_selection import FeatureSelection
+from automotive_feature_engineering.feature_scaling import FeatureScaling
+from automotive_feature_engineering.feature_interactions import FeatureInteractions
+
+from automotive_feature_engineering.utils.utils import get_feature_df
 from sklearn.model_selection import train_test_split
-import ray
-import src.automotive_feature_engineering.utils.utils as utils
+import automotive_feature_engineering.utils.utils as utils
 
 # from ray.rllib import agents
 from ray.rllib.utils import try_import_tf
-from ray.rllib.examples.models.action_mask_model import (
-    ActionMaskModel,
-    TorchActionMaskModel,
-)
-from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+
+from sklearn.metrics import r2_score
 from sklearn.ensemble import RandomForestRegressor
 
 tf = try_import_tf()
@@ -49,7 +39,6 @@
 
 class EnergaizeEnv2(gym.Env):
     def __init__(self, env_config) -> None:
-        # super(EnergaizeEnv2, self).__init__()
 
         ### Training and Test Data
         # self.df_list = env_config["df"]
@@ -128,8 +117,6 @@ def step(
     ) -> tuple[dict[np.array, np.array], float, bool, bool, dict]:
         ### Increase sequence length
         self.current_sequence_length += 1
-        print("In Step")
-        print(f"Current Sequence Length: ", self.current_sequence_length)
 
         ### Set placeholder for info
         infos = {}
@@ -170,12 +157,8 @@ def step(
 
         ### Take action
         try:
-            print("in try")
             self.total_steps += 1
-            print("Total Steps: ", self.total_steps)
-            print("take action")
             self._take_action(action)
-            print("action taken")
 
             if self.df_train_X_train.shape[1] > 20000:
                 reward = -1
@@ -188,7 +171,6 @@ def step(
             return obs, reward, terminated, truncated, infos
 
         # poly features not possible if df too large
-
         if len(self.df_train_X_train.columns) > 200:
             self.action_mask[14] = 0
         elif len(self.df_train_X_train.columns) <= 200 and 14 not in self.state:
@@ -305,24 +287,19 @@ def reset(
     ##########################################
     def _take_action(self, action: int) -> None:
         if action == 0:
-            print("Platzhalter")
+            print("Placeholder")
         # 0 -> remove highly correlated features
         elif action == 1:
             print(f"Take Action {action}")
             feature_selection = FeatureSelection()
-            print("feature_selection object created")
-            # self.df_train = combine_dfs([self.df_train, self.df_train_target])
-            print("Self Dok Path", self.alt_docu)
-            print("Self alt_config", self.alt_config)
-            print("Feture Selection Object", self.feature_selection)
+
             importances = self.feature_selection.calc_globalFeatureImportance(
                 self.alt_docu,
                 "randomforest",
                 self.df_train_X_train,
                 self.df_train_y_train,
                 self.alt_config,
             )
-            print("importance function called")
             # ### Remove "file" and "I_" before processing
             # self.df, self.df_target = get_feature_df(
             #     self.df, fuse_prefix=self.fuse_prefix
@@ -612,7 +589,6 @@ def _take_action(self, action: int) -> None:
     # Calculate reward
     ##########################################
     def _calculate_performance(self) -> float:
-        # print("CAALLLLAAAAAAAAAA")
 
         ### Add "file" and "I_" before processing
         # self.df = combine_dfs([self.df, self.df_target])
@@ -650,23 +626,6 @@ def _calculate_performance(self) -> float:
 
         pred_val = regr.predict(X_val).reshape(-1, 1)
 
-        # ### PAWD
-        # # measured energy [As] on validation data
-        # val_energy_integral = self.__calculate_integral(self.rl_raster, y_val)
-        # val_energy = float(val_energy_integral[-1])
-
-        # # predicted energy [As] on training data
-        # val_predenergy_integral = self.__calculate_integral(
-        #     self.rl_raster, pred_val
-        # )
-        # val_predenergy = float(val_predenergy_integral[-1])
-        # val_energy_percdev = float(100.0 * (val_predenergy / val_energy) - 100.0)
-
-        # model_xval.append(X_val)
-        # val_energy_list.append(val_energy)
-        # val_energy_percdev_list_abs_weighed.append(
-        #     np.abs(val_energy_percdev) * val_energy
-        # )
 
         ### R2
         valR2.append(r2_score(y_val, pred_val, multioutput="raw_values"))
@@ -675,19 +634,11 @@ def _calculate_performance(self) -> float:
         rewards = []
         ### R2 average
         r2_avg = np.average(valR2)
-        print("r2_avg ", r2_avg)
-        # ### PAWD
-        # pawd = -np.abs(
-        #     np.sum(val_energy_percdev_list_abs_weighed) / np.sum(val_energy_list)
-        # )
+
         print("STATE CALC", self.state)
-        # print("r2_avg ", r2_avg, "PAWD ", pawd)
 
         transformed_r2 = self.expo_r2(r2_avg)
         rewards.append(transformed_r2)
-        # rewards.append(transformed_r2)
-        # transformed_pawd = self.expo_pawd(pawd)
-        # rewards.append(transformed_pawd)
 
         ### Signals
         if self.df_train_X_train.shape[1] > 400:
diff --git a/src/automotive_feature_engineering/reinforcement_learning/rl_main.py b/src/automotive_feature_engineering/reinforcement_learning/rl_main.py
@@ -19,14 +19,10 @@
 from ray.rllib.algorithms import ppo
 from ray.rllib.examples.models.action_mask_model import ActionMaskModel
 from ray.tune.logger import pretty_print
-from src.automotive_feature_engineering.reinforcement_learning.rl_environment_ss import (
+from automotive_feature_engineering.reinforcement_learning.rl_environment_ss import (
     EnergaizeEnv2,
 )
-
-# from config import Config
-import numpy as np
-from src.automotive_feature_engineering.utils import data_loader_test
-from src.automotive_feature_engineering.__init__ import manual
+from automotive_feature_engineering.__init__ import manual
 
 
 os.environ["OPENBLAS_NUM_THREADS"] = "1"
@@ -111,7 +107,7 @@ def rl_training(self):
         }
         print("Starting reinforcement learning training...")
 
-        ray.init(local_mode=False, ignore_reinit_error=True)
+        ray.init(local_mode=True, ignore_reinit_error=True)
 
         register_env("multienv", lambda env_config: EnergaizeEnv2(env_config))
 
diff --git a/src/automotive_feature_engineering/sna_handling.py b/src/automotive_feature_engineering/sna_handling.py
diff --git a/src/automotive_feature_engineering/utils/__init__.py b/src/automotive_feature_engineering/utils/__init__.py

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`# SPDX-FileCopyrightText: Copyright (c) 2024 MBition GmbH.`
`2`	`2`	`# SPDX-License-Identifier: MIT`
`3`		`-from src.automotive_feature_engineering.main_feature_engineering import (`
	`3`	`+from automotive_feature_engineering.main_feature_engineering import (`
`4`	`4`	`FeatureEngineering,`
`5`	`5`	`)`
`6`	`6`