Problem: my code does not identify the search space from GridSearch #978

Open · sasmendonca opened this issue Nov 15, 2023 · 0 comments
I have the code below as the backend of my Jupyter notebook; it is what runs when I call the model:

```python
# Imports
import numpy as np
import pandas as pd
from scipy import stats

# Sklearn
from sklearn.metrics import r2_score
from ML.ml_utils import *
from sklearn.model_selection import train_test_split

# FNN
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras_tuner import HyperParameters
from keras_tuner import Objective
from keras_tuner.tuners import GridSearch, RandomSearch
from your_module import create_model

class FeedForwardNN(tf.keras.Model):
    def __init__(self, input_dim=None, random_seed=42):
        super().__init__()
        self.seed = random_seed
        input_dim = 2048
        self.input_dim = input_dim
        self.model = self.build_model()

    def build_model(self, hp=None):
        if hp is None:
            hp = kt.HyperParameters()
        tf.random.set_seed(self.seed)

        # Hyperparameters
        optimizer_c = hp.Choice("optimizer", ['SGD', 'Adam'])
        learning_rate = hp.Float("learning_rate", min_value=0.00001, max_value=0.1, step=10, sampling='log')
        l2_reg = hp.Float("l2_reg", min_value=0.0001, max_value=0.1, step=10, sampling='log')
        dropout_rate = hp.Float("dropout_rate", min_value=0, max_value=0.5, step=0.1)
        output_dim = hp.Fixed("output_dim", value=1)

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Input(shape=(self.input_dim,)))
        model.add(tf.keras.layers.Dense(units=hp.Int("units_1", min_value=32, max_value=256, step=32),
                                        activation='relu',
                                        kernel_regularizer=tf.keras.regularizers.l2(l2_reg)))

        for i in range(hp.Int("num_layers", 1, 3)):
            model.add(tf.keras.layers.Dense(units=hp.Int(f"units_{i}", min_value=32, max_value=256, step=32),
                                            activation='relu',
                                            kernel_regularizer=tf.keras.regularizers.l2(l2_reg)))

        if optimizer_c == "SGD":
            optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        elif optimizer_c == "Adam":
            optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        else:
            raise ValueError("Unsupported optimizer")

        model.add(tf.keras.layers.Dropout(dropout_rate))
        model.add(tf.keras.layers.Dense(output_dim, activation='linear'))

        model.compile(loss=tf.keras.losses.mean_squared_error,
                      optimizer=optimizer,
                      metrics=['mean_absolute_error'])

        return model

    def call(self, inputs, training=False):
        # Define the forward pass of the model
        return self.model(inputs, training=training)

    def fit(self, X, y, *args, **kwargs):
        return self.model.fit(X, y, *args, shuffle=True, **kwargs)

class MLModel:
    def __init__(self, data, ml_algorithm, reg_class="regression", cv_fold=10, random_seed=42):
        self.data = data
        self.ml_algorithm = ml_algorithm
        self.reg_class = reg_class
        self.cv_fold = cv_fold
        self.seed = random_seed
        self.features_train, self.features_val, self.labels_train, self.labels_val = self.split_data()
        self.best_params, self.best_model = self.train_with_hyperparameters()
        self.model = self.train_with_hyperparameters_and_final_model()

    def split_data(self):
        features_train, features_val, labels_train, labels_val = train_test_split(
            self.data.features, self.data.labels, test_size=0.2, random_state=self.seed)
        return features_train, features_val, labels_train, labels_val

    def train_with_hyperparameters(self):
        if self.ml_algorithm == 'FNN':
            tuner = GridSearch(FeedForwardNN,
                               objective='val_mean_absolute_error',
                               max_trials=self.cv_fold,
                               seed=self.seed,
                               max_retries_per_trial=2,)

            tuner.search_space_summary()

            stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

            tuner.search(self.features_train,
                         self.labels_train,
                         epochs=100,
                         validation_split=0.2,
                         callbacks=[stop_early])

            best_h_params = tuner.get_best_hyperparameters(num_trials=1)[0]
            best_model = tuner.get_best_models()[0]

            return best_h_params, best_model

    def final_model(self):
        if self.ml_algorithm == "FNN":
            best_h_params = self.train_with_hyperparameters()

            # Create a new instance of FeedForwardNN with the best hyperparameters
            final_model = FeedForwardNN(**best_h_params)
            final_model.build((None, self.features_train.shape[1]))

            early_stopping = EarlyStopping(monitor="val_loss",
                                           patience=10,
                                           restore_best_weights=True)

            final_model.fit(self.features_train,
                            self.labels_train,
                            epochs=100,
                            validation_data=(self.features_val, self.labels_val),
                            callbacks=[early_stopping])

            return final_model
        else:
            raise ValueError('Optimal parameter error')

    def train_with_hyperparameters_and_final_model(self):
        self.best_params, self.best_model = self.train_with_hyperparameters()
        final_model = self.final_model()
        return final_model

class Model_Evaluation:
    def __init__(self, model, data, model_id=None, model_loaded=None, reg_class="regression"):
        self.reg_class = reg_class
        self.model_id = model_id
        self.model = model
        self.data = data
        self.model_loaded = model_loaded
        self.labels, self.y_pred, self.predictions = self.model_predict(data)
        self.pred_performance = self.prediction_performance(data)

    def model_predict(self, data):
        if self.reg_class == "regression":
            if self.model_id == "FNN":
                data_features = data.features
            else:
                raise ValueError('Prediction error')

            if self.model_loaded is not None:
                y_prediction = self.model.predict(data_features)
            else:
                y_prediction = self.model.model.predict(data_features)
            labels = self.data.labels

            predictions = pd.DataFrame(list(zip(data.cid, labels, y_prediction)),
                                       columns=["Cid", "Experimental", "Predicted"])
            predictions['Target ID'] = data.target[0]
            predictions['Algorithm'] = self.model_id
            predictions['Residuals'] = [label_i - prediction_i for label_i, prediction_i in zip(labels, y_prediction)]

            return labels, y_prediction, predictions

    def prediction_performance(self, data, nantozero=False) -> pd.DataFrame:
        if self.reg_class == "regression":
            labels = self.labels
            pred = self.y_pred

            fill = 0 if nantozero else np.nan
            if len(pred) == 0:
                mae = fill
                mse = fill
                rmse = fill
                r2 = fill
                r = fill
            else:
                mae = tf.keras.metrics.mean_absolute_error(labels, pred).numpy().tolist()
                mse = tf.keras.metrics.mean_squared_error(labels, pred).numpy().tolist()
                rmse = np.sqrt(mse)

            target = data.target[0]
            model_name = self.model_id

            # Calculate r and r2
            self.labels1 = self.labels.reshape(-1, 1)
            self.y_pred1 = self.y_pred.reshape(-1, 1)
            correlation_matrix = np.corrcoef(self.labels1, self.y_pred1, rowvar=False)
            correlation_xy = correlation_matrix[0, 1]
            r = correlation_xy**2
            r2 = r2_score(self.labels, self.y_pred)

            result_list = [{"MAE": mae,
                            "MSE": mse,
                            "RMSE": rmse,
                            "R2": r2,
                            "r": r,
                            "Dataset size": len(labels),
                            "Target ID": target,
                            "Algorithm": model_name}]

            # Prepare result dataset
            results = pd.DataFrame(result_list)
            results.set_index(["Target ID", "Algorithm", "Dataset size"], inplace=True)
            results.columns = pd.MultiIndex.from_product([["Value"], ["MAE", "MSE", "RMSE", "R2", "r"]],
                                                         names=["Value", "Metric"])
            results = results.stack().reset_index().set_index("Target ID")

            return results
```

I don't know why, but the code runs without performing the hyperparameter search, and `search_space_summary()` prints only:

```
Search space summary
Default search space size: 0
```

Can anyone help me correct my code? I think it is something related to `hp = kt.HyperParameters()`, but I have tried every approach I could think of and nothing made a difference.
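As a possible pointer: keras_tuner only populates the search space from hyperparameters registered on the `hp` object that the tuner itself passes into the hypermodel, so the hypermodel handed to `GridSearch` is normally a function taking `hp` (or a `kt.HyperModel` whose `build(hp)` uses that `hp`). Below is a minimal sketch of that pattern, not the original code; the layer sizes, `max_trials` value, and the reduced hyperparameter set are placeholders, and it assumes a keras_tuner version that ships `GridSearch`:

```python
# Minimal sketch: the tuner calls build_model(hp) itself, and only
# hyperparameters registered on that hp object appear in the search space.
# Shapes, names, and max_trials here are placeholders, not from the issue.
import tensorflow as tf
import keras_tuner as kt

def build_model(hp):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(2048,)),
        tf.keras.layers.Dense(
            units=hp.Int("units_1", min_value=32, max_value=256, step=32),
            activation="relu",
        ),
        tf.keras.layers.Dense(1, activation="linear"),
    ])
    model.compile(
        loss="mean_squared_error",
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float("learning_rate", min_value=1e-5,
                                   max_value=1e-1, step=10, sampling="log")
        ),
        metrics=["mean_absolute_error"],
    )
    return model

tuner = kt.GridSearch(
    build_model,  # a function of hp (or a kt.HyperModel), not a Model subclass
    objective="val_mean_absolute_error",
    max_trials=10,
)
tuner.search_space_summary()  # should now report a non-empty search space
```

If I read keras_tuner's behavior correctly, `GridSearch(FeedForwardNN, ...)` in the posted code ends up calling `FeedForwardNN(hp)`, whose `__init__` receives the tuner's `hp` as `input_dim`, discards it, and calls `build_model()` with `hp=None`, so the hyperparameters get registered on a fresh `kt.HyperParameters()` the tuner never sees; that would explain the search space size of 0.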
