diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 494c0e4..f329973 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_grid_search/__pycache__/__init__.cpython-36.pyc b/q01_grid_search/__pycache__/__init__.cpython-36.pyc index eed5319..c48ee19 100644 Binary files a/q01_grid_search/__pycache__/__init__.cpython-36.pyc and b/q01_grid_search/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_grid_search/__pycache__/build.cpython-36.pyc b/q01_grid_search/__pycache__/build.cpython-36.pyc index cac4a0b..7126cf3 100644 Binary files a/q01_grid_search/__pycache__/build.cpython-36.pyc and b/q01_grid_search/__pycache__/build.cpython-36.pyc differ diff --git a/q01_grid_search/build.py b/q01_grid_search/build.py index 20c99a1..fa05dd1 100644 --- a/q01_grid_search/build.py +++ b/q01_grid_search/build.py @@ -1,22 +1,27 @@ -# Default imports - -import warnings -warnings.filterwarnings("ignore") import pandas as pd +import numpy as np from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import GridSearchCV loan_data = pd.read_csv('data/loan_prediction.csv') -X_bal = loan_data.iloc[:, :-1] -y_bal = loan_data.iloc[:, -1] +X = loan_data.iloc[:, :-1] +y = loan_data.iloc[:, -1] + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=9) + +param_grid1 = {'max_features': ['sqrt', 4, 'log2'], + 'n_estimators': [10, 50, 120], + 'max_depth': [40, 20, 10], + 'max_leaf_nodes': [5, 10, 2]} -X_train, X_test, y_train, y_test = train_test_split(X_bal, y_bal, test_size=0.33, random_state=9) -param_grid = {"max_features": ['sqrt', 4, "log2"], - "n_estimators": [10, 50, 120], - "max_depth": [40, 20, 10], - "max_leaf_nodes": [5, 10, 2]} +rfc = RandomForestClassifier(oob_score=True ,random_state=9) +def grid_search(X_train1, y_train1,modelR,params,cv=3): + GSCV_rfc = GridSearchCV(estimator=modelR, param_grid=params, cv=cv) + GSCV_rfc.fit(X_train1,y_train1) + param_list = GSCV_rfc.cv_results_['params'] + score1 = GSCV_rfc.cv_results_['mean_test_score'] + return GSCV_rfc,param_list,score1 -# Write your solution here : diff --git a/q01_grid_search/tests/__pycache__/__init__.cpython-36.pyc b/q01_grid_search/tests/__pycache__/__init__.cpython-36.pyc index 31ac328..43b7ea2 100644 Binary files a/q01_grid_search/tests/__pycache__/__init__.cpython-36.pyc and b/q01_grid_search/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_grid_search/tests/__pycache__/test_q01_grid_search.cpython-36.pyc b/q01_grid_search/tests/__pycache__/test_q01_grid_search.cpython-36.pyc index bf1afbe..c133d8b 100644 Binary files a/q01_grid_search/tests/__pycache__/test_q01_grid_search.cpython-36.pyc and b/q01_grid_search/tests/__pycache__/test_q01_grid_search.cpython-36.pyc differ diff --git a/q02_fit/__pycache__/__init__.cpython-36.pyc b/q02_fit/__pycache__/__init__.cpython-36.pyc index 97c33cb..e72a1ab 100644 Binary files a/q02_fit/__pycache__/__init__.cpython-36.pyc and b/q02_fit/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_fit/__pycache__/build.cpython-36.pyc b/q02_fit/__pycache__/build.cpython-36.pyc index 3d41a62..3d01f8a 100644 Binary files a/q02_fit/__pycache__/build.cpython-36.pyc and b/q02_fit/__pycache__/build.cpython-36.pyc differ diff --git a/q02_fit/build.py b/q02_fit/build.py index fbafb1a..8ae4522 100644 --- a/q02_fit/build.py +++ b/q02_fit/build.py @@ -1,27 +1,41 @@ -# Default imports - import pandas as pd -from greyatomlib.random_forest_project.q01_grid_search.build import grid_search -from sklearn.metrics import confusion_matrix, accuracy_score, classification_report +import numpy as np from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier - +from sklearn.model_selection import GridSearchCV +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report +from sklearn.metrics import accuracy_score loan_data = pd.read_csv('data/loan_prediction.csv') -X_bal = loan_data.iloc[:, :-1] -y_bal = loan_data.iloc[:, -1] +X = loan_data.iloc[:, :-1] +y = loan_data.iloc[:, -1] + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=9) + +param_grid1 = {'max_features': ['sqrt', 4, 'log2'], + 'n_estimators': [10, 50, 120], + 'max_depth': [40, 20, 10], + 'max_leaf_nodes': [5, 10, 2]} -X_train, X_test, y_train, y_test = train_test_split(X_bal, y_bal, test_size=0.33, random_state=9) -rfc = RandomForestClassifier(oob_score=True, random_state=9) -param_grid = {"max_features": ['sqrt', 4, "log2"], - "n_estimators": [10, 50, 120], - "max_depth": [40, 20, 10], - "max_leaf_nodes": [5, 10, 2]} +rfc = RandomForestClassifier(oob_score = True,random_state=9) -grid, grid_param, grid_score = grid_search(X_train, y_train, rfc, param_grid, cv=3) +def grid_search(X_train1, y_train1,modelR,params,cv): + GSCV_rfc = GridSearchCV(estimator=modelR, param_grid=params, cv=3) + GSCV_rfc.fit(X_train1,y_train1) + param_list = GSCV_rfc.cv_results_['params'] + scoreA = GSCV_rfc.cv_results_['mean_test_score'] + return GSCV_rfc,param_list,scoreA +GSCV_rfc1,param_list1,score1 = grid_search(X_train, y_train,rfc,param_grid1,3) -# Write your solution here : +model = GSCV_rfc1.best_estimator_ +y_pred = model.fit(X_train, y_train).predict(X_test) +def fit(X_test,y_test): + acc_score = accuracy_score(y_test, y_pred) + conf_matrix = confusion_matrix(y_test, y_pred) + c_report = classification_report(y_test, y_pred) + return conf_matrix,c_report,acc_score diff --git a/q02_fit/tests/__pycache__/__init__.cpython-36.pyc b/q02_fit/tests/__pycache__/__init__.cpython-36.pyc index 4a01850..ff396da 100644 Binary files a/q02_fit/tests/__pycache__/__init__.cpython-36.pyc and b/q02_fit/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_fit/tests/__pycache__/test_q02_fit.cpython-36.pyc b/q02_fit/tests/__pycache__/test_q02_fit.cpython-36.pyc index 413b2fc..72e2af4 100644 Binary files a/q02_fit/tests/__pycache__/test_q02_fit.cpython-36.pyc and b/q02_fit/tests/__pycache__/test_q02_fit.cpython-36.pyc differ