def grid_search(X_train, y_train, model, param_grid, cv=3):
    """Run an accuracy-scored grid search over ``param_grid`` for ``model``.

    Args:
        X_train: Training features handed to ``GridSearchCV.fit``.
        y_train: Training labels handed to ``GridSearchCV.fit``.
        model: Estimator instance to tune.
        param_grid: Mapping of parameter name to the candidate values to try.
        cv: Cross-validation setting forwarded to ``GridSearchCV``
            (defaults to 3).

    Returns:
        A tuple ``(grid_obj, params, mean_scores)`` where ``grid_obj`` is the
        fitted ``GridSearchCV`` object, ``params`` is the list of parameter
        settings that were evaluated, and ``mean_scores`` is a NumPy array
        with the mean cross-validated score of each setting, in the same
        order as ``params``.
    """
    # Seeds NumPy's global RNG; kept because callers may depend on this
    # module-wide side effect.
    np.random.seed(9)
    acc_scorer = make_scorer(accuracy_score)

    # Forward ``cv`` explicitly: it was previously accepted but ignored, so
    # the number of folds silently followed the library default.
    grid_obj = GridSearchCV(model, param_grid, scoring=acc_scorer, cv=cv)
    grid_obj = grid_obj.fit(X_train, y_train)

    # ``cv_results_`` replaces the removed ``grid_scores_`` attribute and
    # carries the same parameter settings and mean scores.
    results = grid_obj.cv_results_
    params = list(results['params'])
    mean_scores = np.array(results['mean_test_score'])
    return grid_obj, params, mean_scores
def fit(X_test, y_test):
    """Score the module-level ``grid`` object on the supplied test data.

    Args:
        X_test: Features passed to ``grid.predict``.
        y_test: True labels the predictions are compared against.

    Returns:
        A tuple ``(confusion, report, accuracy)`` holding the results of
        ``confusion_matrix``, ``classification_report`` and
        ``accuracy_score`` for ``y_test`` versus the predictions.
    """
    # ``grid`` is defined at module level, not passed in.
    predictions = grid.predict(X_test)
    return (
        confusion_matrix(y_test, predictions),
        classification_report(y_test, predictions),
        accuracy_score(y_test, predictions),
    )