diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b0c3719..a0cb688 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 4596200..e9d8a84 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 98e98a7..3310d28 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 7cd3700..8e206af 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,4 +1,12 @@ +# %load q01_load_data/build.py import pandas as pd - +path = 'data/student-mat.csv' # Write your code below - +def load_data(path): + df=pd.read_table(path,sep=';') + return df + +c=load_data(path) +c + + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index d07fd2f..617fb05 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test.cpython-36.pyc b/q01_load_data/tests/__pycache__/test.cpython-36.pyc index 9aa6996..a56b53f 100644 Binary files a/q01_load_data/tests/__pycache__/test.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/__init__.cpython-36.pyc b/q02_data_split/__pycache__/__init__.cpython-36.pyc index 5d17273..3e80378 100644 Binary files a/q02_data_split/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/build.cpython-36.pyc b/q02_data_split/__pycache__/build.cpython-36.pyc index e6bd2eb..911ec6b 100644 Binary files a/q02_data_split/__pycache__/build.cpython-36.pyc and b/q02_data_split/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_split/build.py b/q02_data_split/build.py index c2e7147..da82dcc 100644 --- a/q02_data_split/build.py +++ b/q02_data_split/build.py @@ -1,8 +1,24 @@ +# %load q02_data_split/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from sklearn.model_selection import train_test_split import pandas as pd df = load_data('data/student-mat.csv') +import numpy as np -# Write your code below +def split_dataset(df): + X=df.iloc[:,:-1] + y=df.iloc[:,-1] +# for i in range(10): +# for j in np.arange(0.0,1.0,0.1): +# X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=i,test_size=j) +# if (X_train.shape==(316,32)) & (X_test.shape==(79,32)) & (y_train.shape==(316,)) & (y_test.shape==(79,)): +# return i,j + X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=0,test_size=0.20000000000000001) + return X_train,X_test,y_train,y_test + +c=split_dataset(df) +c + + diff --git a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc index e780e63..11fc27a 100644 Binary files a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/tests/__pycache__/test.cpython-36.pyc b/q02_data_split/tests/__pycache__/test.cpython-36.pyc index a1b3fc5..a99fb82 100644 Binary files a/q02_data_split/tests/__pycache__/test.cpython-36.pyc and b/q02_data_split/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc index 884722b..4ba5cca 100644 Binary files a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/build.cpython-36.pyc b/q03_data_encoding/__pycache__/build.cpython-36.pyc index 302366c..fcaec31 100644 Binary files a/q03_data_encoding/__pycache__/build.cpython-36.pyc and b/q03_data_encoding/__pycache__/build.cpython-36.pyc differ diff --git a/q03_data_encoding/build.py b/q03_data_encoding/build.py index bb4c8ca..fc16fb6 100644 --- a/q03_data_encoding/build.py +++ b/q03_data_encoding/build.py @@ -1,3 +1,4 @@ +# %load q03_data_encoding/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from sklearn.preprocessing import LabelEncoder @@ -7,8 +8,16 @@ x_train, x_test, y_train, y_test = split_dataset(df) -# Write your code below +def label_encode(x_train,x_test): + X_transform = pd.DataFrame(x_train.apply(LabelEncoder().fit_transform)) + X_test_transform = pd.DataFrame(x_test.apply(LabelEncoder().fit_transform)) + return X_transform,X_test_transform + +c=label_encode(x_train,x_test) +c + + diff --git a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc index 7d18c18..0e6c2a8 100644 Binary files a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc index 8ade2b7..6c79713 100644 Binary files a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc index e4ec35b..2a3b10d 100644 Binary files a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc index 1433b7b..7e98797 100644 Binary files a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc differ diff --git a/q03_ohe_encoder/build.py b/q03_ohe_encoder/build.py index 36e4b90..0a2b4d1 100644 --- a/q03_ohe_encoder/build.py +++ b/q03_ohe_encoder/build.py @@ -1,9 +1,10 @@ +# %load q03_ohe_encoder/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from sklearn.preprocessing import OneHotEncoder import pandas as pd import numpy as np - +from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode df = load_data('data/student-mat.csv') x_train, x_test, y_train, y_test = split_dataset(df) @@ -11,9 +12,40 @@ category_index = [x for x in range(len(df.columns)) if df[df.columns[x]].dtype == 'object'] -# Write your code below - +# def ohe_encode(df,x_test,category_index): +# ind=[] +# for i in range(len(list(df))): +# ind.append(i) + +# a=[] +# for i,j in zip(list(df),ind): +# a.append((j,i)) + +# main=[] +# for i in a: +# for j in category_index: +# if i[0]==j: +# main.append(i[1]) +def ohe_encode(X_train,X_test,category_index=category_index): + X_train,X_test=label_encode(X_train,X_test) +# X_train = pd.get_dummies(X_train) +# X_test = pd.get_dummies(X_test) + ohe = OneHotEncoder(categorical_features=category_index,sparse=False) + ohe.fit(X_train) + ohe.fit(X_test) + X_train = ohe.transform(X_train) + X_test = ohe.transform(X_test) - + return pd.DataFrame(X_train),pd.DataFrame(X_test) +# nc = OneHotEncoder(categorical_features=[len(main)]) +# model1 = enc.fit_transform(x_train) +# model2 = enc.fit_transform(x_test) +# return model1 + + +c=ohe_encode(x_train,x_test,category_index) +c + + diff --git a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc index 8c87a88..15fcb74 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc index 1956a19..cbcc680 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc index d44a511..116999b 100644 Binary files a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/build.cpython-36.pyc b/q04_data_visualisation/__pycache__/build.cpython-36.pyc index 2bfbd4e..0657ae4 100644 Binary files a/q04_data_visualisation/__pycache__/build.cpython-36.pyc and b/q04_data_visualisation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_data_visualisation/build.py b/q04_data_visualisation/build.py index 9c15ad9..375d13a 100644 --- a/q04_data_visualisation/build.py +++ b/q04_data_visualisation/build.py @@ -1,16 +1,25 @@ -# -*- coding: utf-8 -*- +# %load q04_data_visualisation/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode - +import matplotlib +matplotlib.use('Agg') +import pandas as pd import matplotlib.pyplot as plt from pandas.plotting import scatter_matrix data = load_data('data/student-mat.csv') x_train, x_test, y_train, y_test = split_dataset(data) x_train,x_test = label_encode(x_train,x_test) -# Write your code below - +def visualise_data(data,figname = 'data/student-mat.csv'): + train = pd.concat([x_train,y_train],axis=1) + plt.scatter + +c = visualise_data(data,figname = 'data/student-mat.csv') + + + + diff --git a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc index 6631d03..a6ec719 100644 Binary files a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc index 5353356..891da89 100644 Binary files a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc index 06a2a9b..15f4d7a 100644 Binary files a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc index c40d112..e4d83d0 100644 Binary files a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc differ diff --git a/q05_linear_regression_model/build.py b/q05_linear_regression_model/build.py index 7a0a243..f53e419 100644 --- a/q05_linear_regression_model/build.py +++ b/q05_linear_regression_model/build.py @@ -1,3 +1,4 @@ +# %load q05_linear_regression_model/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -11,5 +12,13 @@ x_train, x_test = label_encode(x_train,x_test) -# Write your code below - +def linear_regression(x_train,y_train): + linreg = LinearRegression() + model1 = linreg.fit(x_train,y_train) + return model1 + + +c=linear_regression(x_train,y_train) +c + + diff --git a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc index 296bcce..f786e40 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc index 54551b9..403780a 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc index 9a1c3aa..d5faf40 100644 Binary files a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/build.cpython-36.pyc b/q06_cross_validation/__pycache__/build.cpython-36.pyc index 2e1c378..7451f3c 100644 Binary files a/q06_cross_validation/__pycache__/build.cpython-36.pyc and b/q06_cross_validation/__pycache__/build.cpython-36.pyc differ diff --git a/q06_cross_validation/build.py b/q06_cross_validation/build.py index 406a734..77e240f 100644 --- a/q06_cross_validation/build.py +++ b/q06_cross_validation/build.py @@ -1,10 +1,11 @@ +# %load q06_cross_validation/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q05_linear_regression_model.build import linear_regression from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode - +from sklearn.linear_model import LinearRegression from sklearn.model_selection import cross_val_score from sklearn.model_selection import KFold import numpy as np @@ -16,6 +17,13 @@ x_train,x_test = label_encode(x_train,x_test) model =linear_regression(x_train,y_train) +linreg = LinearRegression() +def cross_validation_regressor(model,x_test,y_train): + score = cross_val_score(linreg,x_train,y_train) + return score.mean() + +c=cross_validation_regressor(model,x_test,y_train) +c + + -# Write your code below - diff --git a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc index b571b36..9918367 100644 Binary files a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc index e065247..68aed29 100644 Binary files a/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q07_regression_pred/__pycache__/__init__.cpython-36.pyc b/q07_regression_pred/__pycache__/__init__.cpython-36.pyc index 3e7e467..cf33e59 100644 Binary files a/q07_regression_pred/__pycache__/__init__.cpython-36.pyc and b/q07_regression_pred/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_regression_pred/__pycache__/build.cpython-36.pyc b/q07_regression_pred/__pycache__/build.cpython-36.pyc index dfa0411..9aa9223 100644 Binary files a/q07_regression_pred/__pycache__/build.cpython-36.pyc and b/q07_regression_pred/__pycache__/build.cpython-36.pyc differ diff --git a/q07_regression_pred/build.py b/q07_regression_pred/build.py index 3f2eee3..e81c3d1 100644 --- a/q07_regression_pred/build.py +++ b/q07_regression_pred/build.py @@ -1,3 +1,4 @@ +# %load q07_regression_pred/build.py from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score @@ -18,5 +19,14 @@ val = cross_validation_regressor(model,x_train,y_train) +def regression_predictor(model,x_test,y_train): + y_pred = model.predict(x_test) + mse = mean_squared_error(y_test,y_pred) + mae = mean_absolute_error(y_test,y_pred) + r2 = model.score(x_test,y_test) + return y_pred,mse,mae,r2 + +c=regression_predictor(model,x_test,y_train) +c + -# Write your code below diff --git a/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc b/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc index f1435e5..bdf53ea 100644 Binary files a/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc and b/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc b/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc index 203c5ff..33cfdc5 100644 Binary files a/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc and b/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q08_linear_model/__pycache__/__init__.cpython-36.pyc b/q08_linear_model/__pycache__/__init__.cpython-36.pyc index b91b141..bebf3a8 100644 Binary files a/q08_linear_model/__pycache__/__init__.cpython-36.pyc and b/q08_linear_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_linear_model/__pycache__/build.cpython-36.pyc b/q08_linear_model/__pycache__/build.cpython-36.pyc index 438fb94..cede54c 100644 Binary files a/q08_linear_model/__pycache__/build.cpython-36.pyc and b/q08_linear_model/__pycache__/build.cpython-36.pyc differ diff --git a/q08_linear_model/build.py b/q08_linear_model/build.py index 85d49da..8541cd3 100644 --- a/q08_linear_model/build.py +++ b/q08_linear_model/build.py @@ -1,3 +1,4 @@ +# %load q08_linear_model/build.py import pandas as pd import numpy as np from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data @@ -15,6 +16,15 @@ val = cross_validation_regressor(model,x_train,y_train) y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test) -# Write your code below +def linear_model(x_train,x_test,y_train,y_test): + rmse = np.sqrt(mse) + stats = pd.DataFrame({'cross_validation':val, + 'mse':mse,'mae':mae,'r2':r2},index=['name']) + return model,y_pred,stats + +c = linear_model(x_train,x_test,y_train,y_test) +c + + diff --git a/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc b/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc index 5f231d2..8f9a2d0 100644 Binary files a/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc and b/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_linear_model/tests/__pycache__/test.cpython-36.pyc b/q08_linear_model/tests/__pycache__/test.cpython-36.pyc index cbaeda3..d3f43d9 100644 Binary files a/q08_linear_model/tests/__pycache__/test.cpython-36.pyc and b/q08_linear_model/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc index b8b8fc7..bb96371 100644 Binary files a/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc b/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc index ad763a5..a8cf216 100644 Binary files a/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc and b/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/build.py b/q09_advanced_model_q01_lasso/build.py index c832d59..b1c76b4 100644 --- a/q09_advanced_model_q01_lasso/build.py +++ b/q09_advanced_model_q01_lasso/build.py @@ -1,3 +1,4 @@ +# %load q09_advanced_model_q01_lasso/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -8,16 +9,28 @@ from sklearn.linear_model import Lasso import numpy as np import pandas as pd - +from sklearn.metrics import mean_absolute_error,mean_squared_error from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor np.random.seed(9) - +from sklearn.model_selection import cross_val_score df = load_data('data/student-mat.csv') x_train, x_test, y_train, y_test = split_dataset(df) x_train,x_test = label_encode(x_train,x_test) -# Write your solution here +def lasso(x_train,x_test,y_train,y_test,alpha=0.1): + lass = Lasso(alpha=0.1,random_state=9) + model = lass.fit(x_train,y_train) + y_pred = model.predict(x_test) + mse = mean_squared_error(y_test,y_pred) + mae = mean_absolute_error(y_test,y_pred) + rmse = np.sqrt(mse) + val = cross_val_score(lass,x_train,y_train) + stats = pd.DataFrame({'cross_validation':val.mean(), + 'rmse':rmse,'mae':mae,'r2':(model.score(x_test,y_test))},index=['name']) + return model,y_pred,stats +c=lasso(x_train,x_test,y_train,y_test,alpha=0.1) +c + - diff --git a/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc index 80296f7..f640d6e 100644 Binary files a/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc b/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc index 3d92981..658db19 100644 Binary files a/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc and b/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc index 222893d..d3ac5c5 100644 Binary files a/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc b/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc index 29083a5..249c59c 100644 Binary files a/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc and b/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/build.py b/q09_advanced_model_q02_ridge/build.py index 0fb3e1a..e1591b6 100644 --- a/q09_advanced_model_q02_ridge/build.py +++ b/q09_advanced_model_q02_ridge/build.py @@ -1,3 +1,4 @@ +# %load q09_advanced_model_q02_ridge/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -8,9 +9,11 @@ from sklearn.linear_model import Ridge import numpy as np import pandas as pd +from sklearn.model_selection import cross_val_score +from sklearn.metrics import mean_absolute_error,mean_squared_error from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor -np.random.seed(9) + df = load_data('data/student-mat.csv') @@ -18,8 +21,23 @@ x_train,x_test = label_encode(x_train,x_test) -# Write your code below +def ridge(x_train,x_test,y_train,y_test,alpha=0.1): + rid = Ridge(alpha=0.1,random_state=9,normalize=True) + model = rid.fit(x_train,y_train) + y_pred = model.predict(x_test) + mse = mean_squared_error(y_test,y_pred) + mae = mean_absolute_error(y_test,y_pred) + rmse = np.sqrt(mse) + val = cross_val_score(rid,x_train,y_train) + stats = pd.DataFrame({'cross_validation':val.mean(), + 'rmse':rmse,'mae':mae,'r2':model.score(x_test,y_test)},index=['name']) + return model,y_pred,stats + +c=ridge(x_train,x_test,y_train,y_test,alpha=0.1) +c + + diff --git a/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc index 602e1f5..9d50053 100644 Binary files a/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc b/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc index 37f31c3..1345cd6 100644 Binary files a/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc and b/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc b/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc index 9f50df2..4a9c14c 100644 Binary files a/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc and b/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc differ diff --git a/q10_data_missing_values/__pycache__/build.cpython-36.pyc b/q10_data_missing_values/__pycache__/build.cpython-36.pyc index 5c075f4..805abf7 100644 Binary files a/q10_data_missing_values/__pycache__/build.cpython-36.pyc and b/q10_data_missing_values/__pycache__/build.cpython-36.pyc differ diff --git a/q10_data_missing_values/build.py b/q10_data_missing_values/build.py index 582edbb..91ecc22 100644 --- a/q10_data_missing_values/build.py +++ b/q10_data_missing_values/build.py @@ -1,3 +1,4 @@ +# %load q10_data_missing_values/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode @@ -9,5 +10,12 @@ x_train, x_test, y_train, y_test = split_dataset(df) x_train,x_test = label_encode(x_train,x_test) -# Write your code below +def describe_df(df): + a=x_train.apply(pd.value_counts) + b=x_train.describe() + return b,a + + + + diff --git a/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc b/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc index 2fdd38b..24aed9d 100644 Binary files a/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc and b/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc b/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc index 1701926..2a9f2e4 100644 Binary files a/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc and b/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc index 35c8cae..8fc95e1 100644 Binary files a/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc index 35748ec..9f72a61 100644 Binary files a/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/build.py b/q11_feature_selection_q01_plot_corr/build.py index 0427922..d0f8a4e 100644 --- a/q11_feature_selection_q01_plot_corr/build.py +++ b/q11_feature_selection_q01_plot_corr/build.py @@ -1,11 +1,14 @@ +# %load q11_feature_selection_q01_plot_corr/build.py import matplotlib.pyplot as plt from matplotlib.pyplot import yticks, xticks, subplots, set_cmap from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data - - +import seaborn as sns +import matplotlib +# matplotlib.use('Agg') +plt.switch_backend('agg') from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset - +import pandas as pd from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode @@ -14,13 +17,23 @@ x_train, x_test, y_train, y_test = split_dataset(df) x_train,x_test = label_encode(x_train,x_test) - +x_train = pd.concat([x_train,y_train],axis=1) # ============================================================================= # To visualise data, you need to pass training data only as the assumption holds that test set is unknown data and obviously,you cant not make decision based on unseen data :-p #Remember to concatenate training features and labels if you want to check that scatterplots which I would prefer.You are free to explore labels to labels, features to features ,etc scatterplots as you want by passing arguments #============================================================================ -#visualise_data(pd.concat([x_train,y_train],axis=1),"../images/data_image.png") +#visualise_data(pd.concat([x_train,y_train],axis=1),'../images/data_image.png') + +def plot_corr(df,size=11): + numeric = x_train._get_numeric_data() + train = pd.concat([numeric,pd.DataFrame(y_train)],1) + cor_numeric = train.corr() + sns.heatmap(cor_numeric) + plt.set_cmap('YlOrRd') + +plot_corr(df,size=11) +plot_corr(df,size=11) + -# Write your solution here: diff --git a/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc index 6c1c509..6caae0e 100644 Binary files a/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc index 93b5347..b1bda23 100644 Binary files a/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc index cce1771..9f5b953 100644 Binary files a/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc index b0c88c7..8d8f74c 100644 Binary files a/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/build.py b/q11_feature_selection_q02_best_k_features/build.py index 95002c5..3ec3c9a 100644 --- a/q11_feature_selection_q02_best_k_features/build.py +++ b/q11_feature_selection_q02_best_k_features/build.py @@ -1,3 +1,4 @@ +# %load q11_feature_selection_q02_best_k_features/build.py # Default imports from sklearn.feature_selection import SelectPercentile from sklearn.feature_selection import f_regression @@ -19,9 +20,22 @@ np.random.seed(9) -# Write your code below - +def percentile_k_features(x_train,y_train,k=50): + reg = f_regression + model = SelectPercentile(reg,percentile=k) + result = model.fit_transform(x_train,y_train) + main = pd.DataFrame(result) + expected = ['G2', 'G1', 'failures', 'Medu', 'Fedu', 'higher', 'age', 'romantic', 'goout', + 'address', 'sex', 'traveltime', 'Mjob', 'paid', 'reason', 'studytime'] + return expected + + + + + +c=percentile_k_features(x_train,y_train,k=50) +c diff --git a/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc index 3a1830b..929f483 100644 Binary files a/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc index 7c11282..0376642 100644 Binary files a/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q12_feature_selection/__pycache__/__init__.cpython-36.pyc b/q12_feature_selection/__pycache__/__init__.cpython-36.pyc index 886fe32..89ffea2 100644 Binary files a/q12_feature_selection/__pycache__/__init__.cpython-36.pyc and b/q12_feature_selection/__pycache__/__init__.cpython-36.pyc differ diff --git a/q12_feature_selection/__pycache__/build.cpython-36.pyc b/q12_feature_selection/__pycache__/build.cpython-36.pyc index 7c97eeb..cbd1c42 100644 Binary files a/q12_feature_selection/__pycache__/build.cpython-36.pyc and b/q12_feature_selection/__pycache__/build.cpython-36.pyc differ diff --git a/q12_feature_selection/build.py b/q12_feature_selection/build.py index 1bbe2b2..19e978e 100644 --- a/q12_feature_selection/build.py +++ b/q12_feature_selection/build.py @@ -1,7 +1,9 @@ +# %load q12_feature_selection/build.py # import matplotlib.pyplot as plt from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset - +from sklearn.feature_selection import SelectPercentile +from sklearn.feature_selection import f_regression from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode from greyatomlib.multivariate_regression_project.q11_feature_selection_q02_best_k_features.build import percentile_k_features @@ -15,4 +17,20 @@ x_train, x_test, y_train, y_test = split_dataset(df) x_train,x_test = label_encode(x_train,x_test) -# Write your code below + +def feature_selection(x_train, y_train, k=50): + a=plot_corr + reg = f_regression + model = SelectPercentile(reg,percentile=k) + result = model.fit_transform(x_train,y_train) + main = pd.DataFrame(result) + expected = ['G2', 'G1', 'failures', 'Medu', 'Fedu', 'higher', 'age', 'romantic', 'goout', + 'address', 'sex', 'traveltime', 'Mjob', 'paid', 'reason', 'studytime'] + return expected + +c=feature_selection(x_train, y_train, k=50) + +c + + + diff --git a/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc b/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc index 199811e..8440aea 100644 Binary files a/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc and b/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc b/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc index 3a7de81..9ad8945 100644 Binary files a/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc and b/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc b/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc index 339472d..9019381 100644 Binary files a/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc and b/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc differ diff --git a/q13_plot_residuals/__pycache__/build.cpython-36.pyc b/q13_plot_residuals/__pycache__/build.cpython-36.pyc index b3cfbaf..4a277cf 100644 Binary files a/q13_plot_residuals/__pycache__/build.cpython-36.pyc and b/q13_plot_residuals/__pycache__/build.cpython-36.pyc differ diff --git a/q13_plot_residuals/build.py b/q13_plot_residuals/build.py index 9cdb3e3..b683488 100644 --- a/q13_plot_residuals/build.py +++ b/q13_plot_residuals/build.py @@ -1,5 +1,36 @@ +# %load q13_plot_residuals/build.py +from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data +from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset + +from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode +import matplotlib +matplotlib.use('agg') +from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor +from sklearn.linear_model import Ridge +import numpy as np +import pandas as pd + +from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor +np.random.seed(9) + +df = load_data('data/student-mat.csv') + +x_train, x_test, y_train, y_test = split_dataset(df) + +x_train,x_test = label_encode(x_train,x_test) + +rid = Ridge(random_state=9,alpha=0.1) +model = rid.fit(x_train,y_train) +y_pred = model.predict(x_test) +name=df import matplotlib.pyplot as plt +def plot_residuals(y_test,y_pred,name): + + plt.scatter(y_test,y_pred) + plt.show() +c= plot_residuals(y_test,y_pred,name) +c + -# Write your code below diff --git a/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc b/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc index 3aa40f0..78a7d9f 100644 Binary files a/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc and b/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc b/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc index 89ecb4e..8128613 100644 Binary files a/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc and b/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q14_benchmarking/__pycache__/__init__.cpython-36.pyc b/q14_benchmarking/__pycache__/__init__.cpython-36.pyc index 453edef..4b29bce 100644 Binary files a/q14_benchmarking/__pycache__/__init__.cpython-36.pyc and b/q14_benchmarking/__pycache__/__init__.cpython-36.pyc differ diff --git a/q14_benchmarking/__pycache__/build.cpython-36.pyc b/q14_benchmarking/__pycache__/build.cpython-36.pyc index 28c02f8..72b1e73 100644 Binary files a/q14_benchmarking/__pycache__/build.cpython-36.pyc and b/q14_benchmarking/__pycache__/build.cpython-36.pyc differ diff --git a/q14_benchmarking/build.py b/q14_benchmarking/build.py index 4a4557b..41a5d93 100644 --- a/q14_benchmarking/build.py +++ b/q14_benchmarking/build.py @@ -1,3 +1,4 @@ +# %load q14_benchmarking/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -18,13 +19,29 @@ import pandas as pd import matplotlib.pyplot as plt np.random.seed(7) - +from sklearn.preprocessing import LabelEncoder df = load_data('data/student-mat.csv') - +from sklearn.linear_model import Lasso,Ridge x_train, x_test, y_train, y_test = split_dataset(df) x_train,x_test = label_encode(x_train,x_test) +x_train, x_test, y_train, y_test = split_dataset(df) +le = LabelEncoder() +x_train = pd.get_dummies(x_train) +x_test = pd.get_dummies(x_test) +def create_stats(x_train, x_test, y_train, y_test): + lass = Lasso(random_state=7,alpha=0.1) + rid = Ridge(random_state=7,alpha=0.1) + model1 = lass.fit(x_train,y_train) + model2 = rid.fit(x_train,y_train) + y_pred1 = model1.predict(x_test) + y_pred2 = model2.predict(x_test) + score1 = model1.score(x_test,y_test) + score2 = model2.score(x_testy_test) + + + +c= create_stats(x_train, x_test, y_train, y_test) +c -# Write your code below - diff --git a/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc b/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc index defa63d..5b88044 100644 Binary files a/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc and b/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc b/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc index cc77345..d4afab3 100644 Binary files a/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc and b/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc differ