-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
76 lines (62 loc) · 2.1 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Load the placement dataset and discard the two columns that are not used
# for modelling: the serial number ('sl_no') and 'salary'.
# NOTE: the CSV location is a machine-specific absolute path; the script only
# runs where the file exists at exactly this location.
data = pd.read_csv(
    'C:/Users/lenovo/Desktop/ProjectS/ML & DL/ML projects/Placement-Predictor-Flask/Placement_Data_Full_Class.csv - Sheet1.csv'
).drop(columns=['sl_no', 'salary'])
# Label-encode the text-valued columns.
# Each listed column is converted to pandas' 'category' dtype and then
# replaced by its integer category codes, so the frame handed to the model
# holds numbers in these columns instead of strings.
for column in (
    "gender",
    "ssc_b",
    "hsc_b",
    "degree_t",
    "workex",
    "specialisation",
    "status",
    "hsc_s",
):
    data[column] = data[column].astype('category').cat.codes
# Separate features from the label and create the train/test split.
from sklearn.model_selection import train_test_split

# Every column except the last is a feature; the last column is the label.
# NOTE(review): presumably the last column is 'status' -- confirm against the
# CSV header, since the selection is positional.
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values

# Hold out 20% of the rows for evaluation. The seed is fixed so the split --
# and therefore the reported accuracy and the saved model -- is identical on
# every run, in line with the seeded classifier trained on this split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)
# Train a logistic-regression classifier on the training split.
from sklearn.linear_model import LogisticRegression

# NOTE(review): default solver settings are used on unscaled features; confirm
# the fit converges without a ConvergenceWarning.
clf = LogisticRegression(random_state=0).fit(X_train, Y_train)

# Print the mean accuracy on the held-out split. As a bare expression this
# value was silently discarded when the file ran as a script.
print(clf.score(X_test, Y_test))

# Print the prediction for one hand-written sample row (12 feature values).
print(clf.predict([[1, 77, 0, 95, 0, 2, 80, 2, 0, 0, 1, 0]]))

# Predictions for the whole test split, used by the evaluation further down.
Y_pred = clf.predict(X_test)
# Persist the trained classifier to 'model.pkl', then evaluate it.
from sklearn.metrics import confusion_matrix, accuracy_score

# The context manager guarantees the file is flushed and closed even if
# pickling raises; the handle was previously opened and never closed.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)

# Confusion matrix of true vs. predicted labels on the test split.
cm = confusion_matrix(Y_test, Y_pred)
print(cm)

# Print the accuracy as well; as a bare expression the value was discarded
# when the file ran as a script.
print(accuracy_score(Y_test, Y_pred))