-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcs_stacking.py
executable file
·92 lines (84 loc) · 4.15 KB
/
cs_stacking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3
"""
this module contains implementation of cost-sensitive stacking method described in:
Bahnsen, Alejandro Correa, Djamila Aouada, and Bjorn Ottersten. "Ensemble of example-dependent cost-sensitive decision trees." arXiv preprint arXiv:1505.04637 (2015).
"""
# Author: Dec4f
# License: GPLv3
__all__ = ['BaseStacking']
import numpy as np
from log_reg import LogisticRegression
from split_tools import KFoldCV
class BaseStacking():
    """Base stacking model

    Stacking is an estimator that combines different base classifiers by
    learning a second-level algorithm on top of them. Each base classifier
    is built on the training set to output out-of-fold predictions, and those
    predictions are used as the features the second-level learner trains on.

    Parameters
    ----------
    algo_param : {array-like}
        positional parameters forwarded to the second-level LogisticRegression
    n_fold : int, default=5
        number of folds to use in internal cross validation
    Examples
    --------
    """
    def __init__(self, algo_param, n_fold=5):
        """
        init a stacking model
        ----------
        algo_param : positional arguments for the level-2 logistic regression
        n_fold : fold number for cross validation
        """
        self.logreg = LogisticRegression(*algo_param)
        self.n_fold = n_fold
        # fixed seed so repeated fits shuffle the folds identically
        self.kcv = KFoldCV(self.n_fold, shuffle=True, seed=12345)
    def fit_pred(self, train_set, test_set, clfs):
        """
        fit the stacking model to data and predict labels for the test set
        ----------
        train_set : Array-like
            training examples; column 0 is assumed to be an id column and the
            last column the class label (hence the [:, 1:-1] feature slice)
        test_set : Array-like
            testing examples with the same layout; its class label is not used
        clfs : List
            a list of base classifiers
        Returns
        -------
        boolean predictions of the second-level learner for test_set
        """
        x_train = train_set[:, 1:-1]
        y_train = train_set[:, -1]
        x_test = test_set[:, 1:-1]
        stk_train_set = np.zeros((len(clfs), len(train_set))) # shape = (n_clf, n_trainset)
        stk_test_set = np.zeros((len(clfs), len(test_set))) # shape = (n_clf, n_testset)
        for i, clf in enumerate(clfs):
            stk_train_set[i], stk_test_set[i] = self.stacking_cv(clf, x_train, y_train, x_test)
        ul_train_set = stk_train_set.T # shape = (n_train, n_clf)
        ul_test_set = stk_test_set.T # shape = (n_test, n_clf)
        # BUG FIX: every column of the upper-level matrices is one base
        # classifier's out-of-fold prediction, so ALL columns are features.
        # The original code sliced [:, 1:-1] here (copy-pasted from the raw
        # id/label layout above), silently discarding the first and last
        # classifiers' predictions.
        self.logreg.fit(ul_train_set, y_train)
        return self.get_pure_knn_pred(self.logreg, ul_test_set)
    def stacking_cv(self, base_clf, x_train, y_train, x_test):
        """
        out-of-fold prediction for one base classifier
        ----------
        for each fold, train base_clf on the in-fold part of the training set
        and predict both the held-out part and the original test set.
        The held-out predictions are stitched together over the folds to form
        one feature column of the stacking train set; the test-set predictions
        are averaged over the folds to form the matching column of the
        stacking test set.
        """
        test_kf = np.zeros((self.n_fold, len(x_test))) # shape = (n_fold, n_test)
        train_ul = np.zeros((len(x_train),)) # shape = (n_train,)
        for i, (train_idx, test_idx) in enumerate(self.kcv.split(x_train)):
            # split the input training set into lower-level train/test parts
            x_train_ll = x_train[train_idx] # shape = (n_train_train, n_attr)
            y_train_ll = y_train[train_idx] # shape = (n_train_train,)
            x_test_ll = x_train[test_idx] # shape = (n_test_train, n_attr)
            base_clf.fit(x_train_ll, y_train_ll)
            # out-of-fold predictions fill exactly the held-out rows' slots
            train_ul[test_idx] = self.get_pure_knn_pred(base_clf, x_test_ll)
            test_kf[i, :] = self.get_pure_knn_pred(base_clf, x_test)
        # average the per-fold test-set predictions into a single column
        test_ul = np.mean(test_kf, axis=0) # shape = (n_test,)
        # return upper level train and test data generated by one clf
        return train_ul, test_ul
    def get_pure_knn_pred(self, clf, X):
        """
        extract pure label predictions from a classifier whose predict()
        returns a (label, extra) tuple, discarding the second output
        """
        pred = np.zeros(len(X), dtype=bool)
        for i, row in enumerate(X):
            pred[i], _ = clf.predict(row)
        return pred