55import numpy as np
66from dsbox .overfitdetector .detector import Detector
77from sklearn .linear_model import LogisticRegression
8+ from sklearn import preprocessing
89import logging
910import math
1011
@@ -13,7 +14,10 @@ class Detectorests(unittest.TestCase):
1314 def setUp (self ):
1415 self .__detector = Detector ()
1516 self .__dir_path = os .getcwd ()
16- self .__detector .set_logger (logging .ERROR )
17+ logging .basicConfig (level = logging .DEBUG )
18+
19+ self .__train_data_file = self .__dir_path + "/tests/dsbox/overfit-detector/test_data/trainData.csv.gz"
20+ self .__train_labels_file = self .__dir_path + "/tests/dsbox/overfit-detector/test_data/trainTargets.csv.gz"
1721
1822 datas = {
1923 "indep1" : [],
@@ -48,15 +52,28 @@ def setUp(self):
4852 self .__test_df = pd .DataFrame (datas )
4953
5054 def test_detector (self ):
51- data = np .array ([1. , 2. , 3. , 4. ])
52- labels = np .array (1 )
53- for i in range (10 ):
54- data = np .vstack ([data , [1. , 2. , 3. , 4. ]])
55- labels = np .append (labels , 1 )
56- for i in range (10 ):
57- data = np .vstack ([data , [2. , 3. , 4. , 5. ]])
58- labels = np .append (labels , 0 )
5955
56+ #data = np.array([1., 2., 3., 4.])
57+ #labels = np.array(1)
58+ #for i in range(10):
59+ # data = np.vstack([data, [1., 2., 3., 4.]])
60+ # labels = np.append(labels, 1)
61+ #for i in range(10):
62+ # data = np.vstack([data, [2., 3., 4., 5.]])
63+ # labels = np.append(labels, 0)
64+
65+ data = pd .read_csv (self .__train_data_file , header = 0 ).fillna (0.0 ).replace ('' , '0' )
66+ del data ['d3mIndex' ]
67+ labels = pd .read_csv (self .__train_labels_file , header = 0 ).fillna (0.0 ).replace ('' , '0' )['Hall_of_Fame' ]
68+
69+ # Encode the categorical data in training data
70+ # Encode the categorical data in the test targets, uses the first target of the dataset as a target
71+ trainDataLabelEncoders = dict ()
72+ for col in ['Player' , 'Position' ]:
73+ trainDataLabelEncoders [col ] = preprocessing .LabelEncoder ().fit (data [col ])
74+ data [col ] = trainDataLabelEncoders [col ].transform (data [col ])
75+
76+ # Train the model
6077 mdl = LogisticRegression ().fit (data , labels )
6178 dd = Detector (n_sample_instances = 10 , n_sample_iterations = 20 , columns = ['0' , '1' , '2' , '3' ], model = mdl )
6279 dd .set_training_data (inputs = data , outputs = labels )
0 commit comments