-
-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy path: test_estimators.py
115 lines (86 loc) · 3.43 KB
/
test_estimators.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pytest
import dask
from dask_glm.estimators import LogisticRegression, LinearRegression, PoissonRegression
from dask_glm.datasets import make_classification, make_regression, make_poisson
from dask_glm.regularizers import Regularizer
@pytest.fixture(params=['admm', 'gradient_descent', 'newton', 'lbfgs', 'proximal_grad'])
def solver(request):
    """Parametrized fixture for all the solver names.

    NOTE(review): the original parametrized over ``Regularizer``
    subclasses -- an apparent copy-paste of the ``regularizer`` fixture
    below -- even though this docstring promises solver *names*.  The
    estimators accept ``solver`` as a string naming an algorithm, so
    parametrize over the known dask-glm solver names instead -- confirm
    the list against ``dask_glm.algorithms``.
    """
    return request.param
@pytest.fixture(params=[cls() for cls in Regularizer.__subclasses__()])
def regularizer(request):
    """Parametrized fixture yielding one instance of every Regularizer subclass."""
    return request.param
class DoNothingTransformer(object):
    """Identity transformer used to exercise estimators inside sklearn pipelines.

    Implements the minimal transformer surface (``fit`` / ``transform`` /
    ``fit_transform`` / ``get_params``) while passing the data through
    untouched.
    """

    def fit(self, X, y=None):
        # Nothing to learn; return self per the sklearn convention.
        return self

    def transform(self, X, y=None):
        # Identity transform: hand back the input unchanged.
        return X

    def fit_transform(self, X, y=None):
        # fit is a no-op, so fit_transform is also the identity.
        return X

    def get_params(self, deep=True):
        # No hyperparameters to expose.
        return {}
# Module-level classification dataset. NOTE(review): the test functions below
# appear to build their own X, y locally (shadowing these) -- confirm whether
# this pair is still used anywhere before removing.
X, y = make_classification()
def test_lr_init(solver):
    """Constructing a LogisticRegression must not raise for any solver value."""
    model = LogisticRegression(solver=solver)
def test_pr_init(solver):
    """Constructing a PoissonRegression must not raise for any solver value."""
    model = PoissonRegression(solver=solver)
@pytest.mark.parametrize('fit_intercept', [True, False])
@pytest.mark.parametrize('is_sparse,is_numpy', [
    (True, False),
    (False, False),
    (False, True)])
def test_fit(fit_intercept, is_sparse, is_numpy):
    """Fit/predict round-trip for LogisticRegression on dask and in-memory inputs."""
    X, y = make_classification(n_samples=100, n_features=5, chunksize=10,
                               is_sparse=is_sparse)
    if is_numpy:
        # Materialize the dask collections into concrete in-memory objects.
        X, y = dask.compute(X, y)
    model = LogisticRegression(fit_intercept=fit_intercept)
    model.fit(X, y)
    model.predict(X)
    model.predict_proba(X)
@pytest.mark.parametrize('fit_intercept', [True, False])
@pytest.mark.parametrize('is_sparse', [True, False])
def test_lm(fit_intercept, is_sparse):
    """LinearRegression fits, predicts, and exposes an intercept when asked."""
    X, y = make_regression(n_samples=100, n_features=5, chunksize=10,
                           is_sparse=is_sparse)
    model = LinearRegression(fit_intercept=fit_intercept)
    model.fit(X, y)
    model.predict(X)
    if fit_intercept:
        # A fitted intercept term must be available on the estimator.
        assert model.intercept_ is not None
@pytest.mark.parametrize('fit_intercept', [True, False])
@pytest.mark.parametrize('is_sparse', [True, False])
def test_big(fit_intercept, is_sparse):
    """End-to-end LogisticRegression on a default-sized dataset.

    Runs under the synchronous scheduler so failures surface with usable
    tracebacks.
    """
    with dask.config.set(scheduler='synchronous'):
        X, y = make_classification(is_sparse=is_sparse)
        model = LogisticRegression(fit_intercept=fit_intercept)
        model.fit(X, y)
        model.predict(X)
        model.predict_proba(X)
        if fit_intercept:
            # A fitted intercept term must be available on the estimator.
            assert model.intercept_ is not None
@pytest.mark.parametrize('fit_intercept', [True, False])
@pytest.mark.parametrize('is_sparse', [True, False])
def test_poisson_fit(fit_intercept, is_sparse):
    """PoissonRegression fits, predicts, and reports deviance.

    Runs under the synchronous scheduler so failures surface with usable
    tracebacks.
    """
    with dask.config.set(scheduler='synchronous'):
        X, y = make_poisson(is_sparse=is_sparse)
        model = PoissonRegression(fit_intercept=fit_intercept)
        model.fit(X, y)
        model.predict(X)
        model.get_deviance(X, y)
        if fit_intercept:
            # A fitted intercept term must be available on the estimator.
            assert model.intercept_ is not None
def test_in_pipeline():
    """A dask-glm estimator works as the final step of an sklearn pipeline."""
    from sklearn.pipeline import make_pipeline

    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
    pipeline = make_pipeline(DoNothingTransformer(), LogisticRegression())
    pipeline.fit(X, y)
def test_gridsearch():
    """dask-searchcv's GridSearchCV can tune ``lamduh`` through a pipeline.

    Skipped entirely when dask_searchcv is not installed.
    """
    from sklearn.pipeline import make_pipeline
    dcv = pytest.importorskip('dask_searchcv')

    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
    param_grid = {'logisticregression__lamduh': [.001, .01, .1, .5]}
    pipeline = make_pipeline(DoNothingTransformer(), LogisticRegression())
    search = dcv.GridSearchCV(pipeline, param_grid, cv=3)
    search.fit(X, y)