From 8d316f2e0280757b4d2019b72ff13da248234338 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 15 Nov 2017 15:25:59 -0600 Subject: [PATCH] Deprecate estimator API, move it dask-ml. --- dask_glm/estimators.py | 8 +++ dask_glm/tests/test_estimators.py | 108 ++---------------------------- docs/api.rst | 8 --- docs/estimators.rst | 37 ---------- docs/index.rst | 5 +- 5 files changed, 16 insertions(+), 150 deletions(-) delete mode 100644 docs/estimators.rst diff --git a/dask_glm/estimators.py b/dask_glm/estimators.py index 9da2dcb..8d36a32 100644 --- a/dask_glm/estimators.py +++ b/dask_glm/estimators.py @@ -1,6 +1,8 @@ """ Models following scikit-learn's estimator API. """ +import warnings + from sklearn.base import BaseEstimator from . import algorithms @@ -10,6 +12,12 @@ poisson_deviance ) +msg = ("The 'dask_glm.estimators' module is deprecated in favor of " + "'dask_ml.linear_models'. Please install 'dask-ml' and update " + "your imports.") + +warnings.warn(msg, FutureWarning) + class _GLM(BaseEstimator): diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py index fc913f5..ee050a4 100644 --- a/dask_glm/tests/test_estimators.py +++ b/dask_glm/tests/test_estimators.py @@ -1,107 +1,9 @@ import pytest -from dask_glm.estimators import LogisticRegression, LinearRegression, PoissonRegression -from dask_glm.datasets import make_classification, make_regression, make_poisson -from dask_glm.regularizers import Regularizer +def test_warns(): + with pytest.warns(FutureWarning) as w: + import dask_glm.estimators # noqa -@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()]) -def solver(request): - """Parametrized fixture for all the solver names""" - return request.param - - -@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()]) -def regularizer(request): - """Parametrized fixture for all the regularizer names""" - return request.param - - -class DoNothingTransformer(object): - def fit(self, X, y=None): - return self - - def transform(self, X, y=None): - return X - - def fit_transform(self, X, y=None): - return X - - def get_params(self, deep=True): - return {} - - -X, y = make_classification() - - -def test_lr_init(solver): - LogisticRegression(solver=solver) - - -def test_pr_init(solver): - PoissonRegression(solver=solver) - - -@pytest.mark.parametrize('fit_intercept', [True, False]) -def test_fit(fit_intercept): - X, y = make_classification(n_samples=100, n_features=5, chunksize=10) - lr = LogisticRegression(fit_intercept=fit_intercept) - lr.fit(X, y) - lr.predict(X) - lr.predict_proba(X) - - -@pytest.mark.parametrize('fit_intercept', [True, False]) -def test_lm(fit_intercept): - X, y = make_regression(n_samples=100, n_features=5, chunksize=10) - lr = LinearRegression(fit_intercept=fit_intercept) - lr.fit(X, y) - lr.predict(X) - if fit_intercept: - assert lr.intercept_ is not None - - -@pytest.mark.parametrize('fit_intercept', [True, False]) -def test_big(fit_intercept): - import dask - dask.set_options(get=dask.get) - X, y = make_classification() - lr = LogisticRegression(fit_intercept=fit_intercept) - lr.fit(X, y) - lr.predict(X) - lr.predict_proba(X) - if fit_intercept: - assert lr.intercept_ is not None - - -@pytest.mark.parametrize('fit_intercept', [True, False]) -def test_poisson_fit(fit_intercept): - import dask - dask.set_options(get=dask.get) - X, y = make_poisson() - pr = PoissonRegression(fit_intercept=fit_intercept) - pr.fit(X, y) - pr.predict(X) - pr.get_deviance(X, y) - if fit_intercept: - assert pr.intercept_ is not None - - -def test_in_pipeline(): - from sklearn.pipeline import make_pipeline - X, y = make_classification(n_samples=100, n_features=5, chunksize=10) - pipe = make_pipeline(DoNothingTransformer(), LogisticRegression()) - pipe.fit(X, y) - - -def test_gridsearch(): - from sklearn.pipeline import make_pipeline - dcv = pytest.importorskip('dask_searchcv') - - X, y = make_classification(n_samples=100, n_features=5, chunksize=10) - grid = { - 'logisticregression__lamduh': [.001, .01, .1, .5] - } - pipe = make_pipeline(DoNothingTransformer(), LogisticRegression()) - search = dcv.GridSearchCV(pipe, grid, cv=3) - search.fit(X, y) + assert len(w) + assert 'dask-ml' in str(w[-1]) diff --git a/docs/api.rst b/docs/api.rst index 4f83654..e012bfa 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,14 +5,6 @@ API Reference ------------- -.. _api.estimators: - -Estimators -========== - -.. automodule:: dask_glm.estimators - :members: - .. _api.families: Families diff --git a/docs/estimators.rst b/docs/estimators.rst deleted file mode 100644 index 5d4c011..0000000 --- a/docs/estimators.rst +++ /dev/null @@ -1,37 +0,0 @@ -Estimators -========== - -The :mod:`estimators` module offers a scikit-learn compatible API for -specifying your model and hyper-parameters, and fitting your model to data. - -.. code-block:: python - - >>> from dask_glm.estimators import LogisticRegression - >>> from dask_glm.datasets import make_classification - >>> X, y = make_classification() - >>> lr = LogisticRegression() - >>> lr.fit(X, y) - >>> lr - LogisticRegression(abstol=0.0001, fit_intercept=True, lamduh=1.0, - max_iter=100, over_relax=1, regularizer='l2', reltol=0.01, rho=1, - solver='admm', tol=0.0001) - - -All of the estimators follow a similar API. They can be instantiated with -a set of parameters that control the fit, including whether to add an intercept, -which solver to use, how to regularize the inputs, and various optimization -parameters. - -Given an instantiated estimator, you pass the data to the ``.fit`` method. -It takes an ``X``, the feature matrix or exogenous data, and a ``y`` the -target or endogenous data. Each of these can be a NumPy or dask array. - -With a fit model, you can make new predictions using the ``.predict`` method, -and can score known observations with the ``.score`` method. - -.. code-block:: python - - >>> lr.predict(X).compute() - array([False, False, False, True, ... True, False, True, True], dtype=bool) - -See the :ref:`api-reference` for more. diff --git a/docs/index.rst b/docs/index.rst index 912f0b6..f8b134d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,13 +9,13 @@ Dask-glm *Dask-glm is a library for fitting Generalized Linear Models on large datasets* Dask-glm builds on the `dask`_ project to fit `GLM`_'s on datasets in parallel. -It offers a `scikit-learn`_ compatible API for specifying your model. +It provides the optimizers and regularizers used by libraries like `dask-ml`_, +which builds scikit-learn-style APIs on top of those components. .. toctree:: :maxdepth: 2 :caption: Contents: - estimators examples api @@ -30,3 +30,4 @@ Indices and tables .. _dask: http://dask.pydata.org/en/latest/ .. _GLM: https://en.wikipedia.org/wiki/Generalized_linear_model .. _scikit-learn: http://scikit-learn.org/ +.. _dask-ml: http://dask-ml.readthedocs.org/