From 8d316f2e0280757b4d2019b72ff13da248234338 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 15 Nov 2017 15:25:59 -0600
Subject: [PATCH] Deprecate estimator API, move it to dask-ml.

---
 dask_glm/estimators.py            |   8 +++
 dask_glm/tests/test_estimators.py | 108 ++----------------------------
 docs/api.rst                      |   8 ---
 docs/estimators.rst               |  37 ----------
 docs/index.rst                    |   5 +-
 5 files changed, 16 insertions(+), 150 deletions(-)
 delete mode 100644 docs/estimators.rst

diff --git a/dask_glm/estimators.py b/dask_glm/estimators.py
index 9da2dcb..8d36a32 100644
--- a/dask_glm/estimators.py
+++ b/dask_glm/estimators.py
@@ -1,6 +1,8 @@
 """
 Models following scikit-learn's estimator API.
 """
+import warnings
+
 from sklearn.base import BaseEstimator
 
 from . import algorithms
@@ -10,6 +12,12 @@
     poisson_deviance
 )
 
+msg = ("The 'dask_glm.estimators' module is deprecated in favor of "
+       "'dask_ml.linear_model'. Please install 'dask-ml' and update "
+       "your imports.")
+# Module-level statement: the deprecation warning is raised on import.
+warnings.warn(msg, FutureWarning)
+
 
 class _GLM(BaseEstimator):
 
diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py
index fc913f5..ee050a4 100644
--- a/dask_glm/tests/test_estimators.py
+++ b/dask_glm/tests/test_estimators.py
@@ -1,107 +1,9 @@
 import pytest
 
-from dask_glm.estimators import LogisticRegression, LinearRegression, PoissonRegression
-from dask_glm.datasets import make_classification, make_regression, make_poisson
-from dask_glm.regularizers import Regularizer
 
+def test_warns():
+    with pytest.warns(FutureWarning) as w:
+        import dask_glm.estimators  # noqa
 
-@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()])
-def solver(request):
-    """Parametrized fixture for all the solver names"""
-    return request.param
-
-
-@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()])
-def regularizer(request):
-    """Parametrized fixture for all the regularizer names"""
-    return request.param
-
-
-class DoNothingTransformer(object):
-    def fit(self, X, y=None):
-        return self
-
-    def transform(self, X, y=None):
-        return X
-
-    def fit_transform(self, X, y=None):
-        return X
-
-    def get_params(self, deep=True):
-        return {}
-
-
-X, y = make_classification()
-
-
-def test_lr_init(solver):
-    LogisticRegression(solver=solver)
-
-
-def test_pr_init(solver):
-    PoissonRegression(solver=solver)
-
-
-@pytest.mark.parametrize('fit_intercept', [True, False])
-def test_fit(fit_intercept):
-    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
-    lr = LogisticRegression(fit_intercept=fit_intercept)
-    lr.fit(X, y)
-    lr.predict(X)
-    lr.predict_proba(X)
-
-
-@pytest.mark.parametrize('fit_intercept', [True, False])
-def test_lm(fit_intercept):
-    X, y = make_regression(n_samples=100, n_features=5, chunksize=10)
-    lr = LinearRegression(fit_intercept=fit_intercept)
-    lr.fit(X, y)
-    lr.predict(X)
-    if fit_intercept:
-        assert lr.intercept_ is not None
-
-
-@pytest.mark.parametrize('fit_intercept', [True, False])
-def test_big(fit_intercept):
-    import dask
-    dask.set_options(get=dask.get)
-    X, y = make_classification()
-    lr = LogisticRegression(fit_intercept=fit_intercept)
-    lr.fit(X, y)
-    lr.predict(X)
-    lr.predict_proba(X)
-    if fit_intercept:
-        assert lr.intercept_ is not None
-
-
-@pytest.mark.parametrize('fit_intercept', [True, False])
-def test_poisson_fit(fit_intercept):
-    import dask
-    dask.set_options(get=dask.get)
-    X, y = make_poisson()
-    pr = PoissonRegression(fit_intercept=fit_intercept)
-    pr.fit(X, y)
-    pr.predict(X)
-    pr.get_deviance(X, y)
-    if fit_intercept:
-        assert pr.intercept_ is not None
-
-
-def test_in_pipeline():
-    from sklearn.pipeline import make_pipeline
-    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
-    pipe = make_pipeline(DoNothingTransformer(), LogisticRegression())
-    pipe.fit(X, y)
-
-
-def test_gridsearch():
-    from sklearn.pipeline import make_pipeline
-    dcv = pytest.importorskip('dask_searchcv')
-
-    X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
-    grid = {
-        'logisticregression__lamduh': [.001, .01, .1, .5]
-    }
-    pipe = make_pipeline(DoNothingTransformer(), LogisticRegression())
-    search = dcv.GridSearchCV(pipe, grid, cv=3)
-    search.fit(X, y)
+    assert len(w)
+    assert 'dask-ml' in str(w[-1])
diff --git a/docs/api.rst b/docs/api.rst
index 4f83654..e012bfa 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -5,14 +5,6 @@
 API Reference
 -------------
 
-.. _api.estimators:
-
-Estimators
-==========
-
-.. automodule:: dask_glm.estimators
-   :members:
-
 .. _api.families:
 
 Families
diff --git a/docs/estimators.rst b/docs/estimators.rst
deleted file mode 100644
index 5d4c011..0000000
--- a/docs/estimators.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-Estimators
-==========
-
-The :mod:`estimators` module offers a scikit-learn compatible API for
-specifying your model and hyper-parameters, and fitting your model to data.
-
-.. code-block:: python
-
-   >>> from dask_glm.estimators import LogisticRegression
-   >>> from dask_glm.datasets import make_classification
-   >>> X, y = make_classification()
-   >>> lr = LogisticRegression()
-   >>> lr.fit(X, y)
-   >>> lr
-   LogisticRegression(abstol=0.0001, fit_intercept=True, lamduh=1.0,
-             max_iter=100, over_relax=1, regularizer='l2', reltol=0.01, rho=1,
-             solver='admm', tol=0.0001)
-
-
-All of the estimators follow a similar API. They can be instantiated with
-a set of parameters that control the fit, including whether to add an intercept,
-which solver to use, how to regularize the inputs, and various optimization
-parameters.
-
-Given an instantiated estimator, you pass the data to the ``.fit`` method.
-It takes an ``X``, the feature matrix or exogenous data, and a ``y`` the
-target or endogenous data. Each of these can be a NumPy or dask array.
-
-With a fit model, you can make new predictions using the ``.predict`` method,
-and can score known observations with the ``.score`` method.
-
-.. code-block:: python
-
-   >>> lr.predict(X).compute()
-   array([False, False, False, True, ... True, False, True, True], dtype=bool)
-
-See the :ref:`api-reference` for more.
diff --git a/docs/index.rst b/docs/index.rst
index 912f0b6..f8b134d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,13 +9,13 @@ Dask-glm
 *Dask-glm is a library for fitting Generalized Linear Models on large datasets*
 
 Dask-glm builds on the `dask`_ project to fit `GLM`_'s on datasets in parallel.
-It offers a `scikit-learn`_ compatible API for specifying your model.
+It provides the optimizers and regularizers used by libraries like `dask-ml`_,
+which builds scikit-learn-style APIs on top of those components.
 
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
 
-   estimators
    examples
    api
 
@@ -30,3 +30,4 @@ Indices and tables
 .. _dask: http://dask.pydata.org/en/latest/
 .. _GLM: https://en.wikipedia.org/wiki/Generalized_linear_model
 .. _scikit-learn: http://scikit-learn.org/
+.. _dask-ml: http://dask-ml.readthedocs.org/