From 4bfdc0188e95bf42b44202e7b86e631665b6f778 Mon Sep 17 00:00:00 2001
From: Jiwei Liu <aixueer4ever@gmail.com>
Date: Wed, 28 Oct 2020 20:23:10 -0700
Subject: [PATCH 1/4] fix is_dask_array_sparse for non-dask array inputs

---
 dask_glm/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dask_glm/utils.py b/dask_glm/utils.py
index 0fe3429..7c22464 100644
--- a/dask_glm/utils.py
+++ b/dask_glm/utils.py
@@ -121,7 +121,7 @@ def is_dask_array_sparse(X):
     """
     Check using _meta if a dask array contains sparse arrays
     """
-    return isinstance(X._meta, sparse.SparseArray)
+    return isinstance(X, da.Array) and isinstance(X._meta, sparse.SparseArray)
 
 
 @dispatch(np.ndarray)

From 2ad455ff199dc26dcea2b4e699b1b28e50a7b579 Mon Sep 17 00:00:00 2001
From: Jiwei Liu <aixueer4ever@gmail.com>
Date: Wed, 28 Oct 2020 20:59:09 -0700
Subject: [PATCH 2/4] support numpy inputs; cupy works except for admm & lbfgs

---
 dask_glm/algorithms.py |  8 ++++----
 dask_glm/utils.py      | 17 ++++++++++++++---
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py
index 1320e7b..45f9c2d 100644
--- a/dask_glm/algorithms.py
+++ b/dask_glm/algorithms.py
@@ -11,7 +11,7 @@
 from scipy.optimize import fmin_l_bfgs_b
 
 
-from dask_glm.utils import dot, normalize, scatter_array, get_distributed_client
+from dask_glm.utils import dot, normalize, scatter_array, get_distributed_client, safe_zeros_like
 from dask_glm.families import Logistic
 from dask_glm.regularizers import Regularizer
 
@@ -97,7 +97,7 @@ def gradient_descent(X, y, max_iter=100, tol=1e-14, family=Logistic, **kwargs):
     stepSize = 1.0
     recalcRate = 10
     backtrackMult = firstBacktrackMult
-    beta = np.zeros_like(X._meta, shape=p)
+    beta = safe_zeros_like(X, shape=p)
 
     for k in range(max_iter):
         # how necessary is this recalculation?
@@ -161,7 +161,7 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs):
     """
     gradient, hessian = family.gradient, family.hessian
     n, p = X.shape
-    beta = np.zeros_like(X._meta, shape=p)
+    beta = safe_zeros_like(X, shape=p)
     Xbeta = dot(X, beta)
 
     iter_count = 0
@@ -387,7 +387,7 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,
     stepSize = 1.0
     recalcRate = 10
     backtrackMult = firstBacktrackMult
-    beta = np.zeros_like(X._meta, shape=p)
+    beta = safe_zeros_like(X, shape=p)
     regularizer = Regularizer.get(regularizer)
 
     for k in range(max_iter):
diff --git a/dask_glm/utils.py b/dask_glm/utils.py
index 7c22464..ebf0bc3 100644
--- a/dask_glm/utils.py
+++ b/dask_glm/utils.py
@@ -23,7 +23,7 @@ def normalize_inputs(X, y, *args, **kwargs):
                 raise ValueError('Multiple constant columns detected!')
             mean[intercept_idx] = 0
             std[intercept_idx] = 1
-            mean = mean if len(intercept_idx[0]) else np.zeros_like(X._meta, shape=mean.shape)
+            mean = mean if len(intercept_idx[0]) else safe_zeros_like(X, shape=mean.shape)
             Xn = (X - mean) / std
             out = algo(Xn, y, *args, **kwargs).copy()
             i_adj = np.sum(out * mean / std)
@@ -41,7 +41,7 @@ def sigmoid(x):
 
 @dispatch(object)
 def exp(A):
-    return A.exp()
+    return np.exp(A)
 
 
 @dispatch(float)
@@ -91,7 +91,7 @@ def sign(A):
 
 @dispatch(object)
 def log1p(A):
-    return A.log1p()
+    return np.log1p(A)
 
 
 @dispatch(np.ndarray)
@@ -149,6 +149,11 @@ def add_intercept(X):
     return X_i
 
 
+@dispatch(object)
+def add_intercept(X):
+    return np.concatenate([X, np.ones_like(X, shape=(X.shape[0], 1))], axis=1)
+
+
 def make_y(X, beta=np.array([1.5, -3]), chunks=2):
     n, p = X.shape
     z0 = X.dot(beta)
@@ -205,3 +210,9 @@ def get_distributed_client():
         return get_client()
     except ValueError:
         return None
+
+
+def safe_zeros_like(X, shape):
+    if isinstance(X, da.Array):
+        return np.zeros_like(X._meta, shape=shape)
+    return np.zeros_like(X, shape=shape)

From 9a8170c11fc253bccaf8cbcfea95dc2b97aaf649 Mon Sep 17 00:00:00 2001
From: Jiwei Liu <aixueer4ever@gmail.com>
Date: Wed, 28 Oct 2020 22:49:07 -0700
Subject: [PATCH 3/4] add one test for numpy input

---
 dask_glm/tests/test_estimators.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py
index d2212c4..9a19b9b 100644
--- a/dask_glm/tests/test_estimators.py
+++ b/dask_glm/tests/test_estimators.py
@@ -45,8 +45,13 @@ def test_pr_init(solver):
 
 @pytest.mark.parametrize('fit_intercept', [True, False])
 @pytest.mark.parametrize('is_sparse', [True, False])
-def test_fit(fit_intercept, is_sparse):
+@pytest.mark.parametrize('is_numpy', [True, False])
+def test_fit(fit_intercept, is_sparse, is_numpy):
     X, y = make_classification(n_samples=100, n_features=5, chunksize=10, is_sparse=is_sparse)
+    if is_numpy:
+        if is_sparse:
+            return
+        X, y = dask.compute(X, y)
     lr = LogisticRegression(fit_intercept=fit_intercept)
     lr.fit(X, y)
     lr.predict(X)

From 1af0b03c5f1daf3ead17ecbde07ade18296ea51c Mon Sep 17 00:00:00 2001
From: Jiwei Liu <aixueer4ever@gmail.com>
Date: Wed, 28 Oct 2020 23:03:16 -0700
Subject: [PATCH 4/4] fix test_fit: parametrize only valid (is_sparse, is_numpy) pairs

---
 dask_glm/tests/test_estimators.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py
index 9a19b9b..fdded81 100644
--- a/dask_glm/tests/test_estimators.py
+++ b/dask_glm/tests/test_estimators.py
@@ -44,13 +44,13 @@ def test_pr_init(solver):
 
 
 @pytest.mark.parametrize('fit_intercept', [True, False])
-@pytest.mark.parametrize('is_sparse', [True, False])
-@pytest.mark.parametrize('is_numpy', [True, False])
+@pytest.mark.parametrize('is_sparse,is_numpy', [
+                         (True, False),
+                         (False, False),
+                         (False, True)])
 def test_fit(fit_intercept, is_sparse, is_numpy):
     X, y = make_classification(n_samples=100, n_features=5, chunksize=10, is_sparse=is_sparse)
     if is_numpy:
-        if is_sparse:
-            return
         X, y = dask.compute(X, y)
     lr = LogisticRegression(fit_intercept=fit_intercept)
     lr.fit(X, y)