apache · rok · May 13, 2025
@@ -382,19 +382,19 @@ shape: {self.shape}"""
     @staticmethod
     def from_scipy(obj, dim_names=None):
         """
-        Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor
+        Convert scipy.sparse.coo_array or scipy.sparse.coo_matrix to arrow::SparseCOOTensor
 
         Parameters
         ----------
-        obj : scipy.sparse.csr_matrix
-            The scipy matrix that should be converted.
+        obj : scipy.sparse.coo_array or scipy.sparse.coo_matrix
+            The scipy array or matrix that should be converted.
         dim_names : list, optional
             Names of the dimensions.
         """
         import scipy.sparse
-        if not isinstance(obj, scipy.sparse.coo_matrix):
+        if not (isinstance(obj, scipy.sparse.coo_array) or isinstance(obj, scipy.sparse.coo_matrix)):
             raise TypeError(
-                f"Expected scipy.sparse.coo_matrix, got {type(obj)}")
+                f"Expected scipy.sparse.coo_array or scipy.sparse.coo_matrix, got {type(obj)}")
 
         cdef shared_ptr[CSparseCOOTensor] csparse_tensor
         cdef vector[int64_t] c_shape
@@ -409,10 +409,11 @@ shape: {self.shape}"""
         row = obj.row
         col = obj.col
 
-        # When SciPy's coo_matrix has canonical format, its indices matrix is
-        # sorted in column-major order.  As Arrow's SparseCOOIndex is sorted
-        # in row-major order if it is canonical, we must sort indices matrix
-        # into row-major order to keep its canonicalness, here.
+        # When SciPy's coo_array and coo_matrix have canonical format, their
+        # indices matrix is sorted in column-major order. As Arrow's
+        # SparseCOOIndex is sorted in row-major order if it is canonical,
+        # we must sort indices matrix into row-major order to keep its
+        # canonicalness here.
         if obj.has_canonical_format:
             order = np.lexsort((col, row))  # sort in row-major order
             row = row[order]
@@ -493,9 +494,9 @@ shape: {self.shape}"""
 
     def to_scipy(self):
         """
-        Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix.
+        Convert arrow::SparseCOOTensor to scipy.sparse.coo_array.
         """
-        from scipy.sparse import coo_matrix
+        from scipy.sparse import coo_array
         cdef PyObject* out_data
         cdef PyObject* out_coords
 
@@ -504,12 +505,12 @@ shape: {self.shape}"""
         data = PyObject_to_object(out_data)
         coords = PyObject_to_object(out_coords)
         row, col = coords[:, 0], coords[:, 1]
-        result = coo_matrix((data[:, 0], (row, col)), shape=self.shape)
+        result = coo_array((data[:, 0], (row, col)), shape=self.shape)
 
         # As the description in from_scipy above, we sorted indices matrix
-        # in row-major order if SciPy's coo_matrix has canonical format.
-        # So, we must call sum_duplicates() to make the result coo_matrix
-        # has canonical format.
+        # in row-major order if SciPy's coo_array has canonical format.
+        # So, we must call sum_duplicates() to make the resulting coo_array
+        # have canonical format.
         if self.has_canonical_format:
             result.sum_duplicates()
         return result
@@ -693,19 +694,19 @@ shape: {self.shape}"""
     @staticmethod
     def from_scipy(obj, dim_names=None):
         """
-        Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix.
+        Convert scipy.sparse.csr_array or scipy.sparse.csr_matrix to arrow::SparseCSRMatrix.
 
         Parameters
         ----------
-        obj : scipy.sparse.csr_matrix
+        obj : scipy.sparse.csr_array or scipy.sparse.csr_matrix
-            The scipy matrix that should be converted.
+            The scipy array or matrix that should be converted.
         dim_names : list, optional
             Names of the dimensions.
         """
         import scipy.sparse
-        if not isinstance(obj, scipy.sparse.csr_matrix):
+        if not (isinstance(obj, scipy.sparse.csr_array) or isinstance(obj, scipy.sparse.csr_matrix)):
             raise TypeError(
-                f"Expected scipy.sparse.csr_matrix, got {type(obj)}")
+                f"Expected scipy.sparse.csr_array or scipy.sparse.csr_matrix, got {type(obj)}")
 
         cdef shared_ptr[CSparseCSRMatrix] csparse_tensor
         cdef vector[int64_t] c_shape
@@ -764,9 +765,9 @@ shape: {self.shape}"""
 
     def to_scipy(self):
         """
-        Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix.
+        Convert arrow::SparseCSRMatrix to scipy.sparse.csr_array.
         """
-        from scipy.sparse import csr_matrix
+        from scipy.sparse import csr_array
         cdef PyObject* out_data
         cdef PyObject* out_indptr
         cdef PyObject* out_indices
@@ -778,7 +779,7 @@ shape: {self.shape}"""
         data = PyObject_to_object(out_data)
         indptr = PyObject_to_object(out_indptr)
         indices = PyObject_to_object(out_indices)
-        result = csr_matrix((data[:, 0], indices, indptr), shape=self.shape)
+        result = csr_array((data[:, 0], indices, indptr), shape=self.shape)
         return result
 
     def to_tensor(self):

@@ -26,10 +26,12 @@
 import pyarrow as pa
 
 try:
-    from scipy.sparse import csr_matrix, coo_matrix
+    from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix
 except ImportError:
     coo_matrix = None
     csr_matrix = None
+    csr_array = None
+    coo_array = None
 
 try:
     import sparse
@@ -51,6 +53,15 @@
     ('f8', pa.float64())
 ]
 
+# SciPy does not support float16
+scipy_type_pairs = [
+    x for x in tensor_type_pairs if x[1] != pa.float16()]
+
+shape_dim_name_pairs = [
+    ((4, 6), ("x", "y")),
+    ((24,), ("x",)),
+]
+
 
 @pytest.mark.parametrize('sparse_tensor_type', [
     pa.SparseCSRMatrix,
@@ -395,17 +406,18 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type):
 
 
 @pytest.mark.skipif(not coo_matrix, reason="requires scipy")
-@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
-def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type):
+@pytest.mark.parametrize('sparse_object', (coo_array, coo_matrix))
+@pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs)
+@pytest.mark.parametrize('shape,dim_names', shape_dim_name_pairs)
+def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type,
+                                           sparse_object, shape, dim_names):
     dtype = np.dtype(dtype_str)
     data = np.array([1, 2, 3, 4, 5, 6]).astype(dtype)
     row = np.array([0, 0, 2, 3, 1, 3])
     col = np.array([0, 2, 0, 4, 5, 5])
-    shape = (4, 6)
-    dim_names = ('x', 'y')
 
     # non-canonical sparse coo matrix
-    scipy_matrix = coo_matrix((data, (row, col)), shape=shape)
+    scipy_matrix = sparse_object((data, (row, col)), shape=shape)
     sparse_tensor = pa.SparseCOOTensor.from_scipy(scipy_matrix,
                                                   dim_names=dim_names)
     out_scipy_matrix = sparse_tensor.to_scipy()
@@ -420,11 +432,7 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type):
     assert np.array_equal(scipy_matrix.row, out_scipy_matrix.row)
     assert np.array_equal(scipy_matrix.col, out_scipy_matrix.col)
 
-    if dtype_str == 'f2':
-        dense_array = \
-            scipy_matrix.astype(np.float32).toarray().astype(np.float16)
-    else:
-        dense_array = scipy_matrix.toarray()
+    dense_array = scipy_matrix.toarray()
     assert np.array_equal(dense_array, sparse_tensor.to_tensor().to_numpy())
 
     # canonical sparse coo matrix
@@ -439,16 +447,17 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type):
 
 
 @pytest.mark.skipif(not csr_matrix, reason="requires scipy")
-@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
-def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type):
+@pytest.mark.parametrize('sparse_object', (csr_array, csr_matrix))
+@pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs)
+@pytest.mark.parametrize('shape,dim_names', shape_dim_name_pairs)
+def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type,
+                                           sparse_object, shape, dim_names):
     dtype = np.dtype(dtype_str)
     data = np.array([8, 2, 5, 3, 4, 6]).astype(dtype)
     indptr = np.array([0, 2, 3, 4, 6])
     indices = np.array([0, 2, 5, 0, 4, 5])
-    shape = (4, 6)
-    dim_names = ('x', 'y')
 
-    sparse_array = csr_matrix((data, indices, indptr), shape=shape)
+    sparse_array = sparse_object((data, indices, indptr), shape=shape)
     sparse_tensor = pa.SparseCSRMatrix.from_scipy(sparse_array,
                                                   dim_names=dim_names)
     out_sparse_array = sparse_tensor.to_scipy()
@@ -460,11 +469,7 @@ def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type):
     assert np.array_equal(sparse_array.indptr, out_sparse_array.indptr)
     assert np.array_equal(sparse_array.indices, out_sparse_array.indices)
 
-    if dtype_str == 'f2':
-        dense_array = \
-            sparse_array.astype(np.float32).toarray().astype(np.float16)
-    else:
-        dense_array = sparse_array.toarray()
+    dense_array = sparse_array.toarray()
     assert np.array_equal(dense_array, sparse_tensor.to_tensor().to_numpy())