diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index 801360e1591a2..73715c0609816 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -421,19 +421,19 @@ shape: {self.shape}""" @staticmethod def from_scipy(obj, dim_names=None): """ - Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor + Convert scipy.sparse.coo_array or scipy.sparse.coo_matrix to arrow::SparseCOOTensor Parameters ---------- - obj : scipy.sparse.csr_matrix - The scipy matrix that should be converted. + obj : scipy.sparse.coo_array or scipy.sparse.coo_matrix + The scipy array or matrix that should be converted. dim_names : list, optional Names of the dimensions. """ import scipy.sparse - if not isinstance(obj, scipy.sparse.coo_matrix): + if not isinstance(obj, (scipy.sparse.coo_array, scipy.sparse.coo_matrix)): raise TypeError( - f"Expected scipy.sparse.coo_matrix, got {type(obj)}") + f"Expected scipy.sparse.coo_array or scipy.sparse.coo_matrix, got {type(obj)}") cdef shared_ptr[CSparseCOOTensor] csparse_tensor cdef vector[int64_t] c_shape @@ -448,10 +448,11 @@ shape: {self.shape}""" row = obj.row col = obj.col - # When SciPy's coo_matrix has canonical format, its indices matrix is - # sorted in column-major order. As Arrow's SparseCOOIndex is sorted - # in row-major order if it is canonical, we must sort indices matrix - # into row-major order to keep its canonicalness, here. + # When SciPy's coo_array and coo_matrix have canonical format, their + # indices matrix is sorted in column-major order. As Arrow's + # SparseCOOIndex is sorted in row-major order if it is canonical, + # we must sort indices matrix into row-major order to keep its + # canonicalness here. if obj.has_canonical_format: order = np.lexsort((col, row)) # sort in row-major order row = row[order] @@ -532,9 +533,9 @@ shape: {self.shape}""" def to_scipy(self): """ - Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix. + Convert arrow::SparseCOOTensor to scipy.sparse.coo_array.
""" - from scipy.sparse import coo_matrix + from scipy.sparse import coo_array cdef PyObject* out_data cdef PyObject* out_coords @@ -543,12 +544,12 @@ shape: {self.shape}""" data = PyObject_to_object(out_data) coords = PyObject_to_object(out_coords) row, col = coords[:, 0], coords[:, 1] - result = coo_matrix((data[:, 0], (row, col)), shape=self.shape) + result = coo_array((data[:, 0], (row, col)), shape=self.shape) # As the description in from_scipy above, we sorted indices matrix - # in row-major order if SciPy's coo_matrix has canonical format. - # So, we must call sum_duplicates() to make the result coo_matrix - # has canonical format. + # in row-major order if SciPy's coo_array has canonical format. + # So, we must call sum_duplicates() to make the resulting coo_array + # have canonical format. if self.has_canonical_format: result.sum_duplicates() return result @@ -732,19 +733,19 @@ shape: {self.shape}""" @staticmethod def from_scipy(obj, dim_names=None): """ - Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. + Convert scipy.sparse.csr_array or scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. Parameters ---------- - obj : scipy.sparse.csr_matrix + obj : scipy.sparse.csr_array or scipy.sparse.csr_matrix The scipy matrix that should be converted. dim_names : list, optional Names of the dimensions. """ import scipy.sparse - if not isinstance(obj, scipy.sparse.csr_matrix): + if not isinstance(obj, (scipy.sparse.csr_array, scipy.sparse.csr_matrix)): raise TypeError( - f"Expected scipy.sparse.csr_matrix, got {type(obj)}") + f"Expected scipy.sparse.csr_array or scipy.sparse.csr_matrix, got {type(obj)}") cdef shared_ptr[CSparseCSRMatrix] csparse_tensor cdef vector[int64_t] c_shape @@ -803,9 +804,9 @@ shape: {self.shape}""" def to_scipy(self): """ - Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix. + Convert arrow::SparseCSRMatrix to scipy.sparse.csr_array. 
""" - from scipy.sparse import csr_matrix + from scipy.sparse import csr_array cdef PyObject* out_data cdef PyObject* out_indptr cdef PyObject* out_indices @@ -817,7 +818,7 @@ shape: {self.shape}""" data = PyObject_to_object(out_data) indptr = PyObject_to_object(out_indptr) indices = PyObject_to_object(out_indices) - result = csr_matrix((data[:, 0], indices, indptr), shape=self.shape) + result = csr_array((data[:, 0], indices, indptr), shape=self.shape) return result def to_tensor(self): @@ -973,19 +974,19 @@ shape: {self.shape}""" @staticmethod def from_scipy(obj, dim_names=None): """ - Convert scipy.sparse.csc_matrix to arrow::SparseCSCMatrix + Convert scipy.sparse.csc_array or scipy.sparse.csc_matrix to arrow::SparseCSCMatrix Parameters ---------- - obj : scipy.sparse.csc_matrix + obj : scipy.sparse.csc_array or scipy.sparse.csc_matrix The scipy matrix that should be converted. dim_names : list, optional Names of the dimensions. """ import scipy.sparse - if not isinstance(obj, scipy.sparse.csc_matrix): + if not isinstance(obj, (scipy.sparse.csc_array, scipy.sparse.csc_matrix)): raise TypeError( - f"Expected scipy.sparse.csc_matrix, got {type(obj)}") + f"Expected scipy.sparse.csc_array or scipy.sparse.csc_matrix, got {type(obj)}") cdef shared_ptr[CSparseCSCMatrix] csparse_tensor cdef vector[int64_t] c_shape @@ -1044,9 +1045,9 @@ shape: {self.shape}""" def to_scipy(self): """ - Convert arrow::SparseCSCMatrix to scipy.sparse.csc_matrix + Convert arrow::SparseCSCMatrix to scipy.sparse.csc_array """ - from scipy.sparse import csc_matrix + from scipy.sparse import csc_array cdef PyObject* out_data cdef PyObject* out_indptr cdef PyObject* out_indices @@ -1058,7 +1059,7 @@ shape: {self.shape}""" data = PyObject_to_object(out_data) indptr = PyObject_to_object(out_indptr) indices = PyObject_to_object(out_indices) - result = csc_matrix((data[:, 0], indices, indptr), shape=self.shape) + result = csc_array((data[:, 0], indices, indptr), shape=self.shape) return result def 
to_tensor(self): diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index 6cd3d2c41e7c0..eca8090d77a9c 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -26,10 +26,12 @@ import pyarrow as pa try: - from scipy.sparse import csr_matrix, coo_matrix + from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix except ImportError: coo_matrix = None csr_matrix = None + csr_array = None + coo_array = None try: import sparse @@ -400,17 +402,19 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type): @pytest.mark.skipif(not coo_matrix, reason="requires scipy") +@pytest.mark.parametrize('sparse_object', (coo_array, coo_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) -def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type): +def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type, + sparse_object): + shape = (4, 6) + dim_names = ("x", "y") dtype = np.dtype(dtype_str) data = np.array([1, 2, 3, 4, 5, 6]).astype(dtype) row = np.array([0, 0, 2, 3, 1, 3]) col = np.array([0, 2, 0, 4, 5, 5]) - shape = (4, 6) - dim_names = ('x', 'y') # non-canonical sparse coo matrix - scipy_matrix = coo_matrix((data, (row, col)), shape=shape) + scipy_matrix = sparse_object((data, (row, col)), shape=shape) sparse_tensor = pa.SparseCOOTensor.from_scipy(scipy_matrix, dim_names=dim_names) out_scipy_matrix = sparse_tensor.to_scipy() @@ -440,16 +444,18 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type): @pytest.mark.skipif(not csr_matrix, reason="requires scipy") +@pytest.mark.parametrize('sparse_object', (csr_array, csr_matrix)) @pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs) -def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type): +def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type, + sparse_object): + shape = (4, 6) + dim_names = ("x", "y") dtype = 
np.dtype(dtype_str) data = np.array([8, 2, 5, 3, 4, 6]).astype(dtype) indptr = np.array([0, 2, 3, 4, 6]) indices = np.array([0, 2, 5, 0, 4, 5]) - shape = (4, 6) - dim_names = ('x', 'y') - sparse_array = csr_matrix((data, indices, indptr), shape=shape) + sparse_array = sparse_object((data, indices, indptr), shape=shape) sparse_tensor = pa.SparseCSRMatrix.from_scipy(sparse_array, dim_names=dim_names) out_sparse_array = sparse_tensor.to_scipy()