Skip to content

Commit cb6b604

Browse files
authored
Improve performance of GCXS dot ndarray (#643)
* improve memory access order for csr_ndarray_dot operation * improve access order for csc_ndarray_dot * re-use csr_dot_ndarray for ndarray_dot_csc * make it easier for numba to optimize the csc_ndarray function. * add benchmarking * ensure aligned memory
1 parent a1d2081 commit cb6b604

File tree

2 files changed

+43
-48
lines changed

2 files changed

+43
-48
lines changed

benchmarks/benchmark_gcxs.py

+25
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,28 @@ def time_index_slice3(self):
6767

6868
def time_index_fancy(self):
6969
self.x[self.index]
70+
71+
72+
class DenseMultiplySuite:
73+
params = ([0, 1], [1, 20, 100])
74+
param_names = ["compressed axis", "n_vectors"]
75+
76+
def setup(self, compressed_axis, n_vecs):
77+
rng = np.random.default_rng(1337)
78+
n = 10000
79+
x = sparse.random((n, n), density=0.001, format="gcxs", random_state=rng).change_compressed_axes(
80+
(compressed_axis,)
81+
)
82+
self.x = x
83+
self.t = rng.random((n, n_vecs))
84+
self.u = rng.random((n_vecs, n))
85+
86+
# Numba compilation
87+
self.x @ self.t
88+
self.u @ self.x
89+
90+
def time_gcxs_dot_ndarray(self, *args):
91+
self.x @ self.t
92+
93+
def time_ndarray_dot_gcxs(self, *args):
94+
self.u @ self.x

sparse/_common.py

+18-48
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,8 @@ def _dot(a, b, return_type=None):
414414

415415
# compressed_axes == (1,)
416416
if return_type is None or return_type == np.ndarray:
417-
return _dot_ndarray_csc_type(a.dtype, b.dtype)(out_shape, b.data, b.indices, b.indptr, a)
417+
out = _dot_csr_ndarray_type(bt.dtype, at.dtype)(out_shape[::-1], bt.data, bt.indices, bt.indptr, at)
418+
return out.T
418419
data, indices, indptr = _dot_csr_ndarray_type_sparse(bt.dtype, at.dtype)(
419420
out_shape[::-1], bt.data, bt.indices, bt.indptr, at
420421
)
@@ -717,15 +718,15 @@ def _dot_csr_ndarray(out_shape, a_data, a_indices, a_indptr, b): # pragma: no c
717718
out_shape : Tuple[int]
718719
The shape of the output array.
719720
"""
720-
out = np.empty(out_shape, dtype=dtr)
721+
b = np.ascontiguousarray(b) # ensure memory aligned
722+
out = np.zeros(out_shape, dtype=dtr)
721723
for i in range(out_shape[0]):
722-
for j in range(out_shape[1]):
723-
val = 0
724-
for k in range(a_indptr[i], a_indptr[i + 1]):
725-
ind = a_indices[k]
726-
v = a_data[k]
727-
val += v * b[ind, j]
728-
out[i, j] = val
724+
val = out[i]
725+
for k in range(a_indptr[i], a_indptr[i + 1]):
726+
ind = a_indices[k]
727+
v = a_data[k]
728+
for j in range(out_shape[1]):
729+
val[j] += v * b[ind, j]
729730
return out
730731

731732
return _dot_csr_ndarray
@@ -866,51 +867,20 @@ def _dot_csc_ndarray(a_shape, b_shape, a_data, a_indices, a_indptr, b): # pragm
866867
a_shape, b_shape : Tuple[int]
867868
The shapes of the input arrays.
868869
"""
870+
b = np.ascontiguousarray(b) # ensure memory aligned
869871
out = np.zeros((a_shape[0], b_shape[1]), dtype=dtr)
870-
for j in range(b_shape[1]):
871-
for i in range(b_shape[0]):
872-
for k in range(a_indptr[i], a_indptr[i + 1]):
873-
out[a_indices[k], j] += a_data[k] * b[i, j]
872+
for i in range(b_shape[0]):
873+
for k in range(a_indptr[i], a_indptr[i + 1]):
874+
ind = a_indices[k]
875+
v = a_data[k]
876+
val = out[ind]
877+
for j in range(b_shape[1]):
878+
val[j] += v * b[i, j]
874879
return out
875880

876881
return _dot_csc_ndarray
877882

878883

879-
@_memoize_dtype
880-
def _dot_ndarray_csc_type(dt1, dt2):
881-
dtr = _dot_dtype(dt1, dt2)
882-
883-
@numba.jit(
884-
nopython=True,
885-
nogil=True,
886-
locals={"data_curr": numba.np.numpy_support.from_dtype(dtr)},
887-
)
888-
def _dot_ndarray_csc(out_shape, b_data, b_indices, b_indptr, a): # pragma: no cover
889-
"""
890-
Utility function taking in one `ndarray` and one ``GCXS`` and
891-
calculating their dot product: a @ b for b with compressed columns.
892-
893-
Parameters
894-
----------
895-
a : np.ndarray
896-
The input array ``a``.
897-
b_data, b_indices, b_indptr : np.ndarray
898-
The data, indices, and index pointers of ``b``.
899-
out_shape : Tuple[int]
900-
The shape of the output array.
901-
"""
902-
out = np.empty(out_shape, dtype=dtr)
903-
for i in range(out_shape[0]):
904-
for j in range(out_shape[1]):
905-
total = 0
906-
for k in range(b_indptr[j], b_indptr[j + 1]):
907-
total += a[i, b_indices[k]] * b_data[k]
908-
out[i, j] = total
909-
return out
910-
911-
return _dot_ndarray_csc
912-
913-
914884
@_memoize_dtype
915885
def _dot_coo_coo_type(dt1, dt2):
916886
dtr = _dot_dtype(dt1, dt2)

0 commit comments

Comments
 (0)