@@ -414,7 +414,8 @@ def _dot(a, b, return_type=None):
414
414
415
415
# compressed_axes == (1,)
416
416
if return_type is None or return_type == np .ndarray :
417
- return _dot_ndarray_csc_type (a .dtype , b .dtype )(out_shape , b .data , b .indices , b .indptr , a )
417
+ out = _dot_csr_ndarray_type (bt .dtype , at .dtype )(out_shape [::- 1 ], bt .data , bt .indices , bt .indptr , at )
418
+ return out .T
418
419
data , indices , indptr = _dot_csr_ndarray_type_sparse (bt .dtype , at .dtype )(
419
420
out_shape [::- 1 ], bt .data , bt .indices , bt .indptr , at
420
421
)
@@ -717,15 +718,15 @@ def _dot_csr_ndarray(out_shape, a_data, a_indices, a_indptr, b): # pragma: no c
717
718
out_shape : Tuple[int]
718
719
The shape of the output array.
719
720
"""
720
- out = np .empty (out_shape , dtype = dtr )
721
+ b = np .ascontiguousarray (b ) # ensure C-contiguous (row-major) layout so the inner loop over j reads b[ind, j] with unit stride
722
+ out = np .zeros (out_shape , dtype = dtr )
721
723
for i in range (out_shape [0 ]):
722
- for j in range (out_shape [1 ]):
723
- val = 0
724
- for k in range (a_indptr [i ], a_indptr [i + 1 ]):
725
- ind = a_indices [k ]
726
- v = a_data [k ]
727
- val += v * b [ind , j ]
728
- out [i , j ] = val
724
+ val = out [i ]
725
+ for k in range (a_indptr [i ], a_indptr [i + 1 ]):
726
+ ind = a_indices [k ]
727
+ v = a_data [k ]
728
+ for j in range (out_shape [1 ]):
729
+ val [j ] += v * b [ind , j ]
729
730
return out
730
731
731
732
return _dot_csr_ndarray
@@ -866,51 +867,20 @@ def _dot_csc_ndarray(a_shape, b_shape, a_data, a_indices, a_indptr, b): # pragm
866
867
a_shape, b_shape : Tuple[int]
867
868
The shapes of the input arrays.
868
869
"""
870
+ b = np .ascontiguousarray (b ) # ensure C-contiguous (row-major) layout so the inner loop over j reads b[i, j] with unit stride
869
871
out = np .zeros ((a_shape [0 ], b_shape [1 ]), dtype = dtr )
870
- for j in range (b_shape [1 ]):
871
- for i in range (b_shape [0 ]):
872
- for k in range (a_indptr [i ], a_indptr [i + 1 ]):
873
- out [a_indices [k ], j ] += a_data [k ] * b [i , j ]
872
+ for i in range (b_shape [0 ]):
873
+ for k in range (a_indptr [i ], a_indptr [i + 1 ]):
874
+ ind = a_indices [k ]
875
+ v = a_data [k ]
876
+ val = out [ind ]
877
+ for j in range (b_shape [1 ]):
878
+ val [j ] += v * b [i , j ]
874
879
return out
875
880
876
881
return _dot_csc_ndarray
877
882
878
883
879
- @_memoize_dtype
880
- def _dot_ndarray_csc_type (dt1 , dt2 ):
881
- dtr = _dot_dtype (dt1 , dt2 )
882
-
883
- @numba .jit (
884
- nopython = True ,
885
- nogil = True ,
886
- locals = {"data_curr" : numba .np .numpy_support .from_dtype (dtr )},
887
- )
888
- def _dot_ndarray_csc (out_shape , b_data , b_indices , b_indptr , a ): # pragma: no cover
889
- """
890
- Utility function taking in one `ndarray` and one ``GCXS`` and
891
- calculating their dot product: a @ b for b with compressed columns.
892
-
893
- Parameters
894
- ----------
895
- a : np.ndarray
896
- The input array ``a``.
897
- b_data, b_indices, b_indptr : np.ndarray
898
- The data, indices, and index pointers of ``b``.
899
- out_shape : Tuple[int]
900
- The shape of the output array.
901
- """
902
- out = np .empty (out_shape , dtype = dtr )
903
- for i in range (out_shape [0 ]):
904
- for j in range (out_shape [1 ]):
905
- total = 0
906
- for k in range (b_indptr [j ], b_indptr [j + 1 ]):
907
- total += a [i , b_indices [k ]] * b_data [k ]
908
- out [i , j ] = total
909
- return out
910
-
911
- return _dot_ndarray_csc
912
-
913
-
914
884
@_memoize_dtype
915
885
def _dot_coo_coo_type (dt1 , dt2 ):
916
886
dtr = _dot_dtype (dt1 , dt2 )
0 commit comments