11
11
add,
12
12
accum,
13
13
)
14
- # Computes A*B and stores the result in C using the semiring semiring.
14
+ # Computes A*B and stores the result in C
15
15
row = @index (Global, Linear)
16
16
acc = monoid_neutral_element
17
17
for i = a_row_ptr[row]: a_row_ptr[row+ 1 ]- 1
21
21
c[row] = accum (c[row], acc, row, 1 , row, 1 )
22
22
end
23
23
24
- @kernel function masked_csr_spmv_kernel ! (
24
+ @kernel function sparse_masked_csr_spmv_kernel ! (
25
25
c,
26
26
@Const (a_row_ptr),
27
27
@Const (a_col_val),
33
33
add,
34
34
accum,
35
35
)
36
- # Computes A*B and stores the result in C using the semiring semiring.
36
+ # Computes A*B and stores the result in C
37
37
entry_nb = @index (Global, Linear)
38
38
row = mask[entry_nb]
39
39
acc = monoid_neutral_element
44
44
c[row] = accum (c[row], acc, row, 1 , row, 1 )
45
45
end
46
46
47
# Masked CSR sparse matrix-vector product driven by a dense mask.
#
# Each work-item uses its global linear index as the row to process. When
# `mask[row]` differs from `mask_zero`, the stored entries of that row
# (positions `a_row_ptr[row]` through `a_row_ptr[row + 1] - 1` of `a_col_val`
# and `a_nz_val`) are paired with the matching entries of `b` through `mul`,
# folded together with `add` starting from `monoid_neutral_element`, and the
# folded value is merged into `c[row]` with `accum`.
# Rows whose mask entry equals `mask_zero` leave `c[row]` untouched.
@kernel function dense_masked_csr_spmv_kernel!(
    c,
    @Const(a_row_ptr),
    @Const(a_col_val),
    @Const(a_nz_val),
    @Const(b),
    @Const(monoid_neutral_element),
    @Const(mask),
    @Const(mask_zero),
    mul,
    add,
    accum,
)
    row = @index(Global, Linear)
    if mask[row] != mask_zero
        # Range of positions holding this row's stored entries.
        first_nz = a_row_ptr[row]
        last_nz = a_row_ptr[row + 1] - 1

        row_acc = monoid_neutral_element
        for k in first_nz:last_nz
            j = a_col_val[k]
            # The trailing integer arguments are forwarded to the operators as-is;
            # presumably index metadata for the operands — confirm against the
            # operator definitions.
            term = mul(a_nz_val[k], b[j], row, j, j, 1)
            row_acc = add(row_acc, term, row, j, j, 1)
        end

        c[row] = accum(c[row], row_acc, row, 1, row, 1)
    end
end
71
+
72
+
47
73
function gpu_spmv! (
48
- C:: AV ,
74
+ C:: ResVec ,
49
75
A:: SparseGPUMatrixCSR{Tv,Ti} ,
50
- B:: AV ;
76
+ B:: InputVec ;
51
77
mul:: Function = GPUGraphs_mul,
52
78
add:: Function = GPUGraphs_add,
53
79
accum:: Function = GPUGraphs_second,
54
- mask:: Union{SparseGPUVector{Bool,Ti} , Nothing} = nothing ,
55
- ) where {Tv,Ti,AV <: AbstractVector{Tv } }
56
- # Computes A*B and stores the result in C using the semiring semiring.
80
+ mask:: Union{MaskVec , Nothing} = nothing ,
81
+ ) where {Tv,Ti<: Integer , Tmask <: Integer , ResType <: Number , InputType <: Number , ResVec <: AbstractVector{ResType} , InputVec <: AbstractVector{InputType} , MaskVec <: AbstractVector{Tmask } }
82
+ # Computes A*B and stores the result in C
57
83
# Check dimensions
58
84
if size (A, 2 ) != length (B)
59
85
throw (DimensionMismatch (" Matrix dimensions must agree" ))
@@ -63,8 +89,40 @@ function gpu_spmv!(
63
89
end
64
90
# Call the kernel
65
91
backend = get_backend (C)
66
- if mask != = nothing
67
- kernel! = masked_csr_spmv_kernel! (backend)
92
+
93
+ # No mask
94
+ if mask === nothing
95
+ kernel! = csr_spmv_kernel! (backend)
96
+ kernel! (
97
+ C,
98
+ A. rowptr,
99
+ A. colval,
100
+ A. nzval,
101
+ B,
102
+ monoid_neutral (Tv, add),
103
+ mul,
104
+ add,
105
+ accum;
106
+ ndrange = size (A, 1 ),
107
+ )
108
+ return
109
+ end
110
+ # Check mask type
111
+ if ! (typeof (mask) <: AbstractVector{Tmask} )
112
+ throw (DimensionMismatch (" Mask must be a vector" ))
113
+ end
114
+ # Check mask length
115
+ if length (mask) != size (A, 1 )
116
+ throw (DimensionMismatch (" Mask length must be equal to the number of rows in A" ))
117
+ end
118
+ # Check mask backend
119
+ if get_backend (mask) != backend
120
+ throw (ArgumentError (" Mask must be on the same backend as A" ))
121
+ end
122
+
123
+ # SparseVector mask
124
+ if typeof (mask) <: AbstractSparseGPUVector{Tmask,Ti}
125
+ kernel! = sparse_masked_csr_spmv_kernel! (backend)
68
126
kernel! (
69
127
C,
70
128
A. rowptr,
@@ -81,19 +139,26 @@ function gpu_spmv!(
81
139
return
82
140
end
83
141
84
- kernel! = csr_spmv_kernel! (backend)
85
- kernel! (
86
- C,
87
- A. rowptr,
88
- A. colval,
89
- A. nzval,
90
- B,
91
- monoid_neutral (Tv, add),
92
- mul,
93
- add,
94
- accum;
95
- ndrange = size (A, 1 ),
96
- )
142
+ # DenseVector mask
143
+ if typeof (mask) <: AbstractVector{Tmask}
144
+ kernel! = dense_masked_csr_spmv_kernel! (backend)
145
+ kernel! (
146
+ C,
147
+ A. rowptr,
148
+ A. colval,
149
+ A. nzval,
150
+ B,
151
+ monoid_neutral (Tv, add),
152
+ mask,
153
+ zero (Tmask),
154
+ mul,
155
+ add,
156
+ accum;
157
+ ndrange = size (A, 1 ),
158
+ )
159
+ return
160
+ end
161
+
97
162
end
98
163
99
164
108
173
add,
109
174
accum,
110
175
)
111
- # Computes A*B and stores the result in C using the semiring semiring.
176
+ # Computes A*B and stores the result in C
112
177
col = @index (Global, Linear)
113
178
acc = monoid_neutral_element
114
179
for i = a_col_ptr[col]: a_col_ptr[col+ 1 ]- 1
@@ -126,7 +191,7 @@ function gpu_spmv!(
126
191
add:: Function = GPUGraphs_add,
127
192
accum:: Function = GPUGraphs_second,
128
193
) where {Tv,Ti,AV<: AbstractVector{Tv} }
129
- # Computes A*B and stores the result in C using the semiring semiring.
194
+ # Computes A*B and stores the result in C
130
195
# Check dimensions
131
196
if size (A, 2 ) != length (B)
132
197
throw (DimensionMismatch (" Matrix dimensions must agree" ))
@@ -183,7 +248,7 @@ function gpu_spmv!(
183
248
add:: Function = GPUGraphs_add,
184
249
accum:: Function = GPUGraphs_second,
185
250
) where {Tv,Ti,AV<: AbstractVector{Tv} }
186
- # Computes A*B and stores the result in C using the semiring semiring.
251
+ # Computes A*B and stores the result in C
187
252
# Check dimensions
188
253
if size (A, 2 ) != length (B)
189
254
throw (DimensionMismatch (" Matrix dimensions must agree" ))
0 commit comments